[macruby-changes] [3746] MacRuby/trunk

source_changes at macosforge.org source_changes at macosforge.org
Fri Mar 12 15:56:55 PST 2010


Revision: 3746
          http://trac.macosforge.org/projects/ruby/changeset/3746
Author:   lsansonetti at apple.com
Date:     2010-03-12 15:56:52 -0800 (Fri, 12 Mar 2010)
Log Message:
-----------
merge icu branch

Modified Paths:
--------------
    MacRuby/trunk/array.c
    MacRuby/trunk/class.c
    MacRuby/trunk/compiler.cpp
    MacRuby/trunk/compiler.h
    MacRuby/trunk/complex.c
    MacRuby/trunk/dispatcher.cpp
    MacRuby/trunk/encoding.c
    MacRuby/trunk/error.c
    MacRuby/trunk/eval.c
    MacRuby/trunk/ext/bigdecimal/bigdecimal.c
    MacRuby/trunk/ext/digest/bubblebabble/bubblebabble.c
    MacRuby/trunk/ext/digest/digest.c
    MacRuby/trunk/ext/json/rubyext.c
    MacRuby/trunk/ext/libyaml/rubyext.c
    MacRuby/trunk/ext/openssl/ossl.c
    MacRuby/trunk/ext/openssl/ossl_bio.c
    MacRuby/trunk/ext/openssl/ossl_digest.c
    MacRuby/trunk/ext/openssl/ossl_pkey_ec.c
    MacRuby/trunk/ext/openssl/ossl_ssl.c
    MacRuby/trunk/ext/ripper/extconf.rb
    MacRuby/trunk/ext/socket/socket.c
    MacRuby/trunk/ext/zlib/zlib.c
    MacRuby/trunk/file.c
    MacRuby/trunk/gc.c
    MacRuby/trunk/hash.c
    MacRuby/trunk/id.c
    MacRuby/trunk/id.h
    MacRuby/trunk/include/ruby/encoding.h
    MacRuby/trunk/include/ruby/intern.h
    MacRuby/trunk/include/ruby/io.h
    MacRuby/trunk/include/ruby/ruby.h
    MacRuby/trunk/inits.c
    MacRuby/trunk/io.c
    MacRuby/trunk/lib/irb/locale.rb
    MacRuby/trunk/lib/net/telnet.rb
    MacRuby/trunk/lib/rubygems/remote_fetcher.rb
    MacRuby/trunk/lib/stringio.rb
    MacRuby/trunk/lib/strscan.rb
    MacRuby/trunk/marshal.c
    MacRuby/trunk/objc.h
    MacRuby/trunk/objc.m
    MacRuby/trunk/object.c
    MacRuby/trunk/pack.c
    MacRuby/trunk/parse.y
    MacRuby/trunk/rakelib/builder/builder.rb
    MacRuby/trunk/rakelib/builder/options.rb
    MacRuby/trunk/random.c
    MacRuby/trunk/rational.c
    MacRuby/trunk/ruby.c
    MacRuby/trunk/spec/frozen/core/matchdata/element_reference_spec.rb
    MacRuby/trunk/spec/frozen/core/matchdata/names_spec.rb
    MacRuby/trunk/spec/frozen/core/regexp/named_captures_spec.rb
    MacRuby/trunk/spec/frozen/core/regexp/names_spec.rb
    MacRuby/trunk/spec/frozen/language/regexp/anchors_spec.rb
    MacRuby/trunk/spec/frozen/language/versions/regexp_1.9.rb
    MacRuby/trunk/sprintf.c
    MacRuby/trunk/string.c
    MacRuby/trunk/time.c
    MacRuby/trunk/util.c
    MacRuby/trunk/vm.cpp
    MacRuby/trunk/vm.h

Added Paths:
-----------
    MacRuby/trunk/encoding.h
    MacRuby/trunk/icu-1060/
    MacRuby/trunk/icu-1060/unicode/
    MacRuby/trunk/icu-1060/unicode/basictz.h
    MacRuby/trunk/icu-1060/unicode/brkiter.h
    MacRuby/trunk/icu-1060/unicode/calendar.h
    MacRuby/trunk/icu-1060/unicode/caniter.h
    MacRuby/trunk/icu-1060/unicode/chariter.h
    MacRuby/trunk/icu-1060/unicode/choicfmt.h
    MacRuby/trunk/icu-1060/unicode/coleitr.h
    MacRuby/trunk/icu-1060/unicode/coll.h
    MacRuby/trunk/icu-1060/unicode/curramt.h
    MacRuby/trunk/icu-1060/unicode/currunit.h
    MacRuby/trunk/icu-1060/unicode/datefmt.h
    MacRuby/trunk/icu-1060/unicode/dbbi.h
    MacRuby/trunk/icu-1060/unicode/dcfmtsym.h
    MacRuby/trunk/icu-1060/unicode/decimfmt.h
    MacRuby/trunk/icu-1060/unicode/docmain.h
    MacRuby/trunk/icu-1060/unicode/dtfmtsym.h
    MacRuby/trunk/icu-1060/unicode/dtintrv.h
    MacRuby/trunk/icu-1060/unicode/dtitvfmt.h
    MacRuby/trunk/icu-1060/unicode/dtitvinf.h
    MacRuby/trunk/icu-1060/unicode/dtptngen.h
    MacRuby/trunk/icu-1060/unicode/dtrule.h
    MacRuby/trunk/icu-1060/unicode/fieldpos.h
    MacRuby/trunk/icu-1060/unicode/fmtable.h
    MacRuby/trunk/icu-1060/unicode/format.h
    MacRuby/trunk/icu-1060/unicode/gregocal.h
    MacRuby/trunk/icu-1060/unicode/locid.h
    MacRuby/trunk/icu-1060/unicode/measfmt.h
    MacRuby/trunk/icu-1060/unicode/measunit.h
    MacRuby/trunk/icu-1060/unicode/measure.h
    MacRuby/trunk/icu-1060/unicode/msgfmt.h
    MacRuby/trunk/icu-1060/unicode/normlzr.h
    MacRuby/trunk/icu-1060/unicode/numfmt.h
    MacRuby/trunk/icu-1060/unicode/parseerr.h
    MacRuby/trunk/icu-1060/unicode/parsepos.h
    MacRuby/trunk/icu-1060/unicode/platform.h
    MacRuby/trunk/icu-1060/unicode/plurfmt.h
    MacRuby/trunk/icu-1060/unicode/plurrule.h
    MacRuby/trunk/icu-1060/unicode/ppalmos.h
    MacRuby/trunk/icu-1060/unicode/putil.h
    MacRuby/trunk/icu-1060/unicode/pwin32.h
    MacRuby/trunk/icu-1060/unicode/rbbi.h
    MacRuby/trunk/icu-1060/unicode/rbnf.h
    MacRuby/trunk/icu-1060/unicode/rbtz.h
    MacRuby/trunk/icu-1060/unicode/regex.h
    MacRuby/trunk/icu-1060/unicode/rep.h
    MacRuby/trunk/icu-1060/unicode/resbund.h
    MacRuby/trunk/icu-1060/unicode/schriter.h
    MacRuby/trunk/icu-1060/unicode/search.h
    MacRuby/trunk/icu-1060/unicode/simpletz.h
    MacRuby/trunk/icu-1060/unicode/smpdtfmt.h
    MacRuby/trunk/icu-1060/unicode/sortkey.h
    MacRuby/trunk/icu-1060/unicode/strenum.h
    MacRuby/trunk/icu-1060/unicode/stsearch.h
    MacRuby/trunk/icu-1060/unicode/symtable.h
    MacRuby/trunk/icu-1060/unicode/tblcoll.h
    MacRuby/trunk/icu-1060/unicode/timezone.h
    MacRuby/trunk/icu-1060/unicode/translit.h
    MacRuby/trunk/icu-1060/unicode/tzrule.h
    MacRuby/trunk/icu-1060/unicode/tztrans.h
    MacRuby/trunk/icu-1060/unicode/ubidi.h
    MacRuby/trunk/icu-1060/unicode/ubrk.h
    MacRuby/trunk/icu-1060/unicode/ucal.h
    MacRuby/trunk/icu-1060/unicode/ucasemap.h
    MacRuby/trunk/icu-1060/unicode/ucat.h
    MacRuby/trunk/icu-1060/unicode/uchar.h
    MacRuby/trunk/icu-1060/unicode/uchriter.h
    MacRuby/trunk/icu-1060/unicode/uclean.h
    MacRuby/trunk/icu-1060/unicode/ucnv.h
    MacRuby/trunk/icu-1060/unicode/ucnv_cb.h
    MacRuby/trunk/icu-1060/unicode/ucnv_err.h
    MacRuby/trunk/icu-1060/unicode/ucol.h
    MacRuby/trunk/icu-1060/unicode/ucoleitr.h
    MacRuby/trunk/icu-1060/unicode/uconfig.h
    MacRuby/trunk/icu-1060/unicode/ucsdet.h
    MacRuby/trunk/icu-1060/unicode/ucurr.h
    MacRuby/trunk/icu-1060/unicode/udat.h
    MacRuby/trunk/icu-1060/unicode/udata.h
    MacRuby/trunk/icu-1060/unicode/udatpg.h
    MacRuby/trunk/icu-1060/unicode/udeprctd.h
    MacRuby/trunk/icu-1060/unicode/udraft.h
    MacRuby/trunk/icu-1060/unicode/uenum.h
    MacRuby/trunk/icu-1060/unicode/uidna.h
    MacRuby/trunk/icu-1060/unicode/uintrnal.h
    MacRuby/trunk/icu-1060/unicode/uiter.h
    MacRuby/trunk/icu-1060/unicode/uloc.h
    MacRuby/trunk/icu-1060/unicode/ulocdata.h
    MacRuby/trunk/icu-1060/unicode/umachine.h
    MacRuby/trunk/icu-1060/unicode/umisc.h
    MacRuby/trunk/icu-1060/unicode/umsg.h
    MacRuby/trunk/icu-1060/unicode/unifilt.h
    MacRuby/trunk/icu-1060/unicode/unifunct.h
    MacRuby/trunk/icu-1060/unicode/unimatch.h
    MacRuby/trunk/icu-1060/unicode/unirepl.h
    MacRuby/trunk/icu-1060/unicode/uniset.h
    MacRuby/trunk/icu-1060/unicode/unistr.h
    MacRuby/trunk/icu-1060/unicode/unorm.h
    MacRuby/trunk/icu-1060/unicode/unum.h
    MacRuby/trunk/icu-1060/unicode/uobject.h
    MacRuby/trunk/icu-1060/unicode/uobslete.h
    MacRuby/trunk/icu-1060/unicode/urbtok.h
    MacRuby/trunk/icu-1060/unicode/uregex.h
    MacRuby/trunk/icu-1060/unicode/urename.h
    MacRuby/trunk/icu-1060/unicode/urep.h
    MacRuby/trunk/icu-1060/unicode/ures.h
    MacRuby/trunk/icu-1060/unicode/uscript.h
    MacRuby/trunk/icu-1060/unicode/usearch.h
    MacRuby/trunk/icu-1060/unicode/uset.h
    MacRuby/trunk/icu-1060/unicode/usetiter.h
    MacRuby/trunk/icu-1060/unicode/ushape.h
    MacRuby/trunk/icu-1060/unicode/usprep.h
    MacRuby/trunk/icu-1060/unicode/ustdio.h
    MacRuby/trunk/icu-1060/unicode/ustream.h
    MacRuby/trunk/icu-1060/unicode/ustring.h
    MacRuby/trunk/icu-1060/unicode/usystem.h
    MacRuby/trunk/icu-1060/unicode/utext.h
    MacRuby/trunk/icu-1060/unicode/utf.h
    MacRuby/trunk/icu-1060/unicode/utf16.h
    MacRuby/trunk/icu-1060/unicode/utf32.h
    MacRuby/trunk/icu-1060/unicode/utf8.h
    MacRuby/trunk/icu-1060/unicode/utf_old.h
    MacRuby/trunk/icu-1060/unicode/utmscale.h
    MacRuby/trunk/icu-1060/unicode/utrace.h
    MacRuby/trunk/icu-1060/unicode/utrans.h
    MacRuby/trunk/icu-1060/unicode/utypes.h
    MacRuby/trunk/icu-1060/unicode/uversion.h
    MacRuby/trunk/icu-1060/unicode/vtzone.h
    MacRuby/trunk/include/ruby/re.h
    MacRuby/trunk/re.cpp
    MacRuby/trunk/re.h
    MacRuby/trunk/symbol.c
    MacRuby/trunk/symbol.h
    MacRuby/trunk/ucnv.c

Removed Paths:
-------------
    MacRuby/trunk/icu-1060/unicode/
    MacRuby/trunk/icu-1060/unicode/basictz.h
    MacRuby/trunk/icu-1060/unicode/brkiter.h
    MacRuby/trunk/icu-1060/unicode/calendar.h
    MacRuby/trunk/icu-1060/unicode/caniter.h
    MacRuby/trunk/icu-1060/unicode/chariter.h
    MacRuby/trunk/icu-1060/unicode/choicfmt.h
    MacRuby/trunk/icu-1060/unicode/coleitr.h
    MacRuby/trunk/icu-1060/unicode/coll.h
    MacRuby/trunk/icu-1060/unicode/curramt.h
    MacRuby/trunk/icu-1060/unicode/currunit.h
    MacRuby/trunk/icu-1060/unicode/datefmt.h
    MacRuby/trunk/icu-1060/unicode/dbbi.h
    MacRuby/trunk/icu-1060/unicode/dcfmtsym.h
    MacRuby/trunk/icu-1060/unicode/decimfmt.h
    MacRuby/trunk/icu-1060/unicode/docmain.h
    MacRuby/trunk/icu-1060/unicode/dtfmtsym.h
    MacRuby/trunk/icu-1060/unicode/dtintrv.h
    MacRuby/trunk/icu-1060/unicode/dtitvfmt.h
    MacRuby/trunk/icu-1060/unicode/dtitvinf.h
    MacRuby/trunk/icu-1060/unicode/dtptngen.h
    MacRuby/trunk/icu-1060/unicode/dtrule.h
    MacRuby/trunk/icu-1060/unicode/fieldpos.h
    MacRuby/trunk/icu-1060/unicode/fmtable.h
    MacRuby/trunk/icu-1060/unicode/format.h
    MacRuby/trunk/icu-1060/unicode/gregocal.h
    MacRuby/trunk/icu-1060/unicode/locid.h
    MacRuby/trunk/icu-1060/unicode/measfmt.h
    MacRuby/trunk/icu-1060/unicode/measunit.h
    MacRuby/trunk/icu-1060/unicode/measure.h
    MacRuby/trunk/icu-1060/unicode/msgfmt.h
    MacRuby/trunk/icu-1060/unicode/normlzr.h
    MacRuby/trunk/icu-1060/unicode/numfmt.h
    MacRuby/trunk/icu-1060/unicode/parseerr.h
    MacRuby/trunk/icu-1060/unicode/parsepos.h
    MacRuby/trunk/icu-1060/unicode/platform.h
    MacRuby/trunk/icu-1060/unicode/plurfmt.h
    MacRuby/trunk/icu-1060/unicode/plurrule.h
    MacRuby/trunk/icu-1060/unicode/ppalmos.h
    MacRuby/trunk/icu-1060/unicode/putil.h
    MacRuby/trunk/icu-1060/unicode/pwin32.h
    MacRuby/trunk/icu-1060/unicode/rbbi.h
    MacRuby/trunk/icu-1060/unicode/rbnf.h
    MacRuby/trunk/icu-1060/unicode/rbtz.h
    MacRuby/trunk/icu-1060/unicode/regex.h
    MacRuby/trunk/icu-1060/unicode/rep.h
    MacRuby/trunk/icu-1060/unicode/resbund.h
    MacRuby/trunk/icu-1060/unicode/schriter.h
    MacRuby/trunk/icu-1060/unicode/search.h
    MacRuby/trunk/icu-1060/unicode/simpletz.h
    MacRuby/trunk/icu-1060/unicode/smpdtfmt.h
    MacRuby/trunk/icu-1060/unicode/sortkey.h
    MacRuby/trunk/icu-1060/unicode/strenum.h
    MacRuby/trunk/icu-1060/unicode/stsearch.h
    MacRuby/trunk/icu-1060/unicode/symtable.h
    MacRuby/trunk/icu-1060/unicode/tblcoll.h
    MacRuby/trunk/icu-1060/unicode/timezone.h
    MacRuby/trunk/icu-1060/unicode/translit.h
    MacRuby/trunk/icu-1060/unicode/tzrule.h
    MacRuby/trunk/icu-1060/unicode/tztrans.h
    MacRuby/trunk/icu-1060/unicode/ubidi.h
    MacRuby/trunk/icu-1060/unicode/ubrk.h
    MacRuby/trunk/icu-1060/unicode/ucal.h
    MacRuby/trunk/icu-1060/unicode/ucasemap.h
    MacRuby/trunk/icu-1060/unicode/ucat.h
    MacRuby/trunk/icu-1060/unicode/uchar.h
    MacRuby/trunk/icu-1060/unicode/uchriter.h
    MacRuby/trunk/icu-1060/unicode/uclean.h
    MacRuby/trunk/icu-1060/unicode/ucnv.h
    MacRuby/trunk/icu-1060/unicode/ucnv_cb.h
    MacRuby/trunk/icu-1060/unicode/ucnv_err.h
    MacRuby/trunk/icu-1060/unicode/ucol.h
    MacRuby/trunk/icu-1060/unicode/ucoleitr.h
    MacRuby/trunk/icu-1060/unicode/uconfig.h
    MacRuby/trunk/icu-1060/unicode/ucsdet.h
    MacRuby/trunk/icu-1060/unicode/ucurr.h
    MacRuby/trunk/icu-1060/unicode/udat.h
    MacRuby/trunk/icu-1060/unicode/udata.h
    MacRuby/trunk/icu-1060/unicode/udatpg.h
    MacRuby/trunk/icu-1060/unicode/udeprctd.h
    MacRuby/trunk/icu-1060/unicode/udraft.h
    MacRuby/trunk/icu-1060/unicode/uenum.h
    MacRuby/trunk/icu-1060/unicode/uidna.h
    MacRuby/trunk/icu-1060/unicode/uintrnal.h
    MacRuby/trunk/icu-1060/unicode/uiter.h
    MacRuby/trunk/icu-1060/unicode/uloc.h
    MacRuby/trunk/icu-1060/unicode/ulocdata.h
    MacRuby/trunk/icu-1060/unicode/umachine.h
    MacRuby/trunk/icu-1060/unicode/umisc.h
    MacRuby/trunk/icu-1060/unicode/umsg.h
    MacRuby/trunk/icu-1060/unicode/unifilt.h
    MacRuby/trunk/icu-1060/unicode/unifunct.h
    MacRuby/trunk/icu-1060/unicode/unimatch.h
    MacRuby/trunk/icu-1060/unicode/unirepl.h
    MacRuby/trunk/icu-1060/unicode/uniset.h
    MacRuby/trunk/icu-1060/unicode/unistr.h
    MacRuby/trunk/icu-1060/unicode/unorm.h
    MacRuby/trunk/icu-1060/unicode/unum.h
    MacRuby/trunk/icu-1060/unicode/uobject.h
    MacRuby/trunk/icu-1060/unicode/uobslete.h
    MacRuby/trunk/icu-1060/unicode/urbtok.h
    MacRuby/trunk/icu-1060/unicode/uregex.h
    MacRuby/trunk/icu-1060/unicode/urename.h
    MacRuby/trunk/icu-1060/unicode/urep.h
    MacRuby/trunk/icu-1060/unicode/ures.h
    MacRuby/trunk/icu-1060/unicode/uscript.h
    MacRuby/trunk/icu-1060/unicode/usearch.h
    MacRuby/trunk/icu-1060/unicode/uset.h
    MacRuby/trunk/icu-1060/unicode/usetiter.h
    MacRuby/trunk/icu-1060/unicode/ushape.h
    MacRuby/trunk/icu-1060/unicode/usprep.h
    MacRuby/trunk/icu-1060/unicode/ustdio.h
    MacRuby/trunk/icu-1060/unicode/ustream.h
    MacRuby/trunk/icu-1060/unicode/ustring.h
    MacRuby/trunk/icu-1060/unicode/usystem.h
    MacRuby/trunk/icu-1060/unicode/utext.h
    MacRuby/trunk/icu-1060/unicode/utf.h
    MacRuby/trunk/icu-1060/unicode/utf16.h
    MacRuby/trunk/icu-1060/unicode/utf32.h
    MacRuby/trunk/icu-1060/unicode/utf8.h
    MacRuby/trunk/icu-1060/unicode/utf_old.h
    MacRuby/trunk/icu-1060/unicode/utmscale.h
    MacRuby/trunk/icu-1060/unicode/utrace.h
    MacRuby/trunk/icu-1060/unicode/utrans.h
    MacRuby/trunk/icu-1060/unicode/utypes.h
    MacRuby/trunk/icu-1060/unicode/uversion.h
    MacRuby/trunk/icu-1060/unicode/vtzone.h
    MacRuby/trunk/include/ruby/oniguruma.h
    MacRuby/trunk/include/ruby/re.h
    MacRuby/trunk/include/ruby/regex.h
    MacRuby/trunk/onig/
    MacRuby/trunk/re.c

Property Changed:
----------------
    MacRuby/trunk/


Property changes on: MacRuby/trunk
___________________________________________________________________
Added: svn:mergeinfo
   + /MacRuby/branches/icu:3533-3744

Modified: MacRuby/trunk/array.c
===================================================================
--- MacRuby/trunk/array.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/array.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1381,12 +1381,14 @@
 	return 0;
     }
 
+#if 0 // TODO
     /* FIXME optimize!!! */
     if (TYPE(a) == T_STRING) {
 	if (TYPE(b) == T_STRING) {
 	    return rb_str_cmp(a, b);
 	}
     }
+#endif
 
     VALUE retval = rb_objs_cmp(a, b);
     return rb_cmpint(retval, a, b);

Modified: MacRuby/trunk/class.c
===================================================================
--- MacRuby/trunk/class.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/class.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -225,19 +225,17 @@
 {
     VALUE klass;
 
-    if (super == rb_cCFString) {
-	super = rb_cNSMutableString;
-    }
-    else {
-	if (!RCLASS_RUBY(super)) {
-	    const long v = RCLASS_VERSION(super);
-	    if (v & RCLASS_IS_HASH_SUBCLASS) {
-		super = rb_cNSMutableHash;
-	    }
-	    else if (v & RCLASS_IS_ARRAY_SUBCLASS) {
-		super = rb_cNSMutableArray;
-	    }
+    if (!RCLASS_RUBY(super)) {
+	const long v = RCLASS_VERSION(super);
+	if (v & RCLASS_IS_STRING_SUBCLASS) {
+	    super = rb_cNSMutableString;
 	}
+	else if (v & RCLASS_IS_HASH_SUBCLASS) {
+	    super = rb_cNSMutableHash;
+	}
+	else if (v & RCLASS_IS_ARRAY_SUBCLASS) {
+	    super = rb_cNSMutableArray;
+	}
     }
 
     klass = rb_objc_alloc_class(name, super, T_CLASS, rb_cClass);

Modified: MacRuby/trunk/compiler.cpp
===================================================================
--- MacRuby/trunk/compiler.cpp	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/compiler.cpp	2010-03-12 23:56:52 UTC (rev 3746)
@@ -18,12 +18,13 @@
 #include "ruby/ruby.h"
 #include "ruby/encoding.h"
 #include "ruby/node.h"
-#include "ruby/re.h"
 #include "id.h"
 #include "vm.h"
 #include "compiler.h"
 #include "objc.h"
 #include "version.h"
+#include "encoding.h"
+#include "re.h"
 
 extern "C" const char *ruby_node_name(int node);
 
@@ -502,10 +503,12 @@
 
 GlobalVariable *
 RoxorCompiler::compile_const_global_ustring(const UniChar *str,
-	const size_t len, CFHashCode hash)
+	const size_t len)
 {
     assert(len > 0);
 
+    const unsigned long hash = rb_str_hash_uchars(str, len);
+
     std::map<CFHashCode, GlobalVariable *>::iterator iter =
 	static_ustrings.find(hash);
 
@@ -2795,8 +2798,7 @@
 	//
 	//	10.times { s = 'foo'; s << 'bar' }
 	//
-	const size_t str_len = RSTRING_LEN(val);
-	if (str_len == 0) {
+	if (rb_str_chars_len(val) == 0) {
 	    if (newString3Func == NULL) {	
 		newString3Func = cast<Function>(
 			module->getOrInsertFunction(
@@ -2805,22 +2807,19 @@
 	    return CallInst::Create(newString3Func, "", bb);
 	}
 	else {
-	    UniChar *buf = (UniChar *)CFStringGetCharactersPtr(
-		    (CFStringRef)val);
-	    bool free_buf = false;
-	    if (buf == NULL) {
-		buf = (UniChar *)malloc(sizeof(UniChar) * str_len);
-		CFStringGetCharacters((CFStringRef)val,
-			CFRangeMake(0, str_len), buf);
-		free_buf = true;
-	    }
+	    UChar *chars = NULL;
+	    long chars_len = 0;
+	    bool need_free = false;
 
-	    GlobalVariable *str_gvar = compile_const_global_ustring(buf,
-		    str_len, CFHash((CFTypeRef)val));
+	    rb_str_get_uchars(val, &chars, &chars_len, &need_free);
+	    assert(chars_len > 0);
 
-	    if (free_buf) {
-		free(buf);
-		buf = NULL;
+	    GlobalVariable *str_gvar = compile_const_global_ustring(chars,
+		    chars_len);
+
+	    if (need_free) {
+		free(chars);
+		chars = NULL;
 	    }
 
 	    std::vector<Value *> idxs;
@@ -2839,7 +2838,7 @@
 
 	    std::vector<Value *> params;
 	    params.push_back(load);
-	    params.push_back(ConstantInt::get(Int32Ty, str_len));
+	    params.push_back(ConstantInt::get(Int32Ty, chars_len));
 
 	    return CallInst::Create(newString2Func, params.begin(),
 		    params.end(), "", bb);
@@ -4681,11 +4680,18 @@
 				NULL));
 		}
 
+		assert(nd_type(node->u1.node) == NODE_LIT);
+		assert(nd_type(node->u2.node) == NODE_LIT);
+		assert(TYPE(node->u1.node->nd_lit) == T_SYMBOL);
+		assert(TYPE(node->u2.node->nd_lit) == T_SYMBOL);
+
+		ID from = SYM2ID(node->u1.node->nd_lit);
+		ID to = SYM2ID(node->u2.node->nd_lit);
+
 		std::vector<Value *> params;
-
 		params.push_back(compile_current_class());
-		params.push_back(compile_id(node->u1.node->u1.node->u2.id));
-		params.push_back(compile_id(node->u2.node->u1.node->u2.id));
+		params.push_back(compile_id(from));
+		params.push_back(compile_id(to));
 		params.push_back(ConstantInt::get(Int8Ty,
 			    dynamic_class ? 1 : 0));
 
@@ -5622,8 +5628,10 @@
 		    RubyObjTy, PtrTy, NULL));
 
     Function *newRegexp2Func =
-	cast<Function>(module->getOrInsertFunction("rb_reg_new_retained",
-		    RubyObjTy, PtrTy, Int32Ty, Int32Ty, NULL));
+	cast<Function>(module->getOrInsertFunction(
+		    "rb_unicode_regex_new_retained",
+		    RubyObjTy, PointerType::getUnqual(Int16Ty), Int32Ty,
+		    Int32Ty, NULL));
 
     Function *newBignumFunc =
 	cast<Function>(module->getOrInsertFunction("rb_bignum_new_retained",
@@ -5670,28 +5678,32 @@
 
 	    case T_REGEXP:
 		{
-		    struct RRegexp *re = (struct RRegexp *)val;
+		    const UChar *chars = NULL;
+		    long chars_len = 0;
 
+		    regexp_get_uchars(val, &chars, &chars_len);
+
 		    Value *re_str;
-		    if (re->len == 0) {
-			re_str = compile_const_pointer(NULL, NULL);
+		    if (chars_len == 0) {
+			re_str = ConstantPointerNull::get(
+				PointerType::getUnqual(Int16Ty));
 		    }
 		    else {
-			GlobalVariable *rename_gvar =
-			    compile_const_global_string(re->str, re->len);
+			GlobalVariable *re_name_gvar =
+			    compile_const_global_ustring(chars, chars_len);
 
 			std::vector<Value *> idxs;
 			idxs.push_back(ConstantInt::get(Int32Ty, 0));
 			idxs.push_back(ConstantInt::get(Int32Ty, 0));
-			re_str = GetElementPtrInst::Create(rename_gvar,
+			re_str = GetElementPtrInst::Create(re_name_gvar,
 				idxs.begin(), idxs.end(), "");
 		    }
 
 		    std::vector<Value *> params;
 		    params.push_back(re_str);
-		    params.push_back(ConstantInt::get(Int32Ty, re->len));
+		    params.push_back(ConstantInt::get(Int32Ty, chars_len));
 		    params.push_back(ConstantInt::get(Int32Ty,
-				re->ptr->options));
+				rb_reg_options(val)));
 
 		    Instruction *call = CallInst::Create(newRegexp2Func,
 			    params.begin(), params.end(), "");

Modified: MacRuby/trunk/compiler.h
===================================================================
--- MacRuby/trunk/compiler.h	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/compiler.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -315,7 +315,7 @@
 	    return compile_const_global_string(str, strlen(str));
 	}
 	GlobalVariable *compile_const_global_ustring(const UniChar *str,
-		const size_t str_len, CFHashCode hash);
+		const size_t str_len);
 
 	Value *compile_arity(rb_vm_arity_t &arity);
 	Instruction *compile_range(Value *beg, Value *end, bool exclude_end,

Modified: MacRuby/trunk/complex.c
===================================================================
--- MacRuby/trunk/complex.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/complex.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -7,7 +7,6 @@
 
 #include "ruby.h"
 #include <math.h>
-#include "ruby/re.h"
 #include "ruby/node.h"
 #include "vm.h"
 #include "id.h"

Modified: MacRuby/trunk/dispatcher.cpp
===================================================================
--- MacRuby/trunk/dispatcher.cpp	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/dispatcher.cpp	2010-03-12 23:56:52 UTC (rev 3746)
@@ -16,6 +16,8 @@
 #include "dtrace.h"
 #include "array.h"
 #include "hash.h"
+#include "encoding.h"
+#include "re.h"
 
 #include <execinfo.h>
 #include <dlfcn.h>
@@ -838,10 +840,6 @@
 	    if (self == Qfalse) {
 		return rb_cFalseClass;
 	    }
-	    if (klass == (Class)rb_cCFString) {
-		return RSTRING_IMMUTABLE(self)
-		    ? rb_cNSString : rb_cNSMutableString;
-	    }
 	    return rb_class_real((VALUE)klass);
 	}
 
@@ -1243,22 +1241,31 @@
 	    return self == other ? Qtrue : Qfalse;
 
 	case T_STRING:
+	    if (self == other) {
+		return Qtrue;
+	    }
+	    if (TYPE(other) != self_type) {
+		return Qfalse;
+	    }
+	    return rb_str_equal(self, other);
+
 	case T_ARRAY:
-	case T_HASH:
 	    if (self == other) {
 		return Qtrue;
 	    }
 	    if (TYPE(other) != self_type) {
 		return Qfalse;
 	    }
-	    if (self_type == T_ARRAY) {
-		return rb_ary_equal(self, other);
+	    return rb_ary_equal(self, other);
+
+	case T_HASH:
+	    if (self == other) {
+		return Qtrue;
 	    }
-	    if (self_type == T_HASH) {
-		return rb_hash_equal(self, other);
+	    if (TYPE(other) != self_type) {
+		return Qfalse;
 	    }
-	    return CFEqual((CFTypeRef)self, (CFTypeRef)other)
-		? Qtrue : Qfalse;
+	    return rb_hash_equal(self, other);
 
 	case T_BIGNUM:
 	    return rb_big_eq(self, other);
@@ -1287,7 +1294,7 @@
 	    return rb_str_equal(self, other);
 
 	case T_REGEXP:
-	    return rb_reg_eqq(self, selEqq, other);
+	    return regexp_eqq(self, selEqq, other);
 
 	case T_SYMBOL:
 	    return (self == other ? Qtrue : Qfalse);
@@ -1344,12 +1351,14 @@
 		}
 		break;
 
+#if 0 // TODO
 	    case T_STRING:
 		if (*(VALUE *)obj == rb_cCFString) {
 		    rb_str_concat(obj, other);
 		    return obj;
 		}
 		break;
+#endif
 	}
     }
     return __rb_vm_dispatch(GET_VM(), cache, 0, obj, NULL, selLTLT, NULL, 0, 1,

Modified: MacRuby/trunk/encoding.c
===================================================================
--- MacRuby/trunk/encoding.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/encoding.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,5 +1,5 @@
 /* 
- * MacRuby implementation of Ruby 1.9's encoding.c.
+ * MacRuby implementation of Ruby 1.9 String.
  *
  * This file is covered by the Ruby license. See COPYING for more details.
  * 
@@ -9,594 +9,388 @@
  * Copyright (C) 2000 Information-technology Promotion Agency, Japan
  */
 
-#include "ruby/ruby.h"
+#include <string.h>
+
+#include "ruby.h"
 #include "ruby/encoding.h"
-#include "regenc.h"
-#include <ctype.h>
-#ifdef HAVE_LANGINFO_H
-#include <langinfo.h>
-#endif
+#include "encoding.h"
 
-static ID id_encoding, id_base_encoding;
 VALUE rb_cEncoding;
 
-static CFMutableDictionaryRef __encodings = NULL;
+static rb_encoding_t *default_internal = NULL;
+static rb_encoding_t *default_external = NULL;
+rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
 
+static void str_undefined_update_flags(rb_str_t *self) { abort(); }
+static void str_undefined_make_data_binary(rb_str_t *self) { abort(); }
+static bool str_undefined_try_making_data_uchars(rb_str_t *self) { abort(); }
+static long str_undefined_length(rb_str_t *self, bool ucs2_mode) { abort(); }
+static long str_undefined_bytesize(rb_str_t *self) { abort(); }
+static character_boundaries_t str_undefined_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode) { abort(); }
+static long str_undefined_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes, bool ucs2_mode) { abort(); }
+
 static VALUE
-enc_new(const CFStringEncoding *enc)
+mr_enc_s_list(VALUE klass, SEL sel)
 {
-    return Data_Wrap_Struct(rb_cEncoding, NULL, NULL, (void *)enc);
+    VALUE ary = rb_ary_new2(ENCODINGS_COUNT);
+    for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
+	rb_ary_push(ary, (VALUE)rb_encodings[i]);
+    }
+    return ary;
 }
 
-static void
-enc_init_db(void)
+static VALUE
+mr_enc_s_name_list(VALUE klass, SEL sel)
 {
-    const CFStringEncoding *e;
-
-    __encodings = CFDictionaryCreateMutable(NULL, 0, NULL, &kCFTypeDictionaryValueCallBacks);
-    
-    /* XXX CFStringGetListOfAvailableEncodings() is a costly call and should
-     * be called on demand and not by default when the interpreter starts.
-     */
-    e = CFStringGetListOfAvailableEncodings();
-    while (e != NULL && *e != kCFStringEncodingInvalidId) {
-	VALUE iana;
-	VALUE encoding;
-
-	encoding = enc_new(e);
-
-	iana = (VALUE)CFStringConvertEncodingToIANACharSetName(*e);
-	if (iana != 0) {
-	    const char *name;
-
-	    name = RSTRING_PTR(iana);
-
-	    // new_name = name.gsub(/-/, '_').upcase
-	    char *new_name = alloca(strlen(name));
-	    strcpy(new_name, name);
-	    char *p = strchr(name, '-');
-	    if (p != NULL) {
-		p = new_name + (p - name);
-		do {
-		    *p = '_';
-		    p++;
-		    p = strchr(p, '-');	
-		}
-		while (p != NULL);
-	    }
-	    p = new_name;
-	    while (*p != '\0') {
-		if (islower(*p)) {
-		    *p = toupper(*p);
-		}
-		p++;
-	    }
-
-	    ID encoding_id = rb_intern(new_name);
-	    if (!rb_const_defined(rb_cEncoding, encoding_id)) {
-		rb_const_set(rb_cEncoding, encoding_id, encoding);
-	    }
+    VALUE ary = rb_ary_new();
+    for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
+	rb_encoding_t *encoding = RENC(rb_encodings[i]);
+	// TODO: use US-ASCII strings
+	rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
+	for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
+	    rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[j]));
 	}
-	CFDictionarySetValue(__encodings, (const void *)iana, 
-	    (const void *)encoding);
-	e++;
     }
-
-    assert(CFDictionaryGetCount((CFDictionaryRef)__encodings) > 0);
-
-    // Define shortcuts.
-    rb_define_const(rb_cEncoding, "ASCII_8BIT",
-	    rb_const_get(rb_cEncoding, rb_intern("US_ASCII")));
-    rb_define_const(rb_cEncoding, "BINARY",
-	    rb_const_get(rb_cEncoding, rb_intern("US_ASCII")));
+    return ary;
 }
 
 static VALUE
-enc_make(const CFStringEncoding *enc)
+mr_enc_s_aliases(VALUE klass, SEL sel)
 {
-    VALUE iana, v;
-
-    assert(enc != NULL);
-    iana = (VALUE)CFStringConvertEncodingToIANACharSetName(*enc);
-    v = (VALUE)CFDictionaryGetValue((CFDictionaryRef)__encodings, 
-	(const void *)iana);
-    assert(v != 0);
-    return v;
+    VALUE hash = rb_hash_new();
+    for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
+	rb_encoding_t *encoding = RENC(rb_encodings[i]);
+	for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
+	    rb_hash_aset(hash, rb_usascii_str_new2(encoding->aliases[j]),
+		    rb_usascii_str_new2(encoding->public_name));
+	}
+    }
+    return hash;
 }
 
-VALUE
-rb_enc_from_encoding(rb_encoding *enc)
+static VALUE
+mr_enc_s_find(VALUE klass, SEL sel, VALUE name)
 {
-    return enc_make(enc);
+    StringValue(name);
+    rb_encoding_t *enc = rb_enc_find(RSTRING_PTR(name));
+    if (enc == NULL) {
+	rb_raise(rb_eArgError, "unknown encoding name - %s",
+		RSTRING_PTR(name));
+    }
+    return (VALUE)enc;
 }
 
-static inline CFStringEncoding
-rb_enc_to_enc(VALUE v)
+static VALUE
+mr_enc_s_default_internal(VALUE klass, SEL sel)
 {
-    return *(CFStringEncoding *)DATA_PTR(v);
+    return (VALUE)default_internal;
 }
 
-static inline CFStringEncoding *
-rb_enc_to_enc_ptr(VALUE v)
+static VALUE
+mr_enc_s_default_external(VALUE klass, SEL sel)
 {
-    return (CFStringEncoding *)DATA_PTR(v);
+    return (VALUE)default_external;
 }
 
-rb_encoding *
-rb_to_encoding(VALUE v)
+static VALUE
+mr_enc_name(VALUE self, SEL sel)
 {
-    if (TYPE(v) == T_STRING)
-	return rb_enc_find2(v);
-    return rb_enc_to_enc_ptr(v);
+    return rb_usascii_str_new2(RENC(self)->public_name);
 }
 
-/*
- * call-seq:
- *   enc.dummy? => true or false
- *
- * Returns true for dummy encodings.
- * A dummy encoding is an encoding for which character handling is not properly
- * implemented.
- * It is used for stateful encodings.
- *
- *   Encoding::ISO_2022_JP.dummy?       #=> true
- *   Encoding::UTF_8.dummy?             #=> false
- *
- */
 static VALUE
-enc_dummy_p(VALUE enc, SEL sel)
+mr_enc_inspect(VALUE self, SEL sel)
 {
-    return rb_enc_dummy_p(rb_to_encoding(enc)) ? Qtrue : Qfalse;
+    return rb_sprintf("#<%s:%s>", rb_obj_classname(self),
+	    RENC(self)->public_name);
 }
 
-ID
-rb_id_encoding(void)
+static VALUE
+mr_enc_names(VALUE self, SEL sel)
 {
-    if (!id_encoding) {
-	id_encoding = rb_intern("encoding");
-    }
-    return id_encoding;
-}
+    rb_encoding_t *encoding = RENC(self);
 
-rb_encoding*
-rb_enc_compatible(VALUE str1, VALUE str2)
-{
-    /* TODO */
-    rb_encoding *enc = rb_enc_get(str1);
-    if (enc == rb_enc_get(str2))
-	return enc;
-    return NULL;
-}
-
-/*
- *  call-seq:
- *     obj.encoding   => encoding
- *
- *  Returns the Encoding object that represents the encoding of obj.
- */
-
-VALUE
-rb_obj_encoding(VALUE obj, SEL sel)
-{
-    rb_encoding *enc = rb_enc_get(obj);
-    if (!enc) {
-	rb_raise(rb_eTypeError, "unknown encoding");
+    VALUE ary = rb_ary_new2(encoding->aliases_count + 1);
+    rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
+    for (unsigned int i = 0; i < encoding->aliases_count; ++i) {
+	rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[i]));
     }
-    return rb_enc_from_encoding(enc);
+    return ary;
 }
 
-/*
- * call-seq:
- *   enc.inspect => string
- *
- * Returns a string which represents the encoding for programmers.
- *
- *   Encoding::UTF_8.inspect       #=> "#<Encoding:UTF-8>"
- *   Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
- */
 static VALUE
-enc_inspect(VALUE self, SEL sel)
+mr_enc_ascii_compatible_p(VALUE self, SEL sel)
 {
-    char buffer[512];
-    VALUE enc_name;
-    long n;
-
-    enc_name = (VALUE)CFStringGetNameOfEncoding(rb_enc_to_enc(self));
-    
-    n = snprintf(buffer, sizeof buffer, "#<%s:%s>", rb_obj_classname(self),
-	RSTRING_PTR(enc_name));
-
-    return rb_str_new(buffer, n);
+    return RENC(self)->ascii_compatible ? Qtrue : Qfalse;
 }
 
-/*
- * call-seq:
- *   enc.name => string
- *
- * Returns the name of the encoding.
- *
- *   Encoding::UTF_8.name       => "UTF-8"
- */
 static VALUE
-enc_name(VALUE self, SEL sel)
+mr_enc_dummy_p(VALUE self, SEL sel)
 {
-    return rb_enc_name2(rb_enc_to_enc_ptr(self));
+    return Qfalse;
 }
 
-static VALUE
-enc_base_encoding(VALUE self, SEL sel)
+static void
+define_encoding_constant(const char *name, rb_encoding_t *encoding)
 {
-    return rb_attr_get(self, id_base_encoding);
-}
+    char c = name[0];
+    if ((c >= '0') && (c <= '9')) {
+	// constants can't start with a number
+	return;
+    }
 
-/*
- * call-seq:
- *   Encoding.list => [enc1, enc2, ...]
- *
- * Returns the list of loaded encodings.
- *
- *   Encoding.list
- *   => [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
- *       #<Encoding:ISO-2022-JP (dummy)>]
- *
- *   Encoding.find("US-ASCII")
- *   => #<Encoding:US-ASCII>
- *
- *   Encoding.list
- *   => [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
- *       #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
- *
- */
-static VALUE
-enc_list(VALUE klass, SEL sel)
-{
-    VALUE ary;
-    const CFStringEncoding *e;
+    char *name_copy = strdup(name);
+    if ((c >= 'a') && (c <= 'z')) {
+	// the first character must be upper case
+	name_copy[0] = c - ('a' - 'A');
+    }
 
-    ary = rb_ary_new();
-    e = CFStringGetListOfAvailableEncodings();
-    while (e != NULL && *e != kCFStringEncodingInvalidId) {
-	rb_ary_push(ary, enc_make(e));
-	e++;
+    // '.' and '-' must be transformed into '_'
+    for (int i = 0; name_copy[i]; ++i) {
+	if ((name_copy[i] == '.') || (name_copy[i] == '-')) {
+	    name_copy[i] = '_';
+	}
     }
-    return ary;
+    rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
+    free(name_copy);
 }
 
-/*
- * call-seq:
- *   Encoding.find(string) => enc
- *   Encoding.find(symbol) => enc
- *
- * Search the encoding with specified <i>name</i>.
- * <i>name</i> should be a string or symbol.
- *
- *   Encoding.find("US-ASCII")  => #<Encoding:US-ASCII>
- *   Encoding.find(:Shift_JIS)  => #<Encoding:Shift_JIS>
- *
- */
-static VALUE
-enc_find2(VALUE enc)
+extern void enc_init_ucnv_encoding(rb_encoding_t *encoding);
+
+enum {
+    ENCODING_TYPE_SPECIAL = 0,
+    ENCODING_TYPE_UCNV
+};
+
+static void
+add_encoding(
+	unsigned int encoding_index, // index of the encoding in the encodings
+				     // array
+	unsigned int rb_encoding_type,
+	const char *public_name, // public name for the encoding
+	unsigned char min_char_size,
+	bool single_byte_encoding, // in the encoding a character takes only
+				   // one byte
+	bool ascii_compatible, // is the encoding ASCII compatible or not
+	... // aliases for the encoding (should no include the public name)
+	    // - must end with a NULL
+	)
 {
-    CFStringRef str;
-    CFStringEncoding e;
+    assert(encoding_index < ENCODINGS_COUNT);
 
-    str = (CFStringRef)StringValue(enc);
-    if (CFStringCompare(str, CFSTR("ASCII-8BIT"), 
-			kCFCompareCaseInsensitive) == 0) {
-	str = CFSTR("ASCII");
+    // create an array for the aliases
+    unsigned int aliases_count = 0;
+    va_list va_aliases;
+    va_start(va_aliases, ascii_compatible);
+    while (va_arg(va_aliases, const char *) != NULL) {
+	++aliases_count;
     }
-    else if (CFStringCompare(str, CFSTR("SJIS"), 
-	     kCFCompareCaseInsensitive) == 0) {
-	str = CFSTR("Shift-JIS");
+    va_end(va_aliases);
+    const char **aliases = (const char **)
+	malloc(sizeof(const char *) * aliases_count);
+    va_start(va_aliases, ascii_compatible);
+    for (unsigned int i = 0; i < aliases_count; ++i) {
+	aliases[i] = va_arg(va_aliases, const char *);
     }
+    va_end(va_aliases);
 
-    e = CFStringConvertIANACharSetNameToEncoding(str);
-    if (e == kCFStringEncodingInvalidId)
-	return Qnil;
-    return enc_make(&e);
-}
+    // create the MacRuby object
+    NEWOBJ(encoding, rb_encoding_t);
+    encoding->basic.flags = 0;
+    encoding->basic.klass = rb_cEncoding;
+    rb_encodings[encoding_index] = encoding;
+    GC_RETAIN(encoding); // it should never be deallocated
 
-static VALUE
-enc_find(VALUE klass, SEL sel, VALUE enc)
-{
-    VALUE e = enc_find2(enc);
-    if (e == Qnil) {
-	rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
+    // fill the fields
+    encoding->index = encoding_index;
+    encoding->public_name = public_name;
+    encoding->min_char_size = min_char_size;
+    encoding->single_byte_encoding = single_byte_encoding;
+    encoding->ascii_compatible = ascii_compatible;
+    encoding->aliases_count = aliases_count;
+    encoding->aliases = aliases;
+
+    // fill the default implementations with aborts
+    encoding->methods.update_flags = str_undefined_update_flags;
+    encoding->methods.make_data_binary = str_undefined_make_data_binary;
+    encoding->methods.try_making_data_uchars =
+	str_undefined_try_making_data_uchars;
+    encoding->methods.length = str_undefined_length;
+    encoding->methods.bytesize = str_undefined_bytesize;
+    encoding->methods.get_character_boundaries =
+	str_undefined_get_character_boundaries;
+    encoding->methods.offset_in_bytes_to_index =
+	str_undefined_offset_in_bytes_to_index;
+
+    switch (rb_encoding_type) {
+	case ENCODING_TYPE_SPECIAL:
+	    break;
+	case ENCODING_TYPE_UCNV:
+	    enc_init_ucnv_encoding(encoding);
+	    break;
+	default:
+	    abort();
     }
-    return e;
 }
 
-/*
- * call-seq:
- *   Encoding.compatible?(str1, str2) => enc or nil
- *
- * Checks the compatibility of two strings.
- * If they are compatible, means concatenatable, 
- * returns an encoding which the concatinated string will be.
- * If they are not compatible, nil is returned.
- *
- *   Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
- *   => #<Encoding:ISO-8859-1>
- *
- *   Encoding.compatible?(
- *     "\xa1".force_encoding("iso-8859-1"),
- *     "\xa1\xa1".force_encoding("euc-jp"))
- *   => nil
- *
- */
-static VALUE
-enc_compatible_p(VALUE klass, SEL sel, VALUE str1, VALUE str2)
+// This Init function is called very early. Do not use any runtime method
+// because things may not be initialized properly yet.
+void
+Init_PreEncoding(void)
 {
-    rb_encoding *enc = rb_enc_compatible(str1, str2);
-    VALUE encoding = Qnil;
-    if (!enc || !(encoding = rb_enc_from_encoding(enc)))
-	encoding = Qnil;
-    return encoding;
-}
+    add_encoding(ENCODING_BINARY,    ENCODING_TYPE_SPECIAL, "ASCII-8BIT",  1, true,  true,  "BINARY", NULL);
+    add_encoding(ENCODING_ASCII,     ENCODING_TYPE_UCNV,    "US-ASCII",    1, true,  true,  "ASCII", "ANSI_X3.4-1968", "646", NULL);
+    add_encoding(ENCODING_UTF8,      ENCODING_TYPE_UCNV,    "UTF-8",       1, false, true,  "CP65001", NULL);
+    add_encoding(ENCODING_UTF16BE,   ENCODING_TYPE_UCNV,    "UTF-16BE",    2, false, false, NULL);
+    add_encoding(ENCODING_UTF16LE,   ENCODING_TYPE_UCNV,    "UTF-16LE",    2, false, false, NULL);
+    add_encoding(ENCODING_UTF32BE,   ENCODING_TYPE_UCNV,    "UTF-32BE",    4, false, false, "UCS-4BE", NULL);
+    add_encoding(ENCODING_UTF32LE,   ENCODING_TYPE_UCNV,    "UTF-32LE",    4, false, false, "UCS-4LE", NULL);
+    add_encoding(ENCODING_ISO8859_1, ENCODING_TYPE_UCNV,    "ISO-8859-1",  1, true,  true,  "ISO8859-1", NULL);
+    add_encoding(ENCODING_MACROMAN,  ENCODING_TYPE_UCNV,    "macRoman",    1, true,  true,  NULL);
+    // FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
+    //add_encoding(ENCODING_EUCJP,     ENCODING_TYPE_RUBY, "EUC-JP",      1, false, true,  "eucJP", NULL);
+    //add_encoding(ENCODING_SJIS,      ENCODING_TYPE_RUBY, "Shift_JIS",   1, false, true, "SJIS", NULL);
+    //add_encoding(ENCODING_CP932,     ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
 
-/* :nodoc: */
-static VALUE
-enc_dump(VALUE self, SEL sel, int argc, VALUE *argv)
-{
-    rb_scan_args(argc, argv, "01", 0);
-    return enc_name(self, 0);
+    default_external = rb_encodings[ENCODING_UTF8];
+    default_internal = rb_encodings[ENCODING_UTF8];
 }
 
-/* :nodoc: */
-static VALUE
-enc_load(VALUE klass, SEL sel, VALUE str)
+void
+Init_Encoding(void)
 {
-    return enc_find(klass, 0, str);
-}
+    // rb_cEncoding is defined earlier in Init_PreVM().
+    rb_set_class_path(rb_cEncoding, rb_cObject, "Encoding");
+    rb_const_set(rb_cObject, rb_intern("Encoding"), rb_cEncoding);
 
-static rb_encoding *default_external;
-    
-rb_encoding *
-rb_default_external_encoding(void)
-{
-    return default_external;
-}
+    rb_undef_alloc_func(rb_cEncoding);
 
-VALUE
-rb_enc_default_external(void)
-{
-    return enc_make(default_external);
-}
+    rb_objc_define_method(rb_cEncoding, "to_s", mr_enc_name, 0);
+    rb_objc_define_method(rb_cEncoding, "inspect", mr_enc_inspect, 0);
+    rb_objc_define_method(rb_cEncoding, "name", mr_enc_name, 0);
+    rb_objc_define_method(rb_cEncoding, "names", mr_enc_names, 0);
+    rb_objc_define_method(rb_cEncoding, "dummy?", mr_enc_dummy_p, 0);
+    rb_objc_define_method(rb_cEncoding, "ascii_compatible?",
+	    mr_enc_ascii_compatible_p, 0);
+    rb_objc_define_method(*(VALUE *)rb_cEncoding, "list", mr_enc_s_list, 0);
+    rb_objc_define_method(*(VALUE *)rb_cEncoding, "name_list",
+	    mr_enc_s_name_list, 0);
+    rb_objc_define_method(*(VALUE *)rb_cEncoding, "aliases",
+	    mr_enc_s_aliases, 0);
+    rb_objc_define_method(*(VALUE *)rb_cEncoding, "find", mr_enc_s_find, 1);
+    rb_objc_define_method(*(VALUE *)rb_cEncoding, "compatible?",
+	    mr_enc_s_is_compatible, 2); // in string.c
 
-/*
- * call-seq:
- *   Encoding.default_external => enc
- *
- * Returns default external encoding.
- *
- * It is initialized by the locale or -E option.
- */
-static VALUE
-get_default_external(VALUE klass, SEL sel)
-{
-    return rb_enc_default_external();
-}
+    //rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
+    //rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
 
-static VALUE
-set_default_external(VALUE klass, SEL sel, VALUE enc)
-{
-    // TODO
-    return enc;
+    rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external",
+	    mr_enc_s_default_external, 0);
+    //rb_define_singleton_method(rb_cEncoding, "default_external=", set_default_external, 1);
+    rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal",
+	    mr_enc_s_default_internal, 0);
+    //rb_define_singleton_method(rb_cEncoding, "default_internal=", set_default_internal, 1);
+    //rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
+
+    // Create constants.
+    for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
+	rb_encoding_t *enc = rb_encodings[i];
+	define_encoding_constant(enc->public_name, enc);
+	for (unsigned int j = 0; j < enc->aliases_count; j++) {
+	    define_encoding_constant(enc->aliases[j], enc);
+	}
+    }
 }
 
-void
-rb_enc_set_default_external(VALUE encoding)
+// MRI C-API compatibility.
+
+rb_encoding_t *
+rb_enc_find(const char *name)
 {
-    default_external = rb_enc_to_enc_ptr(encoding);
+    for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
+	rb_encoding_t *enc = rb_encodings[i];
+	if (strcasecmp(enc->public_name, name) == 0) {
+	    return enc;
+	}
+	for (unsigned int j = 0; j < enc->aliases_count; j++) {
+	    const char *alias = enc->aliases[j];
+	    if (strcasecmp(alias, name) == 0) {
+		return enc;
+	    }
+	}
+    }
+    return NULL;
 }
 
-/*
- * call-seq:
- *   Encoding.locale_charmap => string
- *
- * Returns the locale charmap name.
- *
- *   Debian GNU/Linux
- *     LANG=C
- *       Encoding.locale_charmap  => "ANSI_X3.4-1968"
- *     LANG=ja_JP.EUC-JP
- *       Encoding.locale_charmap  => "EUC-JP"
- *
- *   SunOS 5
- *     LANG=C
- *       Encoding.locale_charmap  => "646"
- *     LANG=ja
- *       Encoding.locale_charmap  => "eucJP"
- *
- */
-static VALUE
-rb_locale_charmap(VALUE klass, SEL sel)
+VALUE
+rb_enc_from_encoding(rb_encoding_t *enc)
 {
-    CFStringEncoding enc = CFStringGetSystemEncoding();
-    return (VALUE)CFStringConvertEncodingToIANACharSetName(enc);
+    return (VALUE)enc;
 }
 
-/*
- * call-seq:
- *   Encoding.name_list => ["enc1", "enc2", ...]
- *
- * Returns the list of available encoding names.
- *
- *   Encoding.name_list
- *   => ["US-ASCII", "ASCII-8BIT", "UTF-8",
- *       "ISO-8859-1", "Shift_JIS", "EUC-JP",
- *       "Windows-31J",
- *       "BINARY", "CP932", "eucJP"]
- *
- * This list doesn't include dummy encodings.
- *
- */
-
-static VALUE
-rb_enc_name_list(VALUE klass, SEL sel)
+rb_encoding_t *
+rb_enc_get(VALUE obj)
 {
-    VALUE ary, list;
-    long i, count;
-
-    ary = rb_ary_new();
-    list = enc_list(klass, 0);
-    for (i = 0, count = RARRAY_LEN(list); i < count; i++) {
-	rb_ary_push(ary, enc_name(RARRAY_AT(list, i), 0));
+    if (IS_RSTR(obj)) {
+	return RSTR(obj)->encoding;
     }
-    return ary;
+    // TODO support symbols
+    return NULL;
 }
 
-/*
- * call-seq:
- *   Encoding.aliases => {"alias1" => "orig1", "alias2" => "orig2", ...}
- *
- * Returns the hash of available encoding alias and original encoding name.
- *
- *   Encoding.aliases
- *   => {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1986"=>"US-ASCII",
- *       "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
- *
- */
-
-static VALUE
-rb_enc_aliases(VALUE klass, SEL sel)
+rb_encoding_t *
+rb_to_encoding(VALUE obj)
 {
-    /* TODO: the CFString IANA <-> charset code does support aliases, we should
-     * find a way to return them here. 
-     */
-    return rb_hash_new();
-}
-
-VALUE
-rb_enc_name2(rb_encoding *enc)
-{
-    if (enc != NULL) {
-	CFStringRef str = CFStringConvertEncodingToIANACharSetName(*enc);
-	if (str != NULL) {
-	    VALUE name = rb_str_dup((VALUE)str);
-	    CFStringUppercase((CFMutableStringRef)name, NULL);
-	    return name;
+    rb_encoding_t *enc;
+    if (CLASS_OF(obj) == rb_cEncoding) {
+	enc = RENC(obj);
+    }
+    else {
+	StringValue(obj);
+	enc = rb_enc_find(RSTRING_PTR(obj));
+	if (enc == NULL) {
+	    rb_raise(rb_eArgError, "unknown encoding name - %s",
+		    RSTRING_PTR(obj));
 	}
     }
-    return Qnil;
+    return enc;
 }
 
 const char *
-rb_enc_name(rb_encoding *enc)
+rb_enc_name(rb_encoding_t *enc)
 {
-    VALUE str = rb_enc_name2(enc);
-    return str == Qnil ? NULL : RSTRING_PTR(str);
+    return RENC(enc)->public_name;
 }
 
-long 
-rb_enc_mbminlen(rb_encoding *enc)
+VALUE
+rb_enc_name2(rb_encoding_t *enc)
 {
-    return rb_enc_mbmaxlen(enc);
+    return rb_usascii_str_new2(rb_enc_name(enc));
 }
 
 long
-rb_enc_mbmaxlen(rb_encoding *enc)
+rb_enc_mbminlen(rb_encoding_t *enc)
 {
-    return enc == NULL
-	? 1 : CFStringGetMaximumSizeForEncoding(1, *enc);
+    return enc->min_char_size;    
 }
 
-rb_encoding *
-rb_enc_find(const char *name)
+long
+rb_enc_mbmaxlen(rb_encoding_t *enc)
 {
-    return rb_enc_find2(rb_str_new2(name));
+    return enc->single_byte_encoding ? 1 : 10; // XXX 10?
 }
 
-rb_encoding *
-rb_enc_find2(VALUE name)
-{
-    VALUE e = enc_find2(name);
-    return e == Qnil ? NULL : rb_enc_to_enc_ptr(e);
-}
-
-rb_encoding *
-rb_enc_get(VALUE obj)
-{
-    CFStringEncoding enc = kCFStringEncodingInvalidId;
-
-    switch (TYPE(obj)) {
-	case T_STRING:
-	    enc = *(VALUE *)obj == rb_cByteString
-		? kCFStringEncodingASCII
-		: CFStringGetFastestEncoding((CFStringRef)obj);
-	    break;
-    }
-
-    if (enc == kCFStringEncodingInvalidId) {
-	return NULL;
-    }
-    return rb_enc_to_enc_ptr(enc_make(&enc));
-}
-
-rb_encoding *
+rb_encoding_t *
 rb_locale_encoding(void)
 {
-    CFStringEncoding enc = CFStringGetSystemEncoding();
-    return rb_enc_to_enc_ptr(enc_make(&enc));
+    // XXX
+    return rb_encodings[ENCODING_UTF8];
 }
 
 void
-Init_Encoding(void)
+rb_enc_set_default_external(VALUE encoding)
 {
-    id_base_encoding = rb_intern("#base_encoding");
-
-    rb_cEncoding = rb_define_class("Encoding", rb_cObject);
-    rb_undef_alloc_func(rb_cEncoding);
-    rb_objc_define_method(rb_cEncoding, "to_s", enc_name, 0);
-    rb_objc_define_method(rb_cEncoding, "inspect", enc_inspect, 0);
-    rb_objc_define_method(rb_cEncoding, "name", enc_name, 0);
-    rb_objc_define_method(rb_cEncoding, "base_encoding", enc_base_encoding, 0);
-    rb_objc_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0);
-    rb_objc_define_method(*(VALUE *)rb_cEncoding, "list", enc_list, 0);
-    rb_objc_define_method(*(VALUE *)rb_cEncoding, "name_list", rb_enc_name_list, 0);
-    rb_objc_define_method(*(VALUE *)rb_cEncoding, "aliases", rb_enc_aliases, 0);
-    rb_objc_define_method(*(VALUE *)rb_cEncoding, "find", enc_find, 1);
-    rb_objc_define_method(*(VALUE *)rb_cEncoding, "compatible?", enc_compatible_p, 2);
-
-    rb_objc_define_method(rb_cEncoding, "_dump", enc_dump, -1);
-    rb_objc_define_method(*(VALUE *)rb_cEncoding, "_load", enc_load, 1);
-
-    rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external", get_default_external, 0);
-    rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external=", set_default_external, 1);
-    rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal", get_default_external, 0); // TODO
-    rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal=", set_default_external, 1); // TODO
-    rb_objc_define_method(*(VALUE *)rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
-
-    enc_init_db();
+    assert(CLASS_OF(encoding) == rb_cEncoding);
+    default_external = RENC(encoding); 
 }
 
-/* locale insensitive functions */
-
-#define ctype_test(c, ctype) \
-    (rb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), ctype))
-
-int rb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); }
-int rb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); }
-int rb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); }
-int rb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); }
-int rb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); }
-int rb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); }
-int rb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); }
-int rb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); }
-int rb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); }
-int rb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); }
-int rb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); }
-int rb_isxdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_XDIGIT); }
-
-int
-rb_tolower(int c)
-{
-    return rb_isascii(c) ? ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) : c;
-}
-
-int
-rb_toupper(int c)
-{
-    return rb_isascii(c) ? ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) : c;
-}
-

Copied: MacRuby/trunk/encoding.h (from rev 3744, MacRuby/branches/icu/encoding.h)
===================================================================
--- MacRuby/trunk/encoding.h	                        (rev 0)
+++ MacRuby/trunk/encoding.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,314 @@
+/* 
+ * MacRuby implementation of Ruby 1.9 String.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ * 
+ * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
+ * Copyright (C) 1993-2007 Yukihiro Matsumoto
+ * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ * Copyright (C) 2000 Information-technology Promotion Agency, Japan
+ */
+
+#ifndef __ENCODING_H_
+#define __ENCODING_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#if defined(__cplusplus)
+# include "unicode/unistr.h"
+#else
+# include "unicode/ustring.h"
+#endif
+
+#if __LITTLE_ENDIAN__
+#define ENCODING_UTF16_NATIVE ENCODING_UTF16LE
+#define ENCODING_UTF32_NATIVE ENCODING_UTF32LE
+#define ENCODING_UTF16_NON_NATIVE ENCODING_UTF16BE
+#define ENCODING_UTF32_NON_NATIVE ENCODING_UTF32BE
+#else
+#define ENCODING_UTF16_NATIVE ENCODING_UTF16BE
+#define ENCODING_UTF32_NATIVE ENCODING_UTF32BE
+#define ENCODING_UTF16_NON_NATIVE ENCODING_UTF16LE
+#define ENCODING_UTF32_NON_NATIVE ENCODING_UTF32LE
+#endif
+
+#define NATIVE_UTF16_ENC(encoding) \
+    ((encoding) == rb_encodings[ENCODING_UTF16_NATIVE])
+#define NON_NATIVE_UTF16_ENC(encoding) \
+    ((encoding) == rb_encodings[ENCODING_UTF16_NON_NATIVE])
+#define UTF16_ENC(encoding) \
+    (NATIVE_UTF16_ENC(encoding) || NON_NATIVE_UTF16_ENC(encoding))
+#define NATIVE_UTF32_ENC(encoding) \
+    ((encoding) == rb_encodings[ENCODING_UTF32_NATIVE])
+#define NON_NATIVE_UTF32_ENC(encoding) \
+    ((encoding) == rb_encodings[ENCODING_UTF32_NON_NATIVE])
+#define UTF32_ENC(encoding) \
+    (NATIVE_UTF32_ENC(encoding) || NON_NATIVE_UTF32_ENC(encoding))
+#define BINARY_ENC(encoding) ((encoding) == rb_encodings[ENCODING_BINARY])
+
+typedef uint8_t str_flag_t;
+
+typedef struct {
+    struct RBasic basic;
+    struct rb_encoding *encoding;
+    long capacity_in_bytes;
+    long length_in_bytes;
+    union {
+	char *bytes;
+	UChar *uchars;
+    } data;
+    str_flag_t flags;
+} rb_str_t;
+
+#define RSTR(x) ((rb_str_t *)x)
+
+static inline bool
+rb_klass_is_rstr(VALUE klass)
+{
+    do {
+	if (klass == rb_cRubyString) {
+	    return true;
+	}
+	if (klass == rb_cNSString) {
+	    return false;
+	}
+	klass = RCLASS_SUPER(klass);
+    }
+    while (klass != 0);
+    return false;
+}
+
+#define IS_RSTR(x) (rb_klass_is_rstr(*(VALUE *)x))
+
+static inline void
+rstr_modify(VALUE str)
+{
+    const long mask = RBASIC(str)->flags;
+    if ((mask & FL_FREEZE) == FL_FREEZE) {
+        rb_raise(rb_eRuntimeError, "can't modify frozen/immutable string");
+    }
+    if ((mask & FL_TAINT) == FL_TAINT && rb_safe_level() >= 4) {
+        rb_raise(rb_eSecurityError, "Insecure: can't modify string");
+    }
+}
+
+static inline void
+rstr_frozen_check(VALUE str)
+{
+    const long mask = RBASIC(str)->flags;
+    if ((mask & FL_FREEZE) == FL_FREEZE) {
+	rb_raise(rb_eRuntimeError, "string frozen");
+    }
+}
+
+typedef struct {
+    long start_offset_in_bytes;
+    long end_offset_in_bytes;
+} character_boundaries_t;
+
+typedef struct {
+    void (*update_flags)(rb_str_t *);
+    void (*make_data_binary)(rb_str_t *);
+    bool (*try_making_data_uchars)(rb_str_t *);
+    long (*length)(rb_str_t *, bool);
+    long (*bytesize)(rb_str_t *);
+    character_boundaries_t (*get_character_boundaries)(rb_str_t *, long, bool);
+    long (*offset_in_bytes_to_index)(rb_str_t *, long, bool);
+} encoding_methods_t;
+
+typedef struct rb_encoding {
+    struct RBasic basic;
+    unsigned int index;
+    const char *public_name;
+    const char **aliases;
+    unsigned int aliases_count;
+    unsigned char min_char_size;
+    bool single_byte_encoding : 1;
+    bool ascii_compatible : 1;
+    encoding_methods_t methods;
+    void *private_data;
+} rb_encoding_t;
+
+#define RENC(x) ((rb_encoding_t *)(x))
+
+enum {
+    ENCODING_BINARY = 0,
+    ENCODING_ASCII,
+    ENCODING_UTF8,
+    ENCODING_UTF16BE,
+    ENCODING_UTF16LE,
+    ENCODING_UTF32BE,
+    ENCODING_UTF32LE,
+    ENCODING_ISO8859_1,
+    ENCODING_MACROMAN,
+    //ENCODING_EUCJP,
+    //ENCODING_SJIS,
+    //ENCODING_CP932,
+
+    ENCODINGS_COUNT
+};
+
+extern rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
+
+#define STRING_HAS_SUPPLEMENTARY     0x020
+#define STRING_HAS_SUPPLEMENTARY_SET 0x010
+#define STRING_ASCII_ONLY_SET        0x010
+#define STRING_ASCII_ONLY            0x008
+#define STRING_VALID_ENCODING_SET    0x004
+#define STRING_VALID_ENCODING        0x002
+#define STRING_STORED_IN_UCHARS      0x001
+
+#define STRING_REQUIRED_FLAGS STRING_STORED_IN_UCHARS
+
+#define BYTES_TO_UCHARS(len) ((len) / sizeof(UChar))
+#define UCHARS_TO_BYTES(len) ((len) * sizeof(UChar))
+
+#define ODD_NUMBER(x) ((x) & 0x1)
+
+static inline long
+div_round_up(long a, long b)
+{
+    return ((a) + (b - 1)) / b;
+}
+
+void str_update_flags(rb_str_t *self);
+
+static inline void
+str_unset_facultative_flags(rb_str_t *self)
+{
+    self->flags &= ~STRING_HAS_SUPPLEMENTARY_SET & ~STRING_ASCII_ONLY_SET
+	& ~STRING_VALID_ENCODING_SET;
+}
+
+static inline bool
+str_known_to_have_an_invalid_encoding(rb_str_t *self)
+{
+    return (self->flags & (STRING_VALID_ENCODING_SET
+		| STRING_VALID_ENCODING)) == STRING_VALID_ENCODING_SET;
+}
+
+static inline bool
+str_known_not_to_have_any_supplementary(rb_str_t *self)
+{
+    return (self->flags & (STRING_HAS_SUPPLEMENTARY_SET
+		| STRING_HAS_SUPPLEMENTARY)) == STRING_HAS_SUPPLEMENTARY_SET;
+}
+
+static inline bool
+str_check_flag_and_update_if_needed(rb_str_t *self, str_flag_t flag_set,
+	str_flag_t flag)
+{
+    if (!(self->flags & flag_set)) {
+	str_update_flags(self);
+	assert(self->flags & flag_set);
+    }
+    return self->flags & flag;
+}
+
+static inline bool
+str_is_valid_encoding(rb_str_t *self)
+{
+    return str_check_flag_and_update_if_needed(self, STRING_VALID_ENCODING_SET,
+	    STRING_VALID_ENCODING);
+}
+
+static inline bool
+str_is_ascii_only(rb_str_t *self)
+{
+    return str_check_flag_and_update_if_needed(self, STRING_ASCII_ONLY_SET,
+	    STRING_ASCII_ONLY);
+}
+
+static inline bool
+str_is_ruby_ascii_only(rb_str_t *self)
+{
+    // for MRI, a string in a non-ASCII-compatible encoding (like UTF-16)
+    // containing only ASCII characters is not "ASCII only" though for us it
+    // is internally
+    if (!self->encoding->ascii_compatible) {
+	return false;
+    }
+    return str_is_ascii_only(self);
+}
+
+static inline bool
+str_is_stored_in_uchars(rb_str_t *self)
+{
+    return self->flags & STRING_STORED_IN_UCHARS;
+}
+
+static inline void
+str_negate_stored_in_uchars(rb_str_t *self)
+{
+    self->flags ^= STRING_STORED_IN_UCHARS;
+}
+
+static inline void
+str_set_stored_in_uchars(rb_str_t *self, bool status)
+{
+    if (status) {
+	self->flags |= STRING_STORED_IN_UCHARS;
+    }
+    else {
+	self->flags &= ~STRING_STORED_IN_UCHARS;
+    }
+}
+
+static inline void
+str_set_facultative_flag(rb_str_t *self, bool status, str_flag_t flag_set,
+	str_flag_t flag)
+{
+    if (status) {
+	self->flags = self->flags | flag_set | flag;
+    }
+    else {
+	self->flags = (self->flags | flag_set) & ~flag;
+    }
+}
+
+static inline void
+str_set_has_supplementary(rb_str_t *self, bool status)
+{
+    str_set_facultative_flag(self, status, STRING_HAS_SUPPLEMENTARY_SET,
+	    STRING_HAS_SUPPLEMENTARY);
+}
+
+static inline void
+str_set_ascii_only(rb_str_t *self, bool status)
+{
+    str_set_facultative_flag(self, status, STRING_ASCII_ONLY_SET,
+	    STRING_ASCII_ONLY);
+}
+
+static inline void
+str_set_valid_encoding(rb_str_t *self, bool status)
+{
+    str_set_facultative_flag(self, status, STRING_VALID_ENCODING_SET,
+	    STRING_VALID_ENCODING);
+}
+
+VALUE mr_enc_s_is_compatible(VALUE klass, SEL sel, VALUE str1, VALUE str2);
+VALUE rb_str_intern_fast(VALUE str);
+VALUE rstr_aref(VALUE str, SEL sel, int argc, VALUE *argv);
+
+// The following functions should always been prefered over anything else,
+// especially if this "else" is RSTRING_PTR and RSTRING_LEN.
+// They also work on CFStrings.
+VALUE rb_unicode_str_new(const UniChar *ptr, const size_t len);
+void rb_str_get_uchars(VALUE str, UChar **chars_p, long *chars_len_p,
+	bool *need_free_p);
+long rb_str_chars_len(VALUE str);
+UChar rb_str_get_uchar(VALUE str, long pos);
+void rb_str_append_uchar(VALUE str, UChar c);
+void rb_str_append_uchars(VALUE str, const UChar *chars, long len);
+unsigned long rb_str_hash_uchars(const UChar *chars, long chars_len);
+long rb_uchar_strtol(UniChar *chars, long chars_len, long pos,
+	long *end_offset);
+
+#if defined(__cplusplus)
+} // extern "C"
+#endif
+
+#endif /* __ENCODING_H_ */

Modified: MacRuby/trunk/error.c
===================================================================
--- MacRuby/trunk/error.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/error.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -848,12 +848,10 @@
     return rb_attr_get(self, rb_intern("args"));
 }
 
-VALUE rb_str_inspect(VALUE, SEL);
-
 void
 rb_invalid_str(const char *str, const char *type)
 {
-    VALUE s = rb_str_inspect(rb_str_new2(str), 0);
+    VALUE s = rb_str_inspect(rb_str_new2(str));
 
     rb_raise(rb_eArgError, "invalid value for %s: %s", type, RSTRING_PTR(s));
 }

Modified: MacRuby/trunk/eval.c
===================================================================
--- MacRuby/trunk/eval.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/eval.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -39,6 +39,7 @@
 void Init_PreGC(void);
 void Init_PreVM(void);
 void Init_PreGCD(void);
+void Init_PreEncoding(void);
 
 bool ruby_dlog_enabled = false;
 FILE *ruby_dlog_file = NULL;
@@ -72,9 +73,10 @@
 	}
     }
 
-    Init_PreGC();
-    Init_PreVM();
-    Init_PreGCD();
+    Init_PreGC(); 	// requires nothing
+    Init_PreVM(); 	// requires nothing
+    Init_PreGCD(); 	// requires nothing
+    Init_PreEncoding(); // requires rb_cEncoding, GC
 
     rb_call_inits();
     ruby_prog_init();

Modified: MacRuby/trunk/ext/bigdecimal/bigdecimal.c
===================================================================
--- MacRuby/trunk/ext/bigdecimal/bigdecimal.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/bigdecimal/bigdecimal.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -2523,9 +2523,7 @@
 
     /* Skip all '_' after digit: 2006-6-30 */
     ni = 0;
-    VALUE bstr = rb_bytestring_new();
-    rb_bytestring_resize(bstr, strlen(szVal)+1);
-    psz = (char *)rb_bytestring_byte_pointer(bstr);
+    psz = (char *)xmalloc(strlen(szVal)+1);
     i   = 0;
     ipn = 0;
     while((psz[i]=szVal[ipn])!=0) {

Modified: MacRuby/trunk/ext/digest/bubblebabble/bubblebabble.c
===================================================================
--- MacRuby/trunk/ext/digest/bubblebabble/bubblebabble.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/digest/bubblebabble/bubblebabble.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -40,7 +40,7 @@
     }
 
     const size_t p_len = (digest_len | 1) * 3 + 2;
-    p = (UInt8 *)alloca(p_len + 1);
+    p = (UInt8 *)malloc(p_len + 1);
 
     i = j = 0;
     p[j++] = 'x';
@@ -74,7 +74,9 @@
 
     p[j] = 'x';
 
-    return rb_bytestring_new_with_data(p, p_len);
+    VALUE bstr = rb_bstr_new_with_data(p, p_len);
+    free(p);
+    return bstr;
 }
 
 /*

Modified: MacRuby/trunk/ext/digest/digest.c
===================================================================
--- MacRuby/trunk/ext/digest/digest.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/digest/digest.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -52,7 +52,7 @@
     }
 
     const size_t p_len = digest_len * 2;
-    p = (UInt8 *)alloca(p_len + 1);
+    p = (UInt8 *)malloc(p_len + 1);
 
     for (i = 0; i < digest_len; i++) {
         unsigned char byte = digest[i];
@@ -61,7 +61,9 @@
         p[i + i + 1] = hex[byte & 0x0f];
     }
 
-    return rb_bytestring_new_with_data(p, p_len);
+    VALUE bstr = rb_bstr_new_with_data(p, p_len);
+    free(p);
+    return bstr;
 }
 
 /*
@@ -539,8 +541,10 @@
 
     Data_Get_Struct(self, void, pctx);
 
-    str = rb_str_new(0, algo->digest_len);
-    algo->finish_func(pctx, (unsigned char *)RSTRING_PTR(str));
+    str = rb_bstr_new();
+    rb_bstr_resize(str, algo->digest_len);
+    rb_bstr_set_length(str, algo->digest_len);
+    algo->finish_func(pctx, rb_bstr_bytes(str));
 
     /* avoid potential coredump caused by use of a finished context */
     algo->init_func(pctx);

Modified: MacRuby/trunk/ext/json/rubyext.c
===================================================================
--- MacRuby/trunk/ext/json/rubyext.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/json/rubyext.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -212,15 +212,13 @@
     const unsigned char* buffer;
     unsigned int len;
     rb_json_generator_t* gen = RJSONGenerator(self);
-    
+
     json_encode_part(gen, obj);
     yajl_gen_get_buf(gen->generator, &buffer, &len);
-    
-    VALUE resultBuf = (VALUE)CFStringCreateWithBytes(NULL, (const UInt8*)buffer, len, kCFStringEncodingUTF8, false);
-    CFMakeCollectable((CFTypeRef)resultBuf);
+
+    VALUE res = rb_str_new((const char *)buffer, len);
     yajl_gen_clear(gen->generator);
-    
-    return resultBuf;
+    return res;
 }
 
 static void
@@ -320,8 +318,7 @@
 static int
 yajl_handle_string(void* ctx, const unsigned char* value, unsigned int len)
 {
-    VALUE str = (VALUE)CFStringCreateWithBytes(NULL, (const UInt8*)value, len, kCFStringEncodingUTF8, false);
-    CFMakeCollectable((CFTypeRef)str);
+    VALUE str = rb_str_new((const char *)value, len);
     yajl_set_static_value(ctx, str);
     return 1;
 }
@@ -330,12 +327,11 @@
 yajl_handle_hash_key(void* ctx, const unsigned char* value, unsigned int len)
 {
     rb_json_parser_t* parser = RJSONParser(ctx);
-    
-    VALUE keyStr = (VALUE)CFStringCreateWithBytes(NULL, (const UInt8*)value, len, kCFStringEncodingUTF8, false);
-    CFMakeCollectable((CFTypeRef)keyStr);
-    
+
+    VALUE keyStr = rb_str_new((const char *)value, len); 
+
     if (parser->symbolizeKeys) {
-        ID key = rb_intern(RSTRING_PTR(keyStr));
+        ID key = rb_intern_str(keyStr);
         yajl_set_static_value(ctx, ID2SYM(key));
     }
     else {
@@ -521,15 +517,11 @@
 static VALUE
 rb_object_to_json(VALUE self, SEL sel, int argc, VALUE* argv)
 {
-    VALUE buf, str;
-    
-    str = rb_vm_call_with_cache(to_s_cache, self, sel_to_s, 0, 0);
-    
-    buf = (VALUE)CFStringCreateMutable(NULL, 0);
-    CFMakeCollectable((CFTypeRef)buf);
-    CFStringAppendCString((CFMutableStringRef)buf, "\"", kCFStringEncodingUTF8);
-    CFStringAppend((CFMutableStringRef)buf, (CFStringRef)str);
-    CFStringAppendCString((CFMutableStringRef)buf, "\"", kCFStringEncodingUTF8);
+    VALUE str = rb_vm_call_with_cache(to_s_cache, self, sel_to_s, 0, 0);
+
+    VALUE buf = rb_str_new2("\"");
+    rb_str_concat(buf, str);
+    rb_str_cat2(buf, "\"");
     return buf;
 }
 

Modified: MacRuby/trunk/ext/libyaml/rubyext.c
===================================================================
--- MacRuby/trunk/ext/libyaml/rubyext.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/libyaml/rubyext.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -157,12 +157,7 @@
 
     if (!NIL_P(input)) {
 	assert(parser != NULL);
-	if (CLASS_OF(input) == rb_cByteString) {
-	    yaml_parser_set_input_string(parser,
-		    (const unsigned char *)rb_bytestring_byte_pointer(input),
-		    rb_bytestring_length(input));
-	}
-	else if (TYPE(input) == T_STRING) {
+	if (TYPE(input) == T_STRING) {
 	    const char * instring = RSTRING_PTR(input);
 	    yaml_parser_set_input_string(parser,
 		    (const unsigned char *)(instring),
@@ -524,10 +519,7 @@
 	tag = "tag:yaml.org,2002:str";
     }
     VALUE handler = handler_for_tag(parser, (yaml_char_t *)tag);
-    VALUE scalarval = (VALUE)CFStringCreateWithBytes(NULL, (const UInt8 *)val,
-	    parser->event.data.scalar.length,
-	    kCFStringEncodingUTF8, true);
-    CFMakeCollectable((CFTypeRef)scalarval);
+    VALUE scalarval = rb_str_new(val, parser->event.data.scalar.length);
     return interpret_value(parser, scalarval, handler);
 }
 
@@ -635,9 +627,7 @@
 {
     VALUE argv[2];
 
-    argv[0] = (VALUE)CFStringCreateWithBytes(NULL, (const UInt8 *)tag,
-					     strlen(tag),
-					     kCFStringEncodingUTF8, true);
+    argv[0] = rb_str_new2(tag);
     argv[1] = value;
 
     return rb_class_new_instance(2, argv, rb_cYamlNode);
@@ -655,9 +645,7 @@
     if (tag == NULL) {
 	tag = "str";
     }
-    VALUE scalarval = (VALUE)CFStringCreateWithBytes(NULL, (const UInt8 *)val,
-	    parser->event.data.scalar.length,
-	    kCFStringEncodingUTF8, true);
+    VALUE scalarval = rb_str_new(val, parser->event.data.scalar.length);
     return make_yaml_node(tag, scalarval);
 }
 
@@ -848,10 +836,9 @@
 }
 
 static int
-rb_yaml_bytestring_output_handler(void *bs, unsigned char *buffer, size_t size)
+rb_yaml_str_output_handler(void *str, unsigned char *buffer, size_t size)
 {
-    CFMutableDataRef data = rb_bytestring_wrapped_data((VALUE)bs);
-    CFDataAppendBytes(data, (const UInt8*)buffer, (CFIndex)size);
+    rb_str_cat((VALUE)str, (char *)buffer, size);
     return 1;
 }
 
@@ -870,18 +857,21 @@
     GC_WB(&remitter->output, output);
     yaml_emitter_t *emitter = &remitter->emitter;
     if (!NIL_P(output)) {
-	if (CLASS_OF(output) == rb_cByteString) {
-	    yaml_emitter_set_output(emitter, rb_yaml_bytestring_output_handler,
-		    (void *)output);
-	}
-	else if (TYPE(output) == T_FILE) {
-	    yaml_emitter_set_output(emitter, rb_yaml_io_output_handler,
-		    (void *)output);
-	}
-	else {
-	    rb_raise(rb_eArgError, "unsupported YAML output type %s",
-		    rb_obj_classname(output));
-	}
+	switch (TYPE(output)) {
+	    case T_FILE:
+		yaml_emitter_set_output(emitter, rb_yaml_io_output_handler,
+			(void *)output);
+		break;
+
+	    case T_STRING:
+		yaml_emitter_set_output(emitter, rb_yaml_str_output_handler,
+			(void *)output);
+		break;
+
+	    default:
+		rb_raise(rb_eArgError, "unsupported YAML output type %s",
+			rb_obj_classname(output));
+	}	
     }
     return output;
 }
@@ -892,7 +882,7 @@
     VALUE output = Qnil;
     rb_scan_args(argc, argv, "01", &output);
     if (NIL_P(output)) {
-	output = rb_bytestring_new();
+	output = rb_str_new(NULL, 0);
     }
     rb_yaml_emitter_set_output(self, 0, output);
     return self;
@@ -991,8 +981,7 @@
     yaml_event_t ev;
     yaml_emitter_t *emitter = &RYAMLEmitter(self)->emitter;
     yaml_char_t *output = (yaml_char_t *)RSTRING_PTR(val);
-    const size_t length = *(VALUE *)val == rb_cByteString
-	? RSTRING_LEN(val) : strlen((const char *)output);
+    const size_t length = RSTRING_LEN(val);
 
     int can_omit_tag = 0;
     int string_tag   = 0;

Modified: MacRuby/trunk/ext/openssl/ossl.c
===================================================================
--- MacRuby/trunk/ext/openssl/ossl.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/openssl/ossl.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -121,7 +121,7 @@
 VALUE
 ossl_buf2str(char *buf, int len)
 {
-    return rb_bytestring_new_with_data((UInt8 *)buf, len);
+    return rb_bstr_new_with_data((UInt8 *)buf, len);
 }
 
 /*

Modified: MacRuby/trunk/ext/openssl/ossl_bio.c
===================================================================
--- MacRuby/trunk/ext/openssl/ossl_bio.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/openssl/ossl_bio.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -39,11 +39,8 @@
     }
     else {
 	StringValue(obj);
-	if (CLASS_OF(obj) != rb_cByteString) {
-	    rb_raise(rb_eArgError, "expected ByteString object");
-	}
-	bio = BIO_new_mem_buf(rb_bytestring_byte_pointer(obj),
-		rb_bytestring_length(obj));
+	obj = rb_str_bstr(obj);
+	bio = BIO_new_mem_buf(rb_bstr_bytes(obj), rb_bstr_length(obj));
 	if (!bio) ossl_raise(eOSSLError, NULL);
     }
 

Modified: MacRuby/trunk/ext/openssl/ossl_digest.c
===================================================================
--- MacRuby/trunk/ext/openssl/ossl_digest.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/openssl/ossl_digest.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -175,17 +175,15 @@
     GetDigest(self, ctx);
 
     if (NIL_P(str)) {
-        str = rb_bytestring_new();
+        str = rb_bstr_new();
     }
     else {
         StringValue(str);
-	if (CLASS_OF(str) != rb_cByteString) {
-	    rb_raise(rb_eArgError, "expected ByteString object");
-	}
+	str = rb_str_bstr(str);
     }
-    rb_bytestring_resize(str, EVP_MD_CTX_size(ctx));
+    rb_bstr_resize(str, EVP_MD_CTX_size(ctx));
 
-    EVP_DigestFinal_ex(ctx, rb_bytestring_byte_pointer(str), NULL);
+    EVP_DigestFinal_ex(ctx, rb_bstr_bytes(str), NULL);
 
     return str;
 }

Modified: MacRuby/trunk/ext/openssl/ossl_pkey_ec.c
===================================================================
--- MacRuby/trunk/ext/openssl/ossl_pkey_ec.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/openssl/ossl_pkey_ec.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -639,15 +639,15 @@
 
 /* BUG: need a way to figure out the maximum string size */
     buf_len = 1024;
-    str = rb_bytestring_new();
-    rb_bytestring_resize(str, buf_len);
+    str = rb_bstr_new();
+    rb_bstr_resize(str, buf_len);
 /* BUG: take KDF as a block */
-    buf_len = ECDH_compute_key(rb_bytestring_byte_pointer(str), buf_len,
+    buf_len = ECDH_compute_key(rb_bstr_bytes(str), buf_len,
 	    point, ec, NULL);
     if (buf_len < 0)
          ossl_raise(eECError, "ECDH_compute_key");
 
-    rb_bytestring_resize(str, buf_len);
+    rb_bstr_resize(str, buf_len);
 
     return str;
 }

Modified: MacRuby/trunk/ext/openssl/ossl_ssl.c
===================================================================
--- MacRuby/trunk/ext/openssl/ossl_ssl.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/openssl/ossl_ssl.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1167,16 +1167,14 @@
     rb_scan_args(argc, argv, "11", &len, &str);
     ilen = NUM2INT(len);
     if(NIL_P(str)) {
-	str = rb_bytestring_new();
+	str = rb_bstr_new();
     }
     else{
         StringValue(str);
-        rb_str_modify(str);
-	if (CLASS_OF(str) != rb_cByteString) {
-	    rb_raise(rb_eArgError, "expected ByteString object");
-	}
+	rb_str_modify(str);
+	str = rb_str_bstr(str);
     }
-    rb_bytestring_resize(str, ilen);
+    rb_bstr_resize(str, ilen);
     if(ilen == 0) return str;
 
     Data_Get_Struct(self, SSL, ssl);
@@ -1185,8 +1183,8 @@
 	if(!nonblock && SSL_pending(ssl) <= 0)
 	    rb_thread_wait_fd(FPTR_TO_FD(fptr));
 	for (;;){
-	    nread = SSL_read(ssl, rb_bytestring_byte_pointer(str),
-		    rb_bytestring_length(str));
+	    nread = SSL_read(ssl, rb_bstr_bytes(str),
+		    rb_bstr_length(str));
 	    switch(ssl_get_error(ssl, nread)){
 	    case SSL_ERROR_NONE:
 		goto end;

Modified: MacRuby/trunk/ext/ripper/extconf.rb
===================================================================
--- MacRuby/trunk/ext/ripper/extconf.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/ripper/extconf.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -16,6 +16,7 @@
   $defs << '-DRIPPER_DEBUG' if $debug
   $VPATH << '$(topdir)' << '$(top_srcdir)'
   $INCFLAGS << ' -I$(topdir) -I$(top_srcdir) -I$(top_srcdir)/onig'
+  $CFLAGS << ' -std=c99'
   create_makefile 'ripper'
 end
 

Modified: MacRuby/trunk/ext/socket/socket.c
===================================================================
--- MacRuby/trunk/ext/socket/socket.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/socket/socket.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -597,17 +597,17 @@
     }
     fd = fptr->fd;
 
-    str = rb_bytestring_new();
-    rb_bytestring_resize(str, buflen);
+    str = rb_bstr_new();
+    rb_bstr_resize(str, buflen);
 
   retry:
     rb_thread_wait_fd(fd);
     rb_io_check_closed(fptr);
-    if (rb_bytestring_length(str) != buflen) {
+    if (rb_bstr_length(str) != buflen) {
 	rb_raise(rb_eRuntimeError, "buffer string modified");
     }
     TRAP_BEG;
-    slen = recvfrom(fd, rb_bytestring_byte_pointer(str), buflen, flags,
+    slen = recvfrom(fd, rb_bstr_bytes(str), buflen, flags,
 	    (struct sockaddr *)buf, &alen);
     TRAP_END;
 
@@ -617,8 +617,8 @@
 	}
 	rb_sys_fail("recvfrom(2)");
     }
-    if (slen < rb_bytestring_length(str)) {
-	rb_bytestring_resize(str, slen);
+    if (slen < rb_bstr_length(str)) {
+	rb_bstr_resize(str, slen);
     }
     rb_obj_taint(str);
     switch (from) {
@@ -677,19 +677,19 @@
     }
     fd = fptr->fd;
 
-    str = rb_bytestring_new();
-    rb_bytestring_resize(str, buflen);
+    str = rb_bstr_new();
+    rb_bstr_resize(str, buflen);
 
     rb_io_check_closed(fptr);
     rb_io_set_nonblock(fptr);
-    slen = recvfrom(fd, rb_bytestring_byte_pointer(str), buflen, flags,
+    slen = recvfrom(fd, rb_bstr_bytes(str), buflen, flags,
 	    (struct sockaddr *)buf, &alen);
 
     if (slen < 0) {
 	rb_sys_fail("recvfrom(2)");
     }
-    if (slen < rb_bytestring_length(str)) {
-	rb_bytestring_resize(str, slen);
+    if (slen < rb_bstr_length(str)) {
+	rb_bstr_resize(str, slen);
     }
     rb_obj_taint(str);
     switch (from) {

Modified: MacRuby/trunk/ext/zlib/zlib.c
===================================================================
--- MacRuby/trunk/ext/zlib/zlib.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ext/zlib/zlib.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -187,9 +187,9 @@
 
 void Init_zlib _((void));
 
-#define BSTRING_LEN(s) rb_bytestring_length(s)
-#define BSTRING_PTR(s) (rb_bytestring_byte_pointer(s))
-#define BSTRING_PTR_BYTEF(s) ((Bytef*)rb_bytestring_byte_pointer(s))
+#define BSTRING_LEN(s) rb_bstr_length(s)
+#define BSTRING_PTR(s) (rb_bstr_bytes(s))
+#define BSTRING_PTR_BYTEF(s) ((Bytef*)rb_bstr_bytes(s))
 
 
 /*--------- Exceptions --------*/
@@ -403,8 +403,8 @@
     long inc;
 
     if (NIL_P(z->buf)) {
-        GC_WB(&z->buf, rb_bytestring_new());
-        rb_bytestring_resize(z->buf, ZSTREAM_INITIAL_BUFSIZE);
+        GC_WB(&z->buf, rb_bstr_new());
+        rb_bstr_resize(z->buf, ZSTREAM_INITIAL_BUFSIZE);
         z->buf_filled = 0;
         z->stream.next_out = BSTRING_PTR_BYTEF(z->buf);
         z->stream.avail_out = ZSTREAM_INITIAL_BUFSIZE;
@@ -419,7 +419,7 @@
         if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) {
             inc = ZSTREAM_AVAIL_OUT_STEP_MIN;
         }
-        rb_bytestring_resize(z->buf, z->buf_filled + inc);
+        rb_bstr_resize(z->buf, z->buf_filled + inc);
         z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ?
             inc : ZSTREAM_AVAIL_OUT_STEP_MAX;
     }
@@ -430,13 +430,13 @@
 zstream_expand_buffer_into(struct zstream *z, int size)
 {
     if (NIL_P(z->buf)) {
-        GC_WB(&z->buf, rb_bytestring_new());
+        GC_WB(&z->buf, rb_bstr_new());
         z->buf_filled = 0;
         z->stream.next_out = BSTRING_PTR_BYTEF(z->buf);
         z->stream.avail_out = size;
     }
     else if (z->stream.avail_out != size) {
-        rb_bytestring_resize(z->buf, z->buf_filled + size);
+        rb_bstr_resize(z->buf, z->buf_filled + size);
         z->stream.next_out = BSTRING_PTR_BYTEF(z->buf) + z->buf_filled;
         z->stream.avail_out = size;
     }
@@ -446,30 +446,29 @@
 zstream_append_buffer(struct zstream *z, const Bytef *src, int len)
 {
     if (NIL_P(z->buf)) {
-	GC_WB(&z->buf, rb_bytestring_new_with_data((UInt8*)src, len));
+	GC_WB(&z->buf, rb_bstr_new_with_data((UInt8*)src, len));
 	z->buf_filled = len;
 	z->stream.next_out = BSTRING_PTR_BYTEF(z->buf);
 	z->stream.avail_out = 0;
 	return;
     }
     
-    CFMutableDataRef data = rb_bytestring_wrapped_data(z->buf);
-    if (CFDataGetLength(data) < (z->buf_filled + len)) {
-	CFDataSetLength(data, z->buf_filled + len);
+    if (rb_bstr_length(z->buf) < (z->buf_filled + len)) {
+	rb_bstr_resize(z->buf, z->buf_filled + len);
 	z->stream.avail_out = 0;
     } else if (z->stream.avail_out >= len) {
         z->stream.avail_out -= len;
     } else {
         z->stream.avail_out = 0;
     }
-    
-    CFDataAppendBytes(data, (const UInt8*)src, len);
+
+    rb_bstr_concat(z->buf, (const UInt8 *)src, len);
     z->buf_filled += len;
     z->stream.next_out = BSTRING_PTR_BYTEF(z->buf) + z->buf_filled;
 }
 
 #define zstream_append_buffer2(z,v) \
-    zstream_append_buffer((z),(Bytef*)rb_bytestring_byte_pointer(v),rb_bytestring_length(v))
+    zstream_append_buffer((z),(Bytef*)rb_bstr_bytes(v),rb_bstr_length(v))
 
 static VALUE
 zstream_detach_buffer(struct zstream *z)
@@ -477,11 +476,11 @@
     VALUE dst;
 
     if (NIL_P(z->buf)) {
-        dst = rb_bytestring_new();
+        dst = rb_bstr_new();
     }
     else {
         dst = z->buf;
-        rb_bytestring_resize(dst, z->buf_filled);
+        rb_bstr_resize(dst, z->buf_filled);
     }
 
     z->buf = Qnil;
@@ -500,7 +499,7 @@
         return zstream_detach_buffer(z);
     }
 
-    rb_bytestring_resize(z->buf, len);
+    rb_bstr_resize(z->buf, len);
     dst = z->buf;
     z->buf_filled -= len;
     UInt8 *buf = BSTRING_PTR(z->buf);
@@ -536,17 +535,15 @@
     if (len <= 0) return;
 
     if (NIL_P(z->input)) {
-	GC_WB(&z->input, rb_bytestring_new_with_data((UInt8*)src, len));
+	GC_WB(&z->input, rb_bstr_new_with_data((UInt8*)src, len));
     } else {
-	rb_bytestring_append_bytes(z->input, (const UInt8*)src, len);
+	rb_bstr_concat(z->input, (const UInt8*)src, len);
     }
 }
 
 #define zstream_append_input2(z,v) \
     do { \
-	if (*(VALUE *)v != rb_cByteString) { \
-	    v = rb_coerce_to_bytestring(v); \
-	} \
+	v = rb_str_bstr(v); \
 	zstream_append_input((z), BSTRING_PTR_BYTEF(v), BSTRING_LEN(v)); \
     } \
     while(0)
@@ -555,11 +552,11 @@
 zstream_discard_input(struct zstream *z, unsigned int len)
 {
     if (NIL_P(z->input) || BSTRING_LEN(z->input) <= len) {
-	    z->input = Qnil;
+	z->input = Qnil;
     } else {
-        UInt8 *buf = BSTRING_PTR(z->input);
-        memmove(buf, buf+len, BSTRING_LEN(z->input) - len);
-	    rb_bytestring_resize(z->input, BSTRING_LEN(z->input) - len);
+	UInt8 *buf = BSTRING_PTR(z->input);
+	memmove(buf, buf+len, BSTRING_LEN(z->input) - len);
+	rb_bstr_resize(z->input, BSTRING_LEN(z->input) - len);
     }
 }
 
@@ -584,9 +581,9 @@
     VALUE dst;
 
     if (NIL_P(z->input)) {
-        dst = rb_bytestring_new();
+        dst = rb_bstr_new();
     } else {
-	    dst = z->input;
+	dst = z->input;
     }
     z->input = Qnil;
     return dst;
@@ -1113,11 +1110,8 @@
     rb_scan_args(argc, argv, "11", &src, &level);
 
     lev = ARG_LEVEL(level);
-    CFShow((CFStringRef)src);
     StringValue(src);
-    if (CLASS_OF(src) != rb_cByteString) {
-        src = rb_coerce_to_bytestring(src);
-    }
+    src = rb_str_bstr(src);
     zstream_init_deflate(z);
     err = deflateInit(&z->stream, lev);
     if (err != Z_OK) {
@@ -1141,9 +1135,7 @@
 	return;
     }
     StringValue(src);
-    if (CLASS_OF(src) != rb_cByteString) {
-        src = rb_coerce_to_bytestring(src);
-    }
+    src = rb_str_bstr(src);
     if (flush != Z_NO_FLUSH || BSTRING_LEN(src) > 0) { /* prevent BUF_ERROR */
 	zstream_run(z, BSTRING_PTR_BYTEF(src), BSTRING_LEN(src), flush);
     }
@@ -1270,9 +1262,7 @@
 
     OBJ_INFECT(obj, dic);
     StringValue(src);
-    if (CLASS_OF(src) != rb_cByteString) {
-        src = rb_coerce_to_bytestring(src);
-    }
+    src = rb_str_bstr(src);
     err = deflateSetDictionary(&z->stream,
 			       BSTRING_PTR(src), BSTRING_LEN(src));
     if (err != Z_OK) {
@@ -1421,11 +1411,9 @@
 	}
 	else {
 	    StringValue(src);
-	    if (CLASS_OF(src) != rb_cByteString) {
-            src = rb_coerce_to_bytestring(src);
-	    }
+	    src = rb_str_bstr(src);
 	    zstream_append_buffer2(z, src);
-	    dst = rb_bytestring_new(0, 0);
+	    dst = rb_bstr_new();
 	}
     }
     else {
@@ -2000,7 +1988,7 @@
     if (len < 0)
         rb_raise(rb_eArgError, "negative length %d given", len);
     if (len == 0)
-	return rb_bytestring_new(0, 0);
+	return rb_bstr_new();
     while (!ZSTREAM_IS_FINISHED(&gz->z) && gz->z.buf_filled < len) {
 	gzfile_read_more(gz);
     }
@@ -2031,9 +2019,9 @@
 
     if (len == 0) {
         if (NIL_P(outbuf))
-            return rb_bytestring_new(0, 0);
+            return rb_bstr_new();
         else {
-            rb_bytestring_resize(outbuf, 0);
+            rb_bstr_resize(outbuf, 0);
             return outbuf;
         }
     }
@@ -2057,7 +2045,7 @@
         return dst;
     }
     else {
-        rb_bytestring_resize(outbuf, BSTRING_LEN(dst));
+        rb_bstr_resize(outbuf, BSTRING_LEN(dst));
         UInt8 *buf = BSTRING_PTR(outbuf);
         memcpy(buf, BSTRING_PTR(dst), BSTRING_LEN(dst));
         return outbuf;
@@ -2076,7 +2064,7 @@
 	if (!(gz->z.flags & GZFILE_FLAG_FOOTER_FINISHED)) {
 	    gzfile_check_footer(gz);
 	}
-	return rb_bytestring_new(0, 0);
+	return rb_bstr_new();
     }
 
     dst = zstream_detach_buffer(&gz->z);
@@ -2623,12 +2611,8 @@
 {
     struct gzfile *gz = get_gzfile(obj);
 
-    if (TYPE(str) != T_STRING) {
-	str = rb_obj_as_string(str);
-    }
-    if (CLASS_OF(str) != rb_cByteString) {
-        str = rb_coerce_to_bytestring(str);
-    }
+    StringValue(str);
+    str = rb_str_bstr(str);
     gzfile_write(gz, BSTRING_PTR_BYTEF(str), BSTRING_LEN(str));
     return INT2FIX(BSTRING_LEN(str));
 }

Modified: MacRuby/trunk/file.c
===================================================================
--- MacRuby/trunk/file.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/file.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -19,6 +19,7 @@
 #include "dln.h"
 #include "objc.h"
 #include "vm.h"
+#include "encoding.h"
 
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
@@ -113,26 +114,11 @@
 	tmp = obj;
     }
   exit:
-    if (CLASS_OF(tmp) == rb_cByteString) {
-	const long len = rb_bytestring_length(tmp);
-	char *buf = (char *)alloca(len + 1);
-	memcpy(buf, (const char *)rb_bytestring_byte_pointer(tmp), len); 
-	buf[len] = '\0';
-	CFStringRef str = CFStringCreateWithFileSystemRepresentation(NULL,
-		buf);
-	if (str == NULL) {
-	    rb_raise(rb_eRuntimeError,
-		    "can't convert given ByteString to path");
-	}
-	return (VALUE)CFMakeCollectable(str);
+    StringValueCStr(tmp);
+    if (check && obj != tmp) {
+	rb_check_safe_obj(tmp);
     }
-    else {
-	StringValueCStr(tmp);
-	if (check && obj != tmp) {
-	    rb_check_safe_obj(tmp);
-	}
-	return rb_str_new4(tmp);
-    }
+    return rb_str_new4(tmp);
 }
 
 VALUE
@@ -183,7 +169,7 @@
 rb_file_path(VALUE obj, SEL sel)
 {
     rb_io_t *io = ExtractIOStruct(obj);
-    return io->path == NULL ? Qnil : (VALUE)io->path;
+    return io->path == 0 ? Qnil : io->path;
 }
 
 static VALUE
@@ -840,7 +826,7 @@
 
     rb_secure(2);
     GetOpenFile(obj, fptr);
-    if (fptr->path == NULL) {
+    if (fptr->path == 0) {
 	return Qnil;
     }
     if (lstat(RSTRING_PTR(fptr->path), &st) == -1) {
@@ -2582,7 +2568,8 @@
 rb_file_s_split(VALUE klass, SEL sel, VALUE path)
 {
     FilePathStringValue(path);		/* get rid of converting twice */
-    return rb_assoc_new(rb_file_s_dirname(Qnil, 0, path), rb_file_s_basename(0,0,1,&path));
+    return rb_assoc_new(rb_file_s_dirname(Qnil, 0, path),
+	    rb_file_s_basename(0,0,1,&path));
 }
 
 static VALUE separator;
@@ -2590,51 +2577,42 @@
 static VALUE
 rb_file_join(VALUE ary, VALUE sep)
 {
-    CFMutableStringRef res = CFStringCreateMutable(NULL, 0);
-    CFStringRef sep_cf = (CFStringRef)sep;
+    assert(rb_str_chars_len(sep) == 1);
+    UChar sep_char = rb_str_get_uchar(sep, 0);
+    VALUE res = rb_str_new(NULL, 0);
 
-    const long count = RARRAY_LEN(ary);
-    if (count > 0) {
-	long i;
-	for (i = 0; i < count; i++) {
-	    VALUE tmp = RARRAY_AT(ary, i);
-	    switch (TYPE(tmp)) {
-		case T_STRING:
-		    if (*(VALUE *)tmp == rb_cByteString) {
-			tmp = (VALUE)rb_bytestring_resolve_cfstring(tmp);
-		    }
-		    break;
+    for (long i = 0, count = RARRAY_LEN(ary); i < count; i++) {
+	VALUE tmp = RARRAY_AT(ary, i);
+	switch (TYPE(tmp)) {
+	    case T_STRING:
+		break;
 
-		case T_ARRAY:
-		    tmp = rb_file_join(tmp, sep);
-		    break;
+	    case T_ARRAY:
+		tmp = rb_file_join(tmp, sep);
+		break;
 
-		default:
-		    FilePathStringValue(tmp);
-	    }
+	    default:
+		FilePathStringValue(tmp);
+	}
 
-	    CFStringRef tmp_cf = (CFStringRef)tmp;
+	if (i > 0 && !NIL_P(sep)) {
+	    const long res_len = rb_str_chars_len(res);
+	    const long tmp_len = rb_str_chars_len(tmp);
 
-	    if (i > 0) {
-		if (CFStringHasSuffix(res, sep_cf)) {
-		    if (CFStringHasPrefix(tmp_cf, sep_cf)) {
-			// Remove trailing slash from res if tmp starts with a
-			// slash.
-			CFStringDelete(res,
-				CFRangeMake(CFStringGetLength(res) - 1, 1));
-		    }
+	    if (res_len > 0
+		    && rb_str_get_uchar(res, res_len - 1) == sep_char) {
+		if (tmp_len > 0 && rb_str_get_uchar(tmp, 0) == sep_char) {
+		    rb_str_delete(res, res_len - 1, 1);
 		}
-		else if (!CFStringHasPrefix(tmp_cf, sep_cf)) {
-		    CFStringAppend(res, sep_cf);
-		}
 	    }
-
-	    CFStringAppend(res, tmp_cf);
+	    else if (tmp_len == 0
+		    || rb_str_get_uchar(tmp, 0) != sep_char) {
+		rb_str_concat(res, sep);
+	    } 
 	}
+	rb_str_concat(res, tmp);
     }
-
-    CFMakeCollectable(res);
-    return (VALUE)res;
+    return res;
 }
 
 /*

Modified: MacRuby/trunk/gc.c
===================================================================
--- MacRuby/trunk/gc.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/gc.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -21,7 +21,6 @@
 #include "ruby/signal.h"
 #include "ruby/st.h"
 #include "ruby/node.h"
-#include "ruby/re.h"
 #include "ruby/io.h"
 #include "ruby/util.h"
 #include "objc.h"

Modified: MacRuby/trunk/hash.c
===================================================================
--- MacRuby/trunk/hash.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/hash.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -100,10 +100,11 @@
 		return a != b;
 	    }
 	    break;
+
+	case T_STRING:
+	    return rb_str_cmp(a, b);
     }
 
-    // XXX optimize for string
-
     return !rb_eql(a, b);
 }
 
@@ -120,7 +121,7 @@
 	    return (int)a;
 
 	case T_STRING:
-	    return CFHash((CFTypeRef)a);
+	    return (int)rb_str_hash(a);
     }
 
     return (int)FIX2LONG(rb_hash(a));

Deleted: MacRuby/trunk/icu-1060/unicode/basictz.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/basictz.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/basictz.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,210 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 2007-2008, International Business Machines Corporation and         *
-* others. All Rights Reserved.                                                *
-*******************************************************************************
-*/
-#ifndef BASICTZ_H
-#define BASICTZ_H
-
-/**
- * \file 
- * \brief C++ API: ICU TimeZone base class
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/timezone.h"
-#include "unicode/tzrule.h"
-#include "unicode/tztrans.h"
-
-U_NAMESPACE_BEGIN
-
-// forward declarations
-class UVector;
-
-/**
- * <code>BasicTimeZone</code> is an abstract class extending <code>TimeZone</code>.
- * This class provides some additional methods to access time zone transitions and rules.
- * All ICU <code>TimeZone</code> concrete subclasses extend this class.
- * @stable ICU 4.0
- */
-class U_I18N_API BasicTimeZone: public TimeZone {
-public:
-    /**
-     * Destructor.
-     * @stable ICU 4.0
-     */
-    virtual ~BasicTimeZone();
-
-    /**
-     * Gets the first time zone transition after the base time.
-     * @param base      The base time.
-     * @param inclusive Whether the base time is inclusive or not.
-     * @param result    Receives the first transition after the base time.
-     * @return  TRUE if the transition is found.
-     * @stable ICU 4.0
-     */
-    virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/ = 0;
-
-    /**
-     * Gets the most recent time zone transition before the base time.
-     * @param base      The base time.
-     * @param inclusive Whether the base time is inclusive or not.
-     * @param result    Receives the most recent transition before the base time.
-     * @return  TRUE if the transition is found.
-     * @stable ICU 4.0
-     */
-    virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/ = 0;
-
-    /**
-     * Checks if the time zone has equivalent transitions in the time range.
-     * This method returns true when all of transition times, from/to standard
-     * offsets and DST savings used by this time zone match the other in the
-     * time range.
-     * @param tz    The <code>BasicTimeZone</code> object to be compared with.
-     * @param start The start time of the evaluated time range (inclusive)
-     * @param end   The end time of the evaluated time range (inclusive)
-     * @param ignoreDstAmount
-     *              When true, any transitions with only daylight saving amount
-     *              changes will be ignored, except either of them is zero.
-     *              For example, a transition from rawoffset 3:00/dstsavings 1:00
-     *              to rawoffset 2:00/dstsavings 2:00 is excluded from the comparison,
-     *              but a transtion from rawoffset 2:00/dstsavings 1:00 to
-     *              rawoffset 3:00/dstsavings 0:00 is included.
-     * @param ec    Output param to filled in with a success or an error.
-     * @return      true if the other time zone has the equivalent transitions in the
-     *              time range.
-     * @stable ICU 4.0
-     */
-    virtual UBool hasEquivalentTransitions(/*const*/ BasicTimeZone& tz, UDate start, UDate end,
-        UBool ignoreDstAmount, UErrorCode& ec) /*const*/;
-
-    /**
-     * Returns the number of <code>TimeZoneRule</code>s which represents time transitions,
-     * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except
-     * <code>InitialTimeZoneRule</code>.  The return value range is 0 or any positive value.
-     * @param status    Receives error status code.
-     * @return The number of <code>TimeZoneRule</code>s representing time transitions.
-     * @stable ICU 4.0
-     */
-    virtual int32_t countTransitionRules(UErrorCode& status) /*const*/ = 0;
-
-    /**
-     * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code>
-     * which represent time transitions for this time zone.  On successful return,
-     * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and
-     * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code>
-     * instances up to the size specified by trscount.  The results are referencing the
-     * rule instance held by this time zone instance.  Therefore, after this time zone
-     * is destructed, they are no longer available.
-     * @param initial       Receives the initial timezone rule
-     * @param trsrules      Receives the timezone transition rules
-     * @param trscount      On input, specify the size of the array 'transitions' receiving
-     *                      the timezone transition rules.  On output, actual number of
-     *                      rules filled in the array will be set.
-     * @param status        Receives error status code.
-     * @stable ICU 4.0
-     */
-    virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial,
-        const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) /*const*/ = 0;
-
-    /**
-     * Gets the set of time zone rules valid at the specified time.  Some known external time zone
-     * implementations are not capable to handle historic time zone rule changes.  Also some
-     * implementations can only handle certain type of rule definitions.
-     * If this time zone does not use any daylight saving time within about 1 year from the specified
-     * time, only the <code>InitialTimeZone</code> is returned.  Otherwise, the rule for standard
-     * time and daylight saving time transitions are returned in addition to the
-     * <code>InitialTimeZoneRule</code>.  The standard and daylight saving time transition rules are
-     * represented by <code>AnnualTimeZoneRule</code> with <code>DateTimeRule::DOW</code> for its date
-     * rule and <code>DateTimeRule::WALL_TIME</code> for its time rule.  Because daylight saving time
-     * rule is changing time to time in many time zones and also mapping a transition time rule to
-     * different type is lossy transformation, the set of rules returned by this method may be valid
-     * for short period of time.
-     * The time zone rule objects returned by this method is owned by the caller, so the caller is
-     * responsible for deleting them after use.
-     * @param date      The date used for extracting time zone rules.
-     * @param initial   Receives the <code>InitialTimeZone</code>, always not NULL.
-     * @param std       Receives the <code>AnnualTimeZoneRule</code> for standard time transitions.
-     *                  When this time time zone does not observe daylight saving times around the
-     *                  specified date, NULL is set.
-     * @param dst       Receives the <code>AnnualTimeZoneRule</code> for daylight saving time
-     *                  transitions.  When this time zone does not observer daylight saving times
-     *                  around the specified date, NULL is set.
-     * @param status    Receives error status code.
-     * @stable ICU 4.0
-     */
-    virtual void getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial,
-        AnnualTimeZoneRule*& std, AnnualTimeZoneRule*& dst, UErrorCode& status) /*const*/;
-
-
-    /**
-     * The time type option bit flags used by getOffsetFromLocal
-     * @internal
-     */
-    enum {
-        kStandard = 0x01,
-        kDaylight = 0x03,
-        kFormer = 0x04,
-        kLatter = 0x0C
-    };
-
-    /**
-     * Get time zone offsets from local wall time.
-     * @internal
-     */
-    virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt,
-        int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) /*const*/;
-
-protected:
-
-    /**
-     * The time type option bit masks used by getOffsetFromLocal
-     * @internal
-     */
-    enum {
-        kStdDstMask = kDaylight,
-        kFormerLatterMask = kLatter
-    };
-
-    /**
-     * Default constructor.
-     * @stable ICU 4.0
-     */
-    BasicTimeZone();
-
-    /**
-     * Construct a timezone with a given ID.
-     * @param id a system time zone ID
-     * @stable ICU 4.0
-     */
-    BasicTimeZone(const UnicodeString &id);
-
-    /**
-     * Copy constructor.
-     * @param source the object to be copied.
-     * @stable ICU 4.0
-     */
-    BasicTimeZone(const BasicTimeZone& source);
-
-    /**
-     * Gets the set of TimeZoneRule instances applicable to the specified time and after.
-     * @param start     The start date used for extracting time zone rules
-     * @param initial   Receives the InitialTimeZone, always not NULL
-     * @param transitionRules   Receives the transition rules, could be NULL
-     * @param status    Receives error status code
-     */
-    void getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, UVector*& transitionRules,
-        UErrorCode& status) /*const*/;
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // BASICTZ_H
-
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/basictz.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/basictz.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/basictz.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/basictz.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,210 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2008, International Business Machines Corporation and         *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*/
+#ifndef BASICTZ_H
+#define BASICTZ_H
+
+/**
+ * \file 
+ * \brief C++ API: ICU TimeZone base class
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/timezone.h"
+#include "unicode/tzrule.h"
+#include "unicode/tztrans.h"
+
+U_NAMESPACE_BEGIN
+
+// forward declarations
+class UVector;
+
+/**
+ * <code>BasicTimeZone</code> is an abstract class extending <code>TimeZone</code>.
+ * This class provides some additional methods to access time zone transitions and rules.
+ * All ICU <code>TimeZone</code> concrete subclasses extend this class.
+ * @stable ICU 4.0
+ */
+class U_I18N_API BasicTimeZone: public TimeZone {
+public:
+    /**
+     * Destructor.
+     * @stable ICU 4.0
+     */
+    virtual ~BasicTimeZone();
+
+    /**
+     * Gets the first time zone transition after the base time.
+     * @param base      The base time.
+     * @param inclusive Whether the base time is inclusive or not.
+     * @param result    Receives the first transition after the base time.
+     * @return  TRUE if the transition is found.
+     * @stable ICU 4.0
+     */
+    virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/ = 0;
+
+    /**
+     * Gets the most recent time zone transition before the base time.
+     * @param base      The base time.
+     * @param inclusive Whether the base time is inclusive or not.
+     * @param result    Receives the most recent transition before the base time.
+     * @return  TRUE if the transition is found.
+     * @stable ICU 4.0
+     */
+    virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/ = 0;
+
+    /**
+     * Checks if the time zone has equivalent transitions in the time range.
+     * This method returns true when all of transition times, from/to standard
+     * offsets and DST savings used by this time zone match the other in the
+     * time range.
+     * @param tz    The <code>BasicTimeZone</code> object to be compared with.
+     * @param start The start time of the evaluated time range (inclusive)
+     * @param end   The end time of the evaluated time range (inclusive)
+     * @param ignoreDstAmount
+     *              When true, any transitions with only daylight saving amount
+     *              changes will be ignored, except either of them is zero.
+     *              For example, a transition from rawoffset 3:00/dstsavings 1:00
+     *              to rawoffset 2:00/dstsavings 2:00 is excluded from the comparison,
+     *              but a transtion from rawoffset 2:00/dstsavings 1:00 to
+     *              rawoffset 3:00/dstsavings 0:00 is included.
+     * @param ec    Output param to filled in with a success or an error.
+     * @return      true if the other time zone has the equivalent transitions in the
+     *              time range.
+     * @stable ICU 4.0
+     */
+    virtual UBool hasEquivalentTransitions(/*const*/ BasicTimeZone& tz, UDate start, UDate end,
+        UBool ignoreDstAmount, UErrorCode& ec) /*const*/;
+
+    /**
+     * Returns the number of <code>TimeZoneRule</code>s which represents time transitions,
+     * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except
+     * <code>InitialTimeZoneRule</code>.  The return value range is 0 or any positive value.
+     * @param status    Receives error status code.
+     * @return The number of <code>TimeZoneRule</code>s representing time transitions.
+     * @stable ICU 4.0
+     */
+    virtual int32_t countTransitionRules(UErrorCode& status) /*const*/ = 0;
+
+    /**
+     * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code>
+     * which represent time transitions for this time zone.  On successful return,
+     * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and
+     * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code>
+     * instances up to the size specified by trscount.  The results are referencing the
+     * rule instance held by this time zone instance.  Therefore, after this time zone
+     * is destructed, they are no longer available.
+     * @param initial       Receives the initial timezone rule
+     * @param trsrules      Receives the timezone transition rules
+     * @param trscount      On input, specify the size of the array 'transitions' receiving
+     *                      the timezone transition rules.  On output, actual number of
+     *                      rules filled in the array will be set.
+     * @param status        Receives error status code.
+     * @stable ICU 4.0
+     */
+    virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial,
+        const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) /*const*/ = 0;
+
+    /**
+     * Gets the set of time zone rules valid at the specified time.  Some known external time zone
+     * implementations are not capable to handle historic time zone rule changes.  Also some
+     * implementations can only handle certain type of rule definitions.
+     * If this time zone does not use any daylight saving time within about 1 year from the specified
+     * time, only the <code>InitialTimeZone</code> is returned.  Otherwise, the rule for standard
+     * time and daylight saving time transitions are returned in addition to the
+     * <code>InitialTimeZoneRule</code>.  The standard and daylight saving time transition rules are
+     * represented by <code>AnnualTimeZoneRule</code> with <code>DateTimeRule::DOW</code> for its date
+     * rule and <code>DateTimeRule::WALL_TIME</code> for its time rule.  Because daylight saving time
+     * rule is changing time to time in many time zones and also mapping a transition time rule to
+     * different type is lossy transformation, the set of rules returned by this method may be valid
+     * for short period of time.
+     * The time zone rule objects returned by this method is owned by the caller, so the caller is
+     * responsible for deleting them after use.
+     * @param date      The date used for extracting time zone rules.
+     * @param initial   Receives the <code>InitialTimeZone</code>, always not NULL.
+     * @param std       Receives the <code>AnnualTimeZoneRule</code> for standard time transitions.
+     *                  When this time time zone does not observe daylight saving times around the
+     *                  specified date, NULL is set.
+     * @param dst       Receives the <code>AnnualTimeZoneRule</code> for daylight saving time
+     *                  transitions.  When this time zone does not observer daylight saving times
+     *                  around the specified date, NULL is set.
+     * @param status    Receives error status code.
+     * @stable ICU 4.0
+     */
+    virtual void getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial,
+        AnnualTimeZoneRule*& std, AnnualTimeZoneRule*& dst, UErrorCode& status) /*const*/;
+
+
+    /**
+     * The time type option bit flags used by getOffsetFromLocal
+     * @internal
+     */
+    enum {
+        kStandard = 0x01,
+        kDaylight = 0x03,
+        kFormer = 0x04,
+        kLatter = 0x0C
+    };
+
+    /**
+     * Get time zone offsets from local wall time.
+     * @internal
+     */
+    virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt,
+        int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) /*const*/;
+
+protected:
+
+    /**
+     * The time type option bit masks used by getOffsetFromLocal
+     * @internal
+     */
+    enum {
+        kStdDstMask = kDaylight,
+        kFormerLatterMask = kLatter
+    };
+
+    /**
+     * Default constructor.
+     * @stable ICU 4.0
+     */
+    BasicTimeZone();
+
+    /**
+     * Construct a timezone with a given ID.
+     * @param id a system time zone ID
+     * @stable ICU 4.0
+     */
+    BasicTimeZone(const UnicodeString &id);
+
+    /**
+     * Copy constructor.
+     * @param source the object to be copied.
+     * @stable ICU 4.0
+     */
+    BasicTimeZone(const BasicTimeZone& source);
+
+    /**
+     * Gets the set of TimeZoneRule instances applicable to the specified time and after.
+     * @param start     The start date used for extracting time zone rules
+     * @param initial   Receives the InitialTimeZone, always not NULL
+     * @param transitionRules   Receives the transition rules, could be NULL
+     * @param status    Receives error status code
+     */
+    void getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, UVector*& transitionRules,
+        UErrorCode& status) /*const*/;
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // BASICTZ_H
+
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/brkiter.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/brkiter.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/brkiter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,557 +0,0 @@
-/*
-********************************************************************************
-*   Copyright (C) 1997-2007, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-********************************************************************************
-*
-* File brkiter.h
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   02/18/97    aliu        Added typedef for TextCount.  Made DONE const.
-*   05/07/97    aliu        Fixed DLL declaration.
-*   07/09/97    jfitz       Renamed BreakIterator and interface synced with JDK
-*   08/11/98    helena      Sync-up JDK1.2.
-*   01/13/2000  helena      Added UErrorCode parameter to createXXXInstance methods.
-********************************************************************************
-*/
-
-#ifndef BRKITER_H
-#define BRKITER_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief C++ API: Break Iterator.
- */
-
-#if UCONFIG_NO_BREAK_ITERATION
-
-U_NAMESPACE_BEGIN
-
-/*
- * Allow the declaration of APIs with pointers to BreakIterator
- * even when break iteration is removed from the build.
- */
-class BreakIterator;
-
-U_NAMESPACE_END
-
-#else
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-#include "unicode/chariter.h"
-#include "unicode/locid.h"
-#include "unicode/ubrk.h"
-#include "unicode/strenum.h"
-#include "unicode/utext.h"
-#include "unicode/umisc.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * The BreakIterator class implements methods for finding the location
- * of boundaries in text. BreakIterator is an abstract base class.
- * Instances of BreakIterator maintain a current position and scan over
- * text returning the index of characters where boundaries occur.
- * <p>
- * Line boundary analysis determines where a text string can be broken
- * when line-wrapping. The mechanism correctly handles punctuation and
- * hyphenated words.
- * <p>
- * Sentence boundary analysis allows selection with correct
- * interpretation of periods within numbers and abbreviations, and
- * trailing punctuation marks such as quotation marks and parentheses.
- * <p>
- * Word boundary analysis is used by search and replace functions, as
- * well as within text editing applications that allow the user to
- * select words with a double click. Word selection provides correct
- * interpretation of punctuation marks within and following
- * words. Characters that are not part of a word, such as symbols or
- * punctuation marks, have word-breaks on both sides.
- * <p>
- * Character boundary analysis allows users to interact with
- * characters as they expect to, for example, when moving the cursor
- * through a text string. Character boundary analysis provides correct
- * navigation of through character strings, regardless of how the
- * character is stored.  For example, an accented character might be
- * stored as a base character and a diacritical mark. What users
- * consider to be a character can differ between languages.
- * <p>
- * The text boundary positions are found according to the rules
- * described in Unicode Standard Annex #29, Text Boundaries, and
- * Unicode Standard Annex #14, Line Breaking Properties.  These
- * are available at http://www.unicode.org/reports/tr14/ and
- * http://www.unicode.org/reports/tr29/.
- * <p>
- * In addition to the C++ API defined in this header file, a
- * plain C API with equivalent functionality is defined in the
- * file ubrk.h
- * <p>
- * Code snippits illustrating the use of the Break Iterator APIs
- * are available in the ICU User Guide,
- * http://icu-project.org/userguide/boundaryAnalysis.html
- * and in the sample program icu/source/samples/break/break.cpp"
- *
- */
-class U_COMMON_API BreakIterator : public UObject {
-public:
-    /**
-     *  destructor
-     *  @stable ICU 2.0
-     */
-    virtual ~BreakIterator();
-
-    /**
-     * Return true if another object is semantically equal to this
-     * one. The other object should be an instance of the same subclass of
-     * BreakIterator. Objects of different subclasses are considered
-     * unequal.
-     * <P>
-     * Return true if this BreakIterator is at the same position in the
-     * same text, and is the same class and type (word, line, etc.) of
-     * BreakIterator, as the argument.  Text is considered the same if
-     * it contains the same characters, it need not be the same
-     * object, and styles are not considered.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const BreakIterator&) const = 0;
-
-    /**
-     * Returns the complement of the result of operator==
-     * @param rhs The BreakIterator to be compared for inequality
-     * @return the complement of the result of operator==
-     * @stable ICU 2.0
-     */
-    UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
-
-    /**
-     * Return a polymorphic copy of this object.  This is an abstract
-     * method which subclasses implement.
-     * @stable ICU 2.0
-     */
-    virtual BreakIterator* clone(void) const = 0;
-
-    /**
-     * Return a polymorphic class ID for this object. Different subclasses
-     * will return distinct unequal values.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const = 0;
-
-    /**
-     * Return a CharacterIterator over the text being analyzed.
-     * @stable ICU 2.0
-     */
-    virtual CharacterIterator& getText(void) const = 0;
-
-
-    /**
-      *  Get a UText for the text being analyzed.
-      *  The returned UText is a shallow clone of the UText used internally
-      *  by the break iterator implementation.  It can safely be used to
-      *  access the text without impacting any break iterator operations,
-      *  but the underlying text itself must not be altered.
-      *
-      * @param fillIn A UText to be filled in.  If NULL, a new UText will be
-      *           allocated to hold the result.
-      * @param status receives any error codes.
-      * @return   The current UText for this break iterator.  If an input
-      *           UText was provided, it will always be returned.
-      * @stable ICU 3.4
-      */
-     virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
-
-    /**
-     * Change the text over which this operates. The text boundary is
-     * reset to the start.
-     * @param text The UnicodeString used to change the text.
-     * @stable ICU 2.0
-     */
-    virtual void  setText(const UnicodeString &text) = 0;
-
-    /**
-     * Reset the break iterator to operate over the text represented by
-     * the UText.  The iterator position is reset to the start.
-     *
-     * This function makes a shallow clone of the supplied UText.  This means
-     * that the caller is free to immediately close or otherwise reuse the
-     * Utext that was passed as a parameter, but that the underlying text itself
-     * must not be altered while being referenced by the break iterator.
-     *
-     * @param text The UText used to change the text.
-     * @param status receives any error codes.
-     * @stable ICU 3.4
-     */
-    virtual void  setText(UText *text, UErrorCode &status) = 0;
-
-    /**
-     * Change the text over which this operates. The text boundary is
-     * reset to the start.
-     * Note that setText(UText *) provides similar functionality to this function,
-     * and is more efficient.
-     * @param it The CharacterIterator used to change the text.
-     * @stable ICU 2.0
-     */
-    virtual void  adoptText(CharacterIterator* it) = 0;
-
-    enum {
-        /**
-         * DONE is returned by previous() and next() after all valid
-         * boundaries have been returned.
-         * @stable ICU 2.0
-         */
-        DONE = (int32_t)-1
-    };
-
-    /**
-     * Return the index of the first character in the text being scanned.
-     * @stable ICU 2.0
-     */
-    virtual int32_t first(void) = 0;
-
-    /**
-     * Return the index immediately BEYOND the last character in the text being scanned.
-     * @stable ICU 2.0
-     */
-    virtual int32_t last(void) = 0;
-
-    /**
-     * Return the boundary preceding the current boundary.
-     * @return The character index of the previous text boundary or DONE if all
-     * boundaries have been returned.
-     * @stable ICU 2.0
-     */
-    virtual int32_t previous(void) = 0;
-
-    /**
-     * Return the boundary following the current boundary.
-     * @return The character index of the next text boundary or DONE if all
-     * boundaries have been returned.
-     * @stable ICU 2.0
-     */
-    virtual int32_t next(void) = 0;
-
-    /**
-     * Return character index of the current interator position within the text.
-     * @return The boundary most recently returned.
-     * @stable ICU 2.0
-     */
-    virtual int32_t current(void) const = 0;
-
-    /**
-     * Return the first boundary following the specified offset.
-     * The value returned is always greater than the offset or
-     * the value BreakIterator.DONE
-     * @param offset the offset to begin scanning.
-     * @return The first boundary after the specified offset.
-     * @stable ICU 2.0
-     */
-    virtual int32_t following(int32_t offset) = 0;
-
-    /**
-     * Return the first boundary preceding the specified offset.
-     * The value returned is always smaller than the offset or
-     * the value BreakIterator.DONE
-     * @param offset the offset to begin scanning.
-     * @return The first boundary before the specified offset.
-     * @stable ICU 2.0
-     */
-    virtual int32_t preceding(int32_t offset) = 0;
-
-    /**
-     * Return true if the specfied position is a boundary position.
-     * As a side effect, the current position of the iterator is set
-     * to the first boundary position at or following the specified offset.
-     * @param offset the offset to check.
-     * @return True if "offset" is a boundary position.
-     * @stable ICU 2.0
-     */
-    virtual UBool isBoundary(int32_t offset) = 0;
-
-    /**
-     * Return the nth boundary from the current boundary
-     * @param n which boundary to return.  A value of 0
-     * does nothing.  Negative values move to previous boundaries
-     * and positive values move to later boundaries.
-     * @return The index of the nth boundary from the current position, or
-     * DONE if there are fewer than |n| boundaries in the specfied direction.
-     * @stable ICU 2.0
-     */
-    virtual int32_t next(int32_t n) = 0;
-
-    /**
-     * Create BreakIterator for word-breaks using the given locale.
-     * Returns an instance of a BreakIterator implementing word breaks.
-     * WordBreak is useful for word selection (ex. double click)
-     * @param where the locale.
-     * @param status the error code
-     * @return A BreakIterator for word-breaks.  The UErrorCode& status
-     * parameter is used to return status information to the user.
-     * To check whether the construction succeeded or not, you should check
-     * the value of U_SUCCESS(err).  If you wish more detailed information, you
-     * can check for informational error results which still indicate success.
-     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
-     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
-     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
-     * used; neither the requested locale nor any of its fall back locales
-     * could be found.
-     * The caller owns the returned object and is responsible for deleting it.
-     * @stable ICU 2.0
-     */
-    static BreakIterator* U_EXPORT2
-    createWordInstance(const Locale& where, UErrorCode& status);
-
-    /**
-     * Create BreakIterator for line-breaks using specified locale.
-     * Returns an instance of a BreakIterator implementing line breaks. Line
-     * breaks are logically possible line breaks, actual line breaks are
-     * usually determined based on display width.
-     * LineBreak is useful for word wrapping text.
-     * @param where the locale.
-     * @param status The error code.
-     * @return A BreakIterator for line-breaks.  The UErrorCode& status
-     * parameter is used to return status information to the user.
-     * To check whether the construction succeeded or not, you should check
-     * the value of U_SUCCESS(err).  If you wish more detailed information, you
-     * can check for informational error results which still indicate success.
-     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
-     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
-     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
-     * used; neither the requested locale nor any of its fall back locales
-     * could be found.
-     * The caller owns the returned object and is responsible for deleting it.
-     * @stable ICU 2.0
-     */
-    static BreakIterator* U_EXPORT2
-    createLineInstance(const Locale& where, UErrorCode& status);
-
-    /**
-     * Create BreakIterator for character-breaks using specified locale
-     * Returns an instance of a BreakIterator implementing character breaks.
-     * Character breaks are boundaries of combining character sequences.
-     * @param where the locale.
-     * @param status The error code.
-     * @return A BreakIterator for character-breaks.  The UErrorCode& status
-     * parameter is used to return status information to the user.
-     * To check whether the construction succeeded or not, you should check
-     * the value of U_SUCCESS(err).  If you wish more detailed information, you
-     * can check for informational error results which still indicate success.
-     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
-     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
-     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
-     * used; neither the requested locale nor any of its fall back locales
-     * could be found.
-     * The caller owns the returned object and is responsible for deleting it.
-     * @stable ICU 2.0
-     */
-    static BreakIterator* U_EXPORT2
-    createCharacterInstance(const Locale& where, UErrorCode& status);
-
-    /**
-     * Create BreakIterator for sentence-breaks using specified locale
-     * Returns an instance of a BreakIterator implementing sentence breaks.
-     * @param where the locale.
-     * @param status The error code.
-     * @return A BreakIterator for sentence-breaks.  The UErrorCode& status
-     * parameter is used to return status information to the user.
-     * To check whether the construction succeeded or not, you should check
-     * the value of U_SUCCESS(err).  If you wish more detailed information, you
-     * can check for informational error results which still indicate success.
-     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
-     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
-     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
-     * used; neither the requested locale nor any of its fall back locales
-     * could be found.
-     * The caller owns the returned object and is responsible for deleting it.
-     * @stable ICU 2.0
-     */
-    static BreakIterator* U_EXPORT2
-    createSentenceInstance(const Locale& where, UErrorCode& status);
-
-    /**
-     * Create BreakIterator for title-casing breaks using the specified locale
-     * Returns an instance of a BreakIterator implementing title breaks.
-     * The iterator returned locates title boundaries as described for
-     * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
-     * please use Word Boundary iterator.{@link #createWordInstance }
-     *
-     * @param where the locale.
-     * @param status The error code.
-     * @return A BreakIterator for title-breaks.  The UErrorCode& status
-     * parameter is used to return status information to the user.
-     * To check whether the construction succeeded or not, you should check
-     * the value of U_SUCCESS(err).  If you wish more detailed information, you
-     * can check for informational error results which still indicate success.
-     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
-     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
-     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
-     * used; neither the requested locale nor any of its fall back locales
-     * could be found.
-     * The caller owns the returned object and is responsible for deleting it.
-     * @stable ICU 2.1
-     */
-    static BreakIterator* U_EXPORT2
-    createTitleInstance(const Locale& where, UErrorCode& status);
-
-    /**
-     * Get the set of Locales for which TextBoundaries are installed.
-     * <p><b>Note:</b> this will not return locales added through the register
-     * call. To see the registered locales too, use the getAvailableLocales
-     * function that returns a StringEnumeration object </p>
-     * @param count the output parameter of number of elements in the locale list
-     * @return available locales
-     * @stable ICU 2.0
-     */
-    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
-
-    /**
-     * Get name of the object for the desired Locale, in the desired langauge.
-     * @param objectLocale must be from getAvailableLocales.
-     * @param displayLocale specifies the desired locale for output.
-     * @param name the fill-in parameter of the return value
-     * Uses best match.
-     * @return user-displayable name
-     * @stable ICU 2.0
-     */
-    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
-                                         const Locale& displayLocale,
-                                         UnicodeString& name);
-
-    /**
-     * Get name of the object for the desired Locale, in the langauge of the
-     * default locale.
-     * @param objectLocale must be from getMatchingLocales
-     * @param name the fill-in parameter of the return value
-     * @return user-displayable name
-     * @stable ICU 2.0
-     */
-    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
-                                         UnicodeString& name);
-
-    /**
-     * Thread safe client-buffer-based cloning operation
-     *    Do NOT call delete on a safeclone, since 'new' is not used to create it.
-     * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
-     * If buffer is not large enough, new memory will be allocated.
-     * @param BufferSize reference to size of allocated space.
-     * If BufferSize == 0, a sufficient size for use in cloning will
-     * be returned ('pre-flighting')
-     * If BufferSize is not enough for a stack-based safe clone,
-     * new memory will be allocated.
-     * @param status to indicate whether the operation went on smoothly or there were errors
-     *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
-     *  necessary.
-     * @return pointer to the new clone
-     *
-     * @stable ICU 2.0
-     */
-    virtual BreakIterator *  createBufferClone(void *stackBuffer,
-                                               int32_t &BufferSize,
-                                               UErrorCode &status) = 0;
-
-    /**
-     *   Determine whether the BreakIterator was created in user memory by
-     *   createBufferClone(), and thus should not be deleted.  Such objects
-     *   must be closed by an explicit call to the destructor (not delete).
-     *  @stable ICU 2.0
-     */
-    inline UBool isBufferClone(void);
-
-#if !UCONFIG_NO_SERVICE
-    /**
-     * Register a new break iterator of the indicated kind, to use in the given locale.
-     * The break iterator will be adopted.  Clones of the iterator will be returned
-     * if a request for a break iterator of the given kind matches or falls back to
-     * this locale.
-     * @param toAdopt the BreakIterator instance to be adopted
-     * @param locale the Locale for which this instance is to be registered
-     * @param kind the type of iterator for which this instance is to be registered
-     * @param status the in/out status code, no special meanings are assigned
-     * @return a registry key that can be used to unregister this instance
-     * @stable ICU 2.4
-     */
-    static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
-                                        const Locale& locale,
-                                        UBreakIteratorType kind,
-                                        UErrorCode& status);
-
-    /**
-     * Unregister a previously-registered BreakIterator using the key returned from the
-     * register call.  Key becomes invalid after a successful call and should not be used again.
-     * The BreakIterator corresponding to the key will be deleted.
-     * @param key the registry key returned by a previous call to registerInstance
-     * @param status the in/out status code, no special meanings are assigned
-     * @return TRUE if the iterator for the key was successfully unregistered
-     * @stable ICU 2.4
-     */
-    static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
-
-    /**
-     * Return a StringEnumeration over the locales available at the time of the call,
-     * including registered locales.
-     * @return a StringEnumeration over the locales available at the time of the call
-     * @stable ICU 2.4
-     */
-    static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
-#endif
-
-    /**
-     * Returns the locale for this break iterator. Two flavors are available: valid and
-     * actual locale.
-     * @stable ICU 2.8
-     */
-    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
-
-    /** Get the locale for this break iterator object. You can choose between valid and actual locale.
-     *  @param type type of the locale we're looking for (valid or actual)
-     *  @param status error code for the operation
-     *  @return the locale
-     *  @internal
-     */
-    const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
-
- private:
-    static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status);
-    static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
-    static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
-
-    friend class ICUBreakIteratorFactory;
-    friend class ICUBreakIteratorService;
-
-protected:
-    /** @internal */
-    BreakIterator();
-    /** @internal */
-    UBool fBufferClone;
-    /** @internal */
-    BreakIterator (const BreakIterator &other) : UObject(other), fBufferClone(FALSE) {}
-
-private:
-
-    /** @internal */
-    char actualLocale[ULOC_FULLNAME_CAPACITY];
-    char validLocale[ULOC_FULLNAME_CAPACITY];
-
-    /**
-     * The assignment operator has no real implementation.
-     * It's provided to make the compiler happy. Do not call.
-     */
-    BreakIterator& operator=(const BreakIterator&);
-};
-
-inline UBool BreakIterator::isBufferClone()
-{
-    return fBufferClone;
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
-
-#endif // _BRKITER
-//eof
-

Copied: MacRuby/trunk/icu-1060/unicode/brkiter.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/brkiter.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/brkiter.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/brkiter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,557 @@
+/*
+********************************************************************************
+*   Copyright (C) 1997-2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+********************************************************************************
+*
+* File brkiter.h
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   02/18/97    aliu        Added typedef for TextCount.  Made DONE const.
+*   05/07/97    aliu        Fixed DLL declaration.
+*   07/09/97    jfitz       Renamed BreakIterator and interface synced with JDK
+*   08/11/98    helena      Sync-up JDK1.2.
+*   01/13/2000  helena      Added UErrorCode parameter to createXXXInstance methods.
+********************************************************************************
+*/
+
+#ifndef BRKITER_H
+#define BRKITER_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: Break Iterator.
+ */
+
+#if UCONFIG_NO_BREAK_ITERATION
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Allow the declaration of APIs with pointers to BreakIterator
+ * even when break iteration is removed from the build.
+ */
+class BreakIterator;
+
+U_NAMESPACE_END
+
+#else
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#include "unicode/locid.h"
+#include "unicode/ubrk.h"
+#include "unicode/strenum.h"
+#include "unicode/utext.h"
+#include "unicode/umisc.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * The BreakIterator class implements methods for finding the location
+ * of boundaries in text. BreakIterator is an abstract base class.
+ * Instances of BreakIterator maintain a current position and scan over
+ * text returning the index of characters where boundaries occur.
+ * <p>
+ * Line boundary analysis determines where a text string can be broken
+ * when line-wrapping. The mechanism correctly handles punctuation and
+ * hyphenated words.
+ * <p>
+ * Sentence boundary analysis allows selection with correct
+ * interpretation of periods within numbers and abbreviations, and
+ * trailing punctuation marks such as quotation marks and parentheses.
+ * <p>
+ * Word boundary analysis is used by search and replace functions, as
+ * well as within text editing applications that allow the user to
+ * select words with a double click. Word selection provides correct
+ * interpretation of punctuation marks within and following
+ * words. Characters that are not part of a word, such as symbols or
+ * punctuation marks, have word-breaks on both sides.
+ * <p>
+ * Character boundary analysis allows users to interact with
+ * characters as they expect to, for example, when moving the cursor
+ * through a text string. Character boundary analysis provides correct
+ * navigation of through character strings, regardless of how the
+ * character is stored.  For example, an accented character might be
+ * stored as a base character and a diacritical mark. What users
+ * consider to be a character can differ between languages.
+ * <p>
+ * The text boundary positions are found according to the rules
+ * described in Unicode Standard Annex #29, Text Boundaries, and
+ * Unicode Standard Annex #14, Line Breaking Properties.  These
+ * are available at http://www.unicode.org/reports/tr14/ and
+ * http://www.unicode.org/reports/tr29/.
+ * <p>
+ * In addition to the C++ API defined in this header file, a
+ * plain C API with equivalent functionality is defined in the
+ * file ubrk.h
+ * <p>
+ * Code snippits illustrating the use of the Break Iterator APIs
+ * are available in the ICU User Guide,
+ * http://icu-project.org/userguide/boundaryAnalysis.html
+ * and in the sample program icu/source/samples/break/break.cpp"
+ *
+ */
+class U_COMMON_API BreakIterator : public UObject {
+public:
+    /**
+     *  destructor
+     *  @stable ICU 2.0
+     */
+    virtual ~BreakIterator();
+
+    /**
+     * Return true if another object is semantically equal to this
+     * one. The other object should be an instance of the same subclass of
+     * BreakIterator. Objects of different subclasses are considered
+     * unequal.
+     * <P>
+     * Return true if this BreakIterator is at the same position in the
+     * same text, and is the same class and type (word, line, etc.) of
+     * BreakIterator, as the argument.  Text is considered the same if
+     * it contains the same characters, it need not be the same
+     * object, and styles are not considered.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const BreakIterator&) const = 0;
+
+    /**
+     * Returns the complement of the result of operator==
+     * @param rhs The BreakIterator to be compared for inequality
+     * @return the complement of the result of operator==
+     * @stable ICU 2.0
+     */
+    UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
+
+    /**
+     * Return a polymorphic copy of this object.  This is an abstract
+     * method which subclasses implement.
+     * @stable ICU 2.0
+     */
+    virtual BreakIterator* clone(void) const = 0;
+
+    /**
+     * Return a polymorphic class ID for this object. Different subclasses
+     * will return distinct unequal values.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const = 0;
+
+    /**
+     * Return a CharacterIterator over the text being analyzed.
+     * @stable ICU 2.0
+     */
+    virtual CharacterIterator& getText(void) const = 0;
+
+
+    /**
+      *  Get a UText for the text being analyzed.
+      *  The returned UText is a shallow clone of the UText used internally
+      *  by the break iterator implementation.  It can safely be used to
+      *  access the text without impacting any break iterator operations,
+      *  but the underlying text itself must not be altered.
+      *
+      * @param fillIn A UText to be filled in.  If NULL, a new UText will be
+      *           allocated to hold the result.
+      * @param status receives any error codes.
+      * @return   The current UText for this break iterator.  If an input
+      *           UText was provided, it will always be returned.
+      * @stable ICU 3.4
+      */
+     virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
+
+    /**
+     * Change the text over which this operates. The text boundary is
+     * reset to the start.
+     * @param text The UnicodeString used to change the text.
+     * @stable ICU 2.0
+     */
+    virtual void  setText(const UnicodeString &text) = 0;
+
+    /**
+     * Reset the break iterator to operate over the text represented by
+     * the UText.  The iterator position is reset to the start.
+     *
+     * This function makes a shallow clone of the supplied UText.  This means
+     * that the caller is free to immediately close or otherwise reuse the
+     * Utext that was passed as a parameter, but that the underlying text itself
+     * must not be altered while being referenced by the break iterator.
+     *
+     * @param text The UText used to change the text.
+     * @param status receives any error codes.
+     * @stable ICU 3.4
+     */
+    virtual void  setText(UText *text, UErrorCode &status) = 0;
+
+    /**
+     * Change the text over which this operates. The text boundary is
+     * reset to the start.
+     * Note that setText(UText *) provides similar functionality to this function,
+     * and is more efficient.
+     * @param it The CharacterIterator used to change the text.
+     * @stable ICU 2.0
+     */
+    virtual void  adoptText(CharacterIterator* it) = 0;
+
+    enum {
+        /**
+         * DONE is returned by previous() and next() after all valid
+         * boundaries have been returned.
+         * @stable ICU 2.0
+         */
+        DONE = (int32_t)-1
+    };
+
+    /**
+     * Return the index of the first character in the text being scanned.
+     * @stable ICU 2.0
+     */
+    virtual int32_t first(void) = 0;
+
+    /**
+     * Return the index immediately BEYOND the last character in the text being scanned.
+     * @stable ICU 2.0
+     */
+    virtual int32_t last(void) = 0;
+
+    /**
+     * Return the boundary preceding the current boundary.
+     * @return The character index of the previous text boundary or DONE if all
+     * boundaries have been returned.
+     * @stable ICU 2.0
+     */
+    virtual int32_t previous(void) = 0;
+
+    /**
+     * Return the boundary following the current boundary.
+     * @return The character index of the next text boundary or DONE if all
+     * boundaries have been returned.
+     * @stable ICU 2.0
+     */
+    virtual int32_t next(void) = 0;
+
+    /**
+     * Return character index of the current interator position within the text.
+     * @return The boundary most recently returned.
+     * @stable ICU 2.0
+     */
+    virtual int32_t current(void) const = 0;
+
+    /**
+     * Return the first boundary following the specified offset.
+     * The value returned is always greater than the offset or
+     * the value BreakIterator.DONE
+     * @param offset the offset to begin scanning.
+     * @return The first boundary after the specified offset.
+     * @stable ICU 2.0
+     */
+    virtual int32_t following(int32_t offset) = 0;
+
+    /**
+     * Return the first boundary preceding the specified offset.
+     * The value returned is always smaller than the offset or
+     * the value BreakIterator.DONE
+     * @param offset the offset to begin scanning.
+     * @return The first boundary before the specified offset.
+     * @stable ICU 2.0
+     */
+    virtual int32_t preceding(int32_t offset) = 0;
+
+    /**
+     * Return true if the specfied position is a boundary position.
+     * As a side effect, the current position of the iterator is set
+     * to the first boundary position at or following the specified offset.
+     * @param offset the offset to check.
+     * @return True if "offset" is a boundary position.
+     * @stable ICU 2.0
+     */
+    virtual UBool isBoundary(int32_t offset) = 0;
+
+    /**
+     * Return the nth boundary from the current boundary
+     * @param n which boundary to return.  A value of 0
+     * does nothing.  Negative values move to previous boundaries
+     * and positive values move to later boundaries.
+     * @return The index of the nth boundary from the current position, or
+     * DONE if there are fewer than |n| boundaries in the specfied direction.
+     * @stable ICU 2.0
+     */
+    virtual int32_t next(int32_t n) = 0;
+
+    /**
+     * Create BreakIterator for word-breaks using the given locale.
+     * Returns an instance of a BreakIterator implementing word breaks.
+     * WordBreak is useful for word selection (ex. double click)
+     * @param where the locale.
+     * @param status the error code
+     * @return A BreakIterator for word-breaks.  The UErrorCode& status
+     * parameter is used to return status information to the user.
+     * To check whether the construction succeeded or not, you should check
+     * the value of U_SUCCESS(err).  If you wish more detailed information, you
+     * can check for informational error results which still indicate success.
+     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
+     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
+     * used; neither the requested locale nor any of its fall back locales
+     * could be found.
+     * The caller owns the returned object and is responsible for deleting it.
+     * @stable ICU 2.0
+     */
+    static BreakIterator* U_EXPORT2
+    createWordInstance(const Locale& where, UErrorCode& status);
+
+    /**
+     * Create BreakIterator for line-breaks using specified locale.
+     * Returns an instance of a BreakIterator implementing line breaks. Line
+     * breaks are logically possible line breaks, actual line breaks are
+     * usually determined based on display width.
+     * LineBreak is useful for word wrapping text.
+     * @param where the locale.
+     * @param status The error code.
+     * @return A BreakIterator for line-breaks.  The UErrorCode& status
+     * parameter is used to return status information to the user.
+     * To check whether the construction succeeded or not, you should check
+     * the value of U_SUCCESS(err).  If you wish more detailed information, you
+     * can check for informational error results which still indicate success.
+     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
+     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
+     * used; neither the requested locale nor any of its fall back locales
+     * could be found.
+     * The caller owns the returned object and is responsible for deleting it.
+     * @stable ICU 2.0
+     */
+    static BreakIterator* U_EXPORT2
+    createLineInstance(const Locale& where, UErrorCode& status);
+
+    /**
+     * Create BreakIterator for character-breaks using specified locale
+     * Returns an instance of a BreakIterator implementing character breaks.
+     * Character breaks are boundaries of combining character sequences.
+     * @param where the locale.
+     * @param status The error code.
+     * @return A BreakIterator for character-breaks.  The UErrorCode& status
+     * parameter is used to return status information to the user.
+     * To check whether the construction succeeded or not, you should check
+     * the value of U_SUCCESS(err).  If you wish more detailed information, you
+     * can check for informational error results which still indicate success.
+     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
+     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
+     * used; neither the requested locale nor any of its fall back locales
+     * could be found.
+     * The caller owns the returned object and is responsible for deleting it.
+     * @stable ICU 2.0
+     */
+    static BreakIterator* U_EXPORT2
+    createCharacterInstance(const Locale& where, UErrorCode& status);
+
+    /**
+     * Create BreakIterator for sentence-breaks using specified locale
+     * Returns an instance of a BreakIterator implementing sentence breaks.
+     * @param where the locale.
+     * @param status The error code.
+     * @return A BreakIterator for sentence-breaks.  The UErrorCode& status
+     * parameter is used to return status information to the user.
+     * To check whether the construction succeeded or not, you should check
+     * the value of U_SUCCESS(err).  If you wish more detailed information, you
+     * can check for informational error results which still indicate success.
+     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
+     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
+     * used; neither the requested locale nor any of its fall back locales
+     * could be found.
+     * The caller owns the returned object and is responsible for deleting it.
+     * @stable ICU 2.0
+     */
+    static BreakIterator* U_EXPORT2
+    createSentenceInstance(const Locale& where, UErrorCode& status);
+
+    /**
+     * Create BreakIterator for title-casing breaks using the specified locale
+     * Returns an instance of a BreakIterator implementing title breaks.
+     * The iterator returned locates title boundaries as described for
+     * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+     * please use Word Boundary iterator.{@link #createWordInstance }
+     *
+     * @param where the locale.
+     * @param status The error code.
+     * @return A BreakIterator for title-breaks.  The UErrorCode& status
+     * parameter is used to return status information to the user.
+     * To check whether the construction succeeded or not, you should check
+     * the value of U_SUCCESS(err).  If you wish more detailed information, you
+     * can check for informational error results which still indicate success.
+     * U_USING_FALLBACK_WARNING indicates that a fall back locale was used.  For
+     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+     * used.  U_USING_DEFAULT_WARNING indicates that the default locale data was
+     * used; neither the requested locale nor any of its fall back locales
+     * could be found.
+     * The caller owns the returned object and is responsible for deleting it.
+     * @stable ICU 2.1
+     */
+    static BreakIterator* U_EXPORT2
+    createTitleInstance(const Locale& where, UErrorCode& status);
+
+    /**
+     * Get the set of Locales for which TextBoundaries are installed.
+     * <p><b>Note:</b> this will not return locales added through the register
+     * call. To see the registered locales too, use the getAvailableLocales
+     * function that returns a StringEnumeration object </p>
+     * @param count the output parameter of number of elements in the locale list
+     * @return available locales
+     * @stable ICU 2.0
+     */
+    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+
+    /**
+     * Get name of the object for the desired Locale, in the desired langauge.
+     * @param objectLocale must be from getAvailableLocales.
+     * @param displayLocale specifies the desired locale for output.
+     * @param name the fill-in parameter of the return value
+     * Uses best match.
+     * @return user-displayable name
+     * @stable ICU 2.0
+     */
+    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
+                                         const Locale& displayLocale,
+                                         UnicodeString& name);
+
+    /**
+     * Get name of the object for the desired Locale, in the langauge of the
+     * default locale.
+     * @param objectLocale must be from getMatchingLocales
+     * @param name the fill-in parameter of the return value
+     * @return user-displayable name
+     * @stable ICU 2.0
+     */
+    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
+                                         UnicodeString& name);
+
+    /**
+     * Thread safe client-buffer-based cloning operation
+     *    Do NOT call delete on a safeclone, since 'new' is not used to create it.
+     * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
+     * If buffer is not large enough, new memory will be allocated.
+     * @param BufferSize reference to size of allocated space.
+     * If BufferSize == 0, a sufficient size for use in cloning will
+     * be returned ('pre-flighting')
+     * If BufferSize is not enough for a stack-based safe clone,
+     * new memory will be allocated.
+     * @param status to indicate whether the operation went on smoothly or there were errors
+     *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
+     *  necessary.
+     * @return pointer to the new clone
+     *
+     * @stable ICU 2.0
+     */
+    virtual BreakIterator *  createBufferClone(void *stackBuffer,
+                                               int32_t &BufferSize,
+                                               UErrorCode &status) = 0;
+
+    /**
+     *   Determine whether the BreakIterator was created in user memory by
+     *   createBufferClone(), and thus should not be deleted.  Such objects
+     *   must be closed by an explicit call to the destructor (not delete).
+     *  @stable ICU 2.0
+     */
+    inline UBool isBufferClone(void);
+
+#if !UCONFIG_NO_SERVICE
+    /**
+     * Register a new break iterator of the indicated kind, to use in the given locale.
+     * The break iterator will be adopted.  Clones of the iterator will be returned
+     * if a request for a break iterator of the given kind matches or falls back to
+     * this locale.
+     * @param toAdopt the BreakIterator instance to be adopted
+     * @param locale the Locale for which this instance is to be registered
+     * @param kind the type of iterator for which this instance is to be registered
+     * @param status the in/out status code, no special meanings are assigned
+     * @return a registry key that can be used to unregister this instance
+     * @stable ICU 2.4
+     */
+    static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
+                                        const Locale& locale,
+                                        UBreakIteratorType kind,
+                                        UErrorCode& status);
+
+    /**
+     * Unregister a previously-registered BreakIterator using the key returned from the
+     * register call.  Key becomes invalid after a successful call and should not be used again.
+     * The BreakIterator corresponding to the key will be deleted.
+     * @param key the registry key returned by a previous call to registerInstance
+     * @param status the in/out status code, no special meanings are assigned
+     * @return TRUE if the iterator for the key was successfully unregistered
+     * @stable ICU 2.4
+     */
+    static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
+
+    /**
+     * Return a StringEnumeration over the locales available at the time of the call,
+     * including registered locales.
+     * @return a StringEnumeration over the locales available at the time of the call
+     * @stable ICU 2.4
+     */
+    static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
+#endif
+
+    /**
+     * Returns the locale for this break iterator. Two flavors are available: valid and
+     * actual locale.
+     * @stable ICU 2.8
+     */
+    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+
+    /** Get the locale for this break iterator object. You can choose between valid and actual locale.
+     *  @param type type of the locale we're looking for (valid or actual)
+     *  @param status error code for the operation
+     *  @return the locale
+     *  @internal
+     */
+    const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
+
+ private:
+    static BreakIterator* buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode& status);
+    static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
+    static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
+
+    friend class ICUBreakIteratorFactory;
+    friend class ICUBreakIteratorService;
+
+protected:
+    /** @internal */
+    BreakIterator();
+    /** @internal */
+    UBool fBufferClone;
+    /** @internal */
+    BreakIterator (const BreakIterator &other) : UObject(other), fBufferClone(FALSE) {}
+
+private:
+
+    /** @internal */
+    char actualLocale[ULOC_FULLNAME_CAPACITY];
+    char validLocale[ULOC_FULLNAME_CAPACITY];
+
+    /**
+     * The assignment operator has no real implementation.
+     * It's provided to make the compiler happy. Do not call.
+     */
+    BreakIterator& operator=(const BreakIterator&);
+};
+
+inline UBool BreakIterator::isBufferClone()
+{
+    return fBufferClone;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif // _BRKITER
+//eof
+

Deleted: MacRuby/trunk/icu-1060/unicode/calendar.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/calendar.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/calendar.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,2170 +0,0 @@
-/*
-********************************************************************************
-*   Copyright (C) 1997-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-********************************************************************************
-*
-* File CALENDAR.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   04/22/97    aliu        Expanded and corrected comments and other header
-*                           contents.
-*   05/01/97    aliu        Made equals(), before(), after() arguments const.
-*   05/20/97    aliu        Replaced fAreFieldsSet with fAreFieldsInSync and
-*                           fAreAllFieldsSet.
-*   07/27/98    stephen     Sync up with JDK 1.2
-*   11/15/99    weiv        added YEAR_WOY and DOW_LOCAL
-*                           to EDateFields
-*    8/19/2002  srl         Removed Javaisms
-*   11/07/2003  srl         Update, clean up documentation.
-********************************************************************************
-*/
-
-#ifndef CALENDAR_H
-#define CALENDAR_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Calendar object
- */
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/uobject.h"
-#include "unicode/locid.h"
-#include "unicode/timezone.h"
-#include "unicode/ucal.h"
-#include "unicode/umisc.h"
-
-U_NAMESPACE_BEGIN
-
-class ICUServiceFactory;
-
-/**
- * @internal
- */
-typedef int32_t UFieldResolutionTable[12][8];
-
-/**
- * <code>Calendar</code> is an abstract base class for converting between
- * a <code>UDate</code> object and a set of integer fields such as
- * <code>YEAR</code>, <code>MONTH</code>, <code>DAY</code>, <code>HOUR</code>,
- * and so on. (A <code>UDate</code> object represents a specific instant in
- * time with millisecond precision. See UDate
- * for information about the <code>UDate</code> class.)
- *
- * <p>
- * Subclasses of <code>Calendar</code> interpret a <code>UDate</code>
- * according to the rules of a specific calendar system.
- * The most commonly used subclass of <code>Calendar</code> is
- * <code>GregorianCalendar</code>. Other subclasses could represent
- * the various types of lunar calendars in use in many parts of the world.
- *
- * <p>
- * <b>NOTE</b>: (ICU 2.6) The subclass interface should be considered unstable
- * - it WILL change.
- *
- * <p>
- * Like other locale-sensitive classes, <code>Calendar</code> provides a
- * static method, <code>createInstance</code>, for getting a generally useful
- * object of this type. <code>Calendar</code>'s <code>createInstance</code> method
- * returns the appropriate <code>Calendar</code> subclass whose
- * time fields have been initialized with the current date and time:
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * Calendar *rightNow = Calendar::createInstance(errCode);
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- *
- * <p>
- * A <code>Calendar</code> object can produce all the time field values
- * needed to implement the date-time formatting for a particular language
- * and calendar style (for example, Japanese-Gregorian, Japanese-Traditional).
- *
- * <p>
- * When computing a <code>UDate</code> from time fields, two special circumstances
- * may arise: there may be insufficient information to compute the
- * <code>UDate</code> (such as only year and month but no day in the month),
- * or there may be inconsistent information (such as "Tuesday, July 15, 1996"
- * -- July 15, 1996 is actually a Monday).
- *
- * <p>
- * <strong>Insufficient information.</strong> The calendar will use default
- * information to specify the missing fields. This may vary by calendar; for
- * the Gregorian calendar, the default for a field is the same as that of the
- * start of the epoch: i.e., YEAR = 1970, MONTH = JANUARY, DATE = 1, etc.
- *
- * <p>
- * <strong>Inconsistent information.</strong> If fields conflict, the calendar
- * will give preference to fields set more recently. For example, when
- * determining the day, the calendar will look for one of the following
- * combinations of fields.  The most recent combination, as determined by the
- * most recently set single field, will be used.
- *
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * MONTH + DAY_OF_MONTH
- * MONTH + WEEK_OF_MONTH + DAY_OF_WEEK
- * MONTH + DAY_OF_WEEK_IN_MONTH + DAY_OF_WEEK
- * DAY_OF_YEAR
- * DAY_OF_WEEK + WEEK_OF_YEAR
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- *
- * For the time of day:
- *
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * HOUR_OF_DAY
- * AM_PM + HOUR
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- *
- * <p>
- * <strong>Note:</strong> for some non-Gregorian calendars, different
- * fields may be necessary for complete disambiguation. For example, a full
- * specification of the historial Arabic astronomical calendar requires year,
- * month, day-of-month <em>and</em> day-of-week in some cases.
- *
- * <p>
- * <strong>Note:</strong> There are certain possible ambiguities in
- * interpretation of certain singular times, which are resolved in the
- * following ways:
- * <ol>
- *     <li> 24:00:00 "belongs" to the following day. That is,
- *          23:59 on Dec 31, 1969 &lt; 24:00 on Jan 1, 1970 &lt; 24:01:00 on Jan 1, 1970
- *
- *     <li> Although historically not precise, midnight also belongs to "am",
- *          and noon belongs to "pm", so on the same day,
- *          12:00 am (midnight) &lt; 12:01 am, and 12:00 pm (noon) &lt; 12:01 pm
- * </ol>
- *
- * <p>
- * The date or time format strings are not part of the definition of a
- * calendar, as those must be modifiable or overridable by the user at
- * runtime. Use {@link DateFormat}
- * to format dates.
- *
- * <p>
- * <code>Calendar</code> provides an API for field "rolling", where fields
- * can be incremented or decremented, but wrap around. For example, rolling the
- * month up in the date <code>December 12, <b>1996</b></code> results in
- * <code>January 12, <b>1996</b></code>.
- *
- * <p>
- * <code>Calendar</code> also provides a date arithmetic function for
- * adding the specified (signed) amount of time to a particular time field.
- * For example, subtracting 5 days from the date <code>September 12, 1996</code>
- * results in <code>September 7, 1996</code>.
- *
- * @stable ICU 2.0
- */
-class U_I18N_API Calendar : public UObject {
-public:
-
-    /**
-     * Field IDs for date and time. Used to specify date/time fields. ERA is calendar
-     * specific. Example ranges given are for illustration only; see specific Calendar
-     * subclasses for actual ranges.
-     * @deprecated ICU 2.6. Use C enum UCalendarDateFields defined in ucal.h
-     */
-    enum EDateFields {
-#ifndef U_HIDE_DEPRECATED_API
-        ERA,                  // Example: 0..1
-        YEAR,                 // Example: 1..big number
-        MONTH,                // Example: 0..11
-        WEEK_OF_YEAR,         // Example: 1..53
-        WEEK_OF_MONTH,        // Example: 1..4
-        DATE,                 // Example: 1..31
-        DAY_OF_YEAR,          // Example: 1..365
-        DAY_OF_WEEK,          // Example: 1..7
-        DAY_OF_WEEK_IN_MONTH, // Example: 1..4, may be specified as -1
-        AM_PM,                // Example: 0..1
-        HOUR,                 // Example: 0..11
-        HOUR_OF_DAY,          // Example: 0..23
-        MINUTE,               // Example: 0..59
-        SECOND,               // Example: 0..59
-        MILLISECOND,          // Example: 0..999
-        ZONE_OFFSET,          // Example: -12*U_MILLIS_PER_HOUR..12*U_MILLIS_PER_HOUR
-        DST_OFFSET,           // Example: 0 or U_MILLIS_PER_HOUR
-        YEAR_WOY,             // 'Y' Example: 1..big number - Year of Week of Year
-        DOW_LOCAL,            // 'e' Example: 1..7 - Day of Week / Localized
-		
-		EXTENDED_YEAR,
-		JULIAN_DAY,
-		MILLISECONDS_IN_DAY,
-		IS_LEAP_MONTH,
-
-        FIELD_COUNT = UCAL_FIELD_COUNT // See ucal.h for other fields.
-#endif /* U_HIDE_DEPRECATED_API */
-    };
-
-    /**
-     * Useful constant for days of week. Note: Calendar day-of-week is 1-based. Clients
-     * who create locale resources for the field of first-day-of-week should be aware of
-     * this. For instance, in US locale, first-day-of-week is set to 1, i.e., SUNDAY.
-     * @deprecated ICU 2.6. Use C enum UCalendarDaysOfWeek defined in ucal.h
-     */
-    enum EDaysOfWeek {
-#ifndef U_HIDE_DEPRECATED_API
-        SUNDAY = 1,
-        MONDAY,
-        TUESDAY,
-        WEDNESDAY,
-        THURSDAY,
-        FRIDAY,
-        SATURDAY
-#endif /* U_HIDE_DEPRECATED_API */
-    };
-
-    /**
-     * Useful constants for month. Note: Calendar month is 0-based.
-     * @deprecated ICU 2.6. Use C enum UCalendarMonths defined in ucal.h
-     */
-    enum EMonths {
-#ifndef U_HIDE_DEPRECATED_API
-        JANUARY,
-        FEBRUARY,
-        MARCH,
-        APRIL,
-        MAY,
-        JUNE,
-        JULY,
-        AUGUST,
-        SEPTEMBER,
-        OCTOBER,
-        NOVEMBER,
-        DECEMBER,
-        UNDECIMBER
-#endif /* U_HIDE_DEPRECATED_API */
-    };
-
-    /**
-     * Useful constants for hour in 12-hour clock. Used in GregorianCalendar.
-     * @deprecated ICU 2.6. Use C enum UCalendarAMPMs defined in ucal.h
-     */
-    enum EAmpm {
-#ifndef U_HIDE_DEPRECATED_API
-        AM,
-        PM
-#endif /* U_HIDE_DEPRECATED_API */
-    };
-
-    /**
-     * destructor
-     * @stable ICU 2.0
-     */
-    virtual ~Calendar();
-
-    /**
-     * Create and return a polymorphic copy of this calendar.
-     *
-     * @return    a polymorphic copy of this calendar.
-     * @stable ICU 2.0
-     */
-    virtual Calendar* clone(void) const = 0;
-
-    /**
-     * Creates a Calendar using the default timezone and locale. Clients are responsible
-     * for deleting the object returned.
-     *
-     * @param success  Indicates the success/failure of Calendar creation. Filled in
-     *                 with U_ZERO_ERROR if created successfully, set to a failure result
-     *                 otherwise. U_MISSING_RESOURCE_ERROR will be returned if the resource data
-     *                 requests a calendar type which has not been installed.
-     * @return         A Calendar if created successfully. NULL otherwise.
-     * @stable ICU 2.0
-     */
-    static Calendar* U_EXPORT2 createInstance(UErrorCode& success);
-
-    /**
-     * Creates a Calendar using the given timezone and the default locale.
-     * The Calendar takes ownership of zoneToAdopt; the
-     * client must not delete it.
-     *
-     * @param zoneToAdopt  The given timezone to be adopted.
-     * @param success      Indicates the success/failure of Calendar creation. Filled in
-     *                     with U_ZERO_ERROR if created successfully, set to a failure result
-     *                     otherwise.
-     * @return             A Calendar if created successfully. NULL otherwise.
-     * @stable ICU 2.0
-     */
-    static Calendar* U_EXPORT2 createInstance(TimeZone* zoneToAdopt, UErrorCode& success);
-
-    /**
-     * Creates a Calendar using the given timezone and the default locale.  The TimeZone
-     * is _not_ adopted; the client is still responsible for deleting it.
-     *
-     * @param zone  The timezone.
-     * @param success      Indicates the success/failure of Calendar creation. Filled in
-     *                     with U_ZERO_ERROR if created successfully, set to a failure result
-     *                     otherwise.
-     * @return             A Calendar if created successfully. NULL otherwise.
-     * @stable ICU 2.0
-     */
-    static Calendar* U_EXPORT2 createInstance(const TimeZone& zone, UErrorCode& success);
-
-    /**
-     * Creates a Calendar using the default timezone and the given locale.
-     *
-     * @param aLocale  The given locale.
-     * @param success  Indicates the success/failure of Calendar creation. Filled in
-     *                 with U_ZERO_ERROR if created successfully, set to a failure result
-     *                 otherwise.
-     * @return         A Calendar if created successfully. NULL otherwise.
-     * @stable ICU 2.0
-     */
-    static Calendar* U_EXPORT2 createInstance(const Locale& aLocale, UErrorCode& success);
-
-    /**
-     * Creates a Calendar using the given timezone and given locale.
-     * The Calendar takes ownership of zoneToAdopt; the
-     * client must not delete it.
-     *
-     * @param zoneToAdopt  The given timezone to be adopted.
-     * @param aLocale      The given locale.
-     * @param success      Indicates the success/failure of Calendar creation. Filled in
-     *                     with U_ZERO_ERROR if created successfully, set to a failure result
-     *                     otherwise.
-     * @return             A Calendar if created successfully. NULL otherwise.
-     * @stable ICU 2.0
-     */
-    static Calendar* U_EXPORT2 createInstance(TimeZone* zoneToAdopt, const Locale& aLocale, UErrorCode& success);
-
-    /**
-     * Gets a Calendar using the given timezone and given locale.  The TimeZone
-     * is _not_ adopted; the client is still responsible for deleting it.
-     *
-     * @param zoneToAdopt  The given timezone to be adopted.
-     * @param aLocale      The given locale.
-     * @param success      Indicates the success/failure of Calendar creation. Filled in
-     *                     with U_ZERO_ERROR if created successfully, set to a failure result
-     *                     otherwise.
-     * @return             A Calendar if created successfully. NULL otherwise.
-     * @stable ICU 2.0
-     */
-    static Calendar* U_EXPORT2 createInstance(const TimeZone& zoneToAdopt, const Locale& aLocale, UErrorCode& success);
-
-    /**
-     * Returns a list of the locales for which Calendars are installed.
-     *
-     * @param count  Number of locales returned.
-     * @return       An array of Locale objects representing the set of locales for which
-     *               Calendars are installed.  The system retains ownership of this list;
-     *               the caller must NOT delete it. Does not include user-registered Calendars.
-     * @stable ICU 2.0
-     */
-    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
-
-    /**
-     * Returns the current UTC (GMT) time measured in milliseconds since 0:00:00 on 1/1/70
-     * (derived from the system time).
-     *
-     * @return   The current UTC time in milliseconds.
-     * @stable ICU 2.0
-     */
-    static UDate U_EXPORT2 getNow(void);
-
-    /**
-     * Gets this Calendar's time as milliseconds. May involve recalculation of time due
-     * to previous calls to set time field values. The time specified is non-local UTC
-     * (GMT) time. Although this method is const, this object may actually be changed
-     * (semantically const).
-     *
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @return        The current time in UTC (GMT) time, or zero if the operation
-     *                failed.
-     * @stable ICU 2.0
-     */
-    inline UDate getTime(UErrorCode& status) const { return getTimeInMillis(status); }
-
-    /**
-     * Sets this Calendar's current time with the given UDate. The time specified should
-     * be in non-local UTC (GMT) time.
-     *
-     * @param date  The given UDate in UTC (GMT) time.
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @stable ICU 2.0
-     */
-    inline void setTime(UDate date, UErrorCode& status) { setTimeInMillis(date, status); }
-
-    /**
-     * Compares the equality of two Calendar objects. Objects of different subclasses
-     * are considered unequal. This comparison is very exacting; two Calendar objects
-     * must be in exactly the same state to be considered equal. To compare based on the
-     * represented time, use equals() instead.
-     *
-     * @param that  The Calendar object to be compared with.
-     * @return      True if the given Calendar is the same as this Calendar; false
-     *              otherwise.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const Calendar& that) const;
-
-    /**
-     * Compares the inequality of two Calendar objects.
-     *
-     * @param that  The Calendar object to be compared with.
-     * @return      True if the given Calendar is not the same as this Calendar; false
-     *              otherwise.
-     * @stable ICU 2.0
-     */
-    UBool operator!=(const Calendar& that) const {return !operator==(that);}
-
-    /**
-     * Returns TRUE if the given Calendar object is equivalent to this
-     * one.  An equivalent Calendar will behave exactly as this one
-     * does, but it may be set to a different time.  By contrast, for
-     * the operator==() method to return TRUE, the other Calendar must
-     * be set to the same time.
-     *
-     * @param other the Calendar to be compared with this Calendar
-     * @stable ICU 2.4
-     */
-    virtual UBool isEquivalentTo(const Calendar& other) const;
-
-    /**
-     * Compares the Calendar time, whereas Calendar::operator== compares the equality of
-     * Calendar objects.
-     *
-     * @param when    The Calendar to be compared with this Calendar. Although this is a
-     *                const parameter, the object may be modified physically
-     *                (semantically const).
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @return        True if the current time of this Calendar is equal to the time of
-     *                Calendar when; false otherwise.
-     * @stable ICU 2.0
-     */
-    UBool equals(const Calendar& when, UErrorCode& status) const;
-
-    /**
-     * Returns true if this Calendar's current time is before "when"'s current time.
-     *
-     * @param when    The Calendar to be compared with this Calendar. Although this is a
-     *                const parameter, the object may be modified physically
-     *                (semantically const).
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @return        True if the current time of this Calendar is before the time of
-     *                Calendar when; false otherwise.
-     * @stable ICU 2.0
-     */
-    UBool before(const Calendar& when, UErrorCode& status) const;
-
-    /**
-     * Returns true if this Calendar's current time is after "when"'s current time.
-     *
-     * @param when    The Calendar to be compared with this Calendar. Although this is a
-     *                const parameter, the object may be modified physically
-     *                (semantically const).
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @return        True if the current time of this Calendar is after the time of
-     *                Calendar when; false otherwise.
-     * @stable ICU 2.0
-     */
-    UBool after(const Calendar& when, UErrorCode& status) const;
-
-    /**
-     * UDate Arithmetic function. Adds the specified (signed) amount of time to the given
-     * time field, based on the calendar's rules. For example, to subtract 5 days from
-     * the current time of the calendar, call add(Calendar::DATE, -5). When adding on
-     * the month or Calendar::MONTH field, other fields like date might conflict and
-     * need to be changed. For instance, adding 1 month on the date 01/31/96 will result
-     * in 02/29/96.
-     *
-     * @param field   Specifies which date field to modify.
-     * @param amount  The amount of time to be added to the field, in the natural unit
-     *                for that field (e.g., days for the day fields, hours for the hour
-     *                field.)
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @deprecated ICU 2.6. use add(UCalendarDateFields field, int32_t amount, UErrorCode& status) instead.
-     */
-    virtual void add(EDateFields field, int32_t amount, UErrorCode& status);
-
-    /**
-     * UDate Arithmetic function. Adds the specified (signed) amount of time to the given
-     * time field, based on the calendar's rules. For example, to subtract 5 days from
-     * the current time of the calendar, call add(Calendar::DATE, -5). When adding on
-     * the month or Calendar::MONTH field, other fields like date might conflict and
-     * need to be changed. For instance, adding 1 month on the date 01/31/96 will result
-     * in 02/29/96.
-     *
-     * @param field   Specifies which date field to modify.
-     * @param amount  The amount of time to be added to the field, in the natural unit
-     *                for that field (e.g., days for the day fields, hours for the hour
-     *                field.)
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @stable ICU 2.6.
-     */
-    virtual void add(UCalendarDateFields field, int32_t amount, UErrorCode& status);
-
-    /**
-     * Time Field Rolling function. Rolls (up/down) a single unit of time on the given
-     * time field. For example, to roll the current date up by one day, call
-     * roll(Calendar::DATE, true). When rolling on the year or Calendar::YEAR field, it
-     * will roll the year value in the range between getMinimum(Calendar::YEAR) and the
-     * value returned by getMaximum(Calendar::YEAR). When rolling on the month or
-     * Calendar::MONTH field, other fields like date might conflict and, need to be
-     * changed. For instance, rolling the month up on the date 01/31/96 will result in
-     * 02/29/96. Rolling up always means rolling forward in time; e.g., rolling the year
-     * up on "100 BC" will result in "99 BC", for Gregorian calendar. When rolling on the
-     * hour-in-day or Calendar::HOUR_OF_DAY field, it will roll the hour value in the range
-     * between 0 and 23, which is zero-based.
-     * <P>
-     * NOTE: Do not use this method -- use roll(EDateFields, int, UErrorCode&) instead.
-     *
-     * @param field   The time field.
-     * @param up      Indicates if the value of the specified time field is to be rolled
-     *                up or rolled down. Use true if rolling up, false otherwise.
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @deprecated ICU 2.6. Use roll(UCalendarDateFields field, UBool up, UErrorCode& status) instead.
-     */
-    inline void roll(EDateFields field, UBool up, UErrorCode& status);
-
-    /**
-     * Time Field Rolling function. Rolls (up/down) a single unit of time on the given
-     * time field. For example, to roll the current date up by one day, call
-     * roll(Calendar::DATE, true). When rolling on the year or Calendar::YEAR field, it
-     * will roll the year value in the range between getMinimum(Calendar::YEAR) and the
-     * value returned by getMaximum(Calendar::YEAR). When rolling on the month or
-     * Calendar::MONTH field, other fields like date might conflict and, need to be
-     * changed. For instance, rolling the month up on the date 01/31/96 will result in
-     * 02/29/96. Rolling up always means rolling forward in time; e.g., rolling the year
-     * up on "100 BC" will result in "99 BC", for Gregorian calendar. When rolling on the
-     * hour-in-day or Calendar::HOUR_OF_DAY field, it will roll the hour value in the range
-     * between 0 and 23, which is zero-based.
-     * <P>
-     * NOTE: Do not use this method -- use roll(UCalendarDateFields, int, UErrorCode&) instead.
-     *
-     * @param field   The time field.
-     * @param up      Indicates if the value of the specified time field is to be rolled
-     *                up or rolled down. Use true if rolling up, false otherwise.
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @stable ICU 2.6.
-     */
-    inline void roll(UCalendarDateFields field, UBool up, UErrorCode& status);
-
-    /**
-     * Time Field Rolling function. Rolls by the given amount on the given
-     * time field. For example, to roll the current date up by one day, call
-     * roll(Calendar::DATE, +1, status). When rolling on the month or
-     * Calendar::MONTH field, other fields like date might conflict and, need to be
-     * changed. For instance, rolling the month up on the date 01/31/96 will result in
-     * 02/29/96.  Rolling by a positive value always means rolling forward in time;
-     * e.g., rolling the year by +1 on "100 BC" will result in "99 BC", for Gregorian
-     * calendar. When rolling on the hour-in-day or Calendar::HOUR_OF_DAY field, it will
-     * roll the hour value in the range between 0 and 23, which is zero-based.
-     * <P>
-     * The only difference between roll() and add() is that roll() does not change
-     * the value of more significant fields when it reaches the minimum or maximum
-     * of its range, whereas add() does.
-     *
-     * @param field   The time field.
-     * @param amount  Indicates amount to roll.
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid, this will be set to
-     *                an error status.
-     * @deprecated ICU 2.6. Use roll(UCalendarDateFields field, int32_t amount, UErrorCode& status) instead.
-     */
-    virtual void roll(EDateFields field, int32_t amount, UErrorCode& status);
-
-    /**
-     * Time Field Rolling function. Rolls by the given amount on the given
-     * time field. For example, to roll the current date up by one day, call
-     * roll(Calendar::DATE, +1, status). When rolling on the month or
-     * Calendar::MONTH field, other fields like date might conflict and, need to be
-     * changed. For instance, rolling the month up on the date 01/31/96 will result in
-     * 02/29/96.  Rolling by a positive value always means rolling forward in time;
-     * e.g., rolling the year by +1 on "100 BC" will result in "99 BC", for Gregorian
-     * calendar. When rolling on the hour-in-day or Calendar::HOUR_OF_DAY field, it will
-     * roll the hour value in the range between 0 and 23, which is zero-based.
-     * <P>
-     * The only difference between roll() and add() is that roll() does not change
-     * the value of more significant fields when it reaches the minimum or maximum
-     * of its range, whereas add() does.
-     *
-     * @param field   The time field.
-     * @param amount  Indicates amount to roll.
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid, this will be set to
-     *                an error status.
-     * @stable ICU 2.6.
-     */
-    virtual void roll(UCalendarDateFields field, int32_t amount, UErrorCode& status);
-
-    /**
-     * Return the difference between the given time and the time this
-     * calendar object is set to.  If this calendar is set
-     * <em>before</em> the given time, the returned value will be
-     * positive.  If this calendar is set <em>after</em> the given
-     * time, the returned value will be negative.  The
-     * <code>field</code> parameter specifies the units of the return
-     * value.  For example, if <code>fieldDifference(when,
-     * Calendar::MONTH)</code> returns 3, then this calendar is set to
-     * 3 months before <code>when</code>, and possibly some addition
-     * time less than one month.
-     *
-     * <p>As a side effect of this call, this calendar is advanced
-     * toward <code>when</code> by the given amount.  That is, calling
-     * this method has the side effect of calling <code>add(field,
-     * n)</code>, where <code>n</code> is the return value.
-     *
-     * <p>Usage: To use this method, call it first with the largest
-     * field of interest, then with progressively smaller fields.  For
-     * example:
-     *
-     * <pre>
-     * int y = cal->fieldDifference(when, Calendar::YEAR, err);
-     * int m = cal->fieldDifference(when, Calendar::MONTH, err);
-     * int d = cal->fieldDifference(when, Calendar::DATE, err);</pre>
-     *
-     * computes the difference between <code>cal</code> and
-     * <code>when</code> in years, months, and days.
-     *
-     * <p>Note: <code>fieldDifference()</code> is
-     * <em>asymmetrical</em>.  That is, in the following code:
-     *
-     * <pre>
-     * cal->setTime(date1, err);
-     * int m1 = cal->fieldDifference(date2, Calendar::MONTH, err);
-     * int d1 = cal->fieldDifference(date2, Calendar::DATE, err);
-     * cal->setTime(date2, err);
-     * int m2 = cal->fieldDifference(date1, Calendar::MONTH, err);
-     * int d2 = cal->fieldDifference(date1, Calendar::DATE, err);</pre>
-     *
-     * one might expect that <code>m1 == -m2 && d1 == -d2</code>.
-     * However, this is not generally the case, because of
-     * irregularities in the underlying calendar system (e.g., the
-     * Gregorian calendar has a varying number of days per month).
-     *
-     * @param when the date to compare this calendar's time to
-     * @param field the field in which to compute the result
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid, this will be set to
-     *                an error status.
-     * @return the difference, either positive or negative, between
-     * this calendar's time and <code>when</code>, in terms of
-     * <code>field</code>.
-     * @deprecated ICU 2.6. Use fieldDifference(UDate when, UCalendarDateFields field, UErrorCode& status).
-     */
-    virtual int32_t fieldDifference(UDate when, EDateFields field, UErrorCode& status);
-
-    /**
-     * Return the difference between the given time and the time this
-     * calendar object is set to.  If this calendar is set
-     * <em>before</em> the given time, the returned value will be
-     * positive.  If this calendar is set <em>after</em> the given
-     * time, the returned value will be negative.  The
-     * <code>field</code> parameter specifies the units of the return
-     * value.  For example, if <code>fieldDifference(when,
-     * Calendar::MONTH)</code> returns 3, then this calendar is set to
-     * 3 months before <code>when</code>, and possibly some addition
-     * time less than one month.
-     *
-     * <p>As a side effect of this call, this calendar is advanced
-     * toward <code>when</code> by the given amount.  That is, calling
-     * this method has the side effect of calling <code>add(field,
-     * n)</code>, where <code>n</code> is the return value.
-     *
-     * <p>Usage: To use this method, call it first with the largest
-     * field of interest, then with progressively smaller fields.  For
-     * example:
-     *
-     * <pre>
-     * int y = cal->fieldDifference(when, Calendar::YEAR, err);
-     * int m = cal->fieldDifference(when, Calendar::MONTH, err);
-     * int d = cal->fieldDifference(when, Calendar::DATE, err);</pre>
-     *
-     * computes the difference between <code>cal</code> and
-     * <code>when</code> in years, months, and days.
-     *
-     * <p>Note: <code>fieldDifference()</code> is
-     * <em>asymmetrical</em>.  That is, in the following code:
-     *
-     * <pre>
-     * cal->setTime(date1, err);
-     * int m1 = cal->fieldDifference(date2, Calendar::MONTH, err);
-     * int d1 = cal->fieldDifference(date2, Calendar::DATE, err);
-     * cal->setTime(date2, err);
-     * int m2 = cal->fieldDifference(date1, Calendar::MONTH, err);
-     * int d2 = cal->fieldDifference(date1, Calendar::DATE, err);</pre>
-     *
-     * one might expect that <code>m1 == -m2 && d1 == -d2</code>.
-     * However, this is not generally the case, because of
-     * irregularities in the underlying calendar system (e.g., the
-     * Gregorian calendar has a varying number of days per month).
-     *
-     * @param when the date to compare this calendar's time to
-     * @param field the field in which to compute the result
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid, this will be set to
-     *                an error status.
-     * @return the difference, either positive or negative, between
-     * this calendar's time and <code>when</code>, in terms of
-     * <code>field</code>.
-     * @stable ICU 2.6.
-     */
-    virtual int32_t fieldDifference(UDate when, UCalendarDateFields field, UErrorCode& status);
-
-    /**
-     * Sets the calendar's time zone to be the one passed in. The Calendar takes ownership
-     * of the TimeZone; the caller is no longer responsible for deleting it.  If the
-     * given time zone is NULL, this function has no effect.
-     *
-     * @param value  The given time zone.
-     * @stable ICU 2.0
-     */
-    void adoptTimeZone(TimeZone* value);
-
-    /**
-     * Sets the calendar's time zone to be the same as the one passed in. The TimeZone
-     * passed in is _not_ adopted; the client is still responsible for deleting it.
-     *
-     * @param zone  The given time zone.
-     * @stable ICU 2.0
-     */
-    void setTimeZone(const TimeZone& zone);
-
-    /**
-     * Returns a reference to the time zone owned by this calendar. The returned reference
-     * is only valid until clients make another call to adoptTimeZone or setTimeZone,
-     * or this Calendar is destroyed.
-     *
-     * @return   The time zone object associated with this calendar.
-     * @stable ICU 2.0
-     */
-    const TimeZone& getTimeZone(void) const;
-
-    /**
-     * Returns the time zone owned by this calendar. The caller owns the returned object
-     * and must delete it when done.  After this call, the new time zone associated
-     * with this Calendar is the default TimeZone as returned by TimeZone::createDefault().
-     *
-     * @return   The time zone object which was associated with this calendar.
-     * @stable ICU 2.0
-     */
-    TimeZone* orphanTimeZone(void);
-
-    /**
-     * Queries if the current date for this Calendar is in Daylight Savings Time.
-     *
-     * @param status Fill-in parameter which receives the status of this operation.
-     * @return   True if the current date for this Calendar is in Daylight Savings Time,
-     *           false, otherwise.
-     * @stable ICU 2.0
-     */
-    virtual UBool inDaylightTime(UErrorCode& status) const = 0;
-
-    /**
-     * Specifies whether or not date/time interpretation is to be lenient. With lenient
-     * interpretation, a date such as "February 942, 1996" will be treated as being
-     * equivalent to the 941st day after February 1, 1996. With strict interpretation,
-     * such dates will cause an error when computing time from the time field values
-     * representing the dates.
-     *
-     * @param lenient  True specifies date/time interpretation to be lenient.
-     *
-     * @see            DateFormat#setLenient
-     * @stable ICU 2.0
-     */
-    void setLenient(UBool lenient);
-
-    /**
-     * Tells whether date/time interpretation is to be lenient.
-     *
-     * @return   True tells that date/time interpretation is to be lenient.
-     * @stable ICU 2.0
-     */
-    UBool isLenient(void) const;
-
-    /**
-     * Sets what the first day of the week is; e.g., Sunday in US, Monday in France.
-     *
-     * @param value  The given first day of the week.
-     * @deprecated ICU 2.6. Use setFirstDayOfWeek(UCalendarDaysOfWeek value) instead.
-     */
-    void setFirstDayOfWeek(EDaysOfWeek value);
-
-    /**
-     * Sets what the first day of the week is; e.g., Sunday in US, Monday in France.
-     *
-     * @param value  The given first day of the week.
-     * @stable ICU 2.6.
-     */
-    void setFirstDayOfWeek(UCalendarDaysOfWeek value);
-
-    /**
-     * Gets what the first day of the week is; e.g., Sunday in US, Monday in France.
-     *
-     * @return   The first day of the week.
-     * @deprecated ICU 2.6 use the overload with error code
-     */
-    EDaysOfWeek getFirstDayOfWeek(void) const;
-
-    /**
-     * Gets what the first day of the week is; e.g., Sunday in US, Monday in France.
-     *
-     * @param status error code
-     * @return   The first day of the week.
-     * @stable ICU 2.6
-     */
-    UCalendarDaysOfWeek getFirstDayOfWeek(UErrorCode &status) const;
-
-    /**
-     * Sets what the minimal days required in the first week of the year are; For
-     * example, if the first week is defined as one that contains the first day of the
-     * first month of a year, call the method with value 1. If it must be a full week,
-     * use value 7.
-     *
-     * @param value  The given minimal days required in the first week of the year.
-     * @stable ICU 2.0
-     */
-    void setMinimalDaysInFirstWeek(uint8_t value);
-
-    /**
-     * Gets what the minimal days required in the first week of the year are; e.g., if
-     * the first week is defined as one that contains the first day of the first month
-     * of a year, getMinimalDaysInFirstWeek returns 1. If the minimal days required must
-     * be a full week, getMinimalDaysInFirstWeek returns 7.
-     *
-     * @return   The minimal days required in the first week of the year.
-     * @stable ICU 2.0
-     */
-    uint8_t getMinimalDaysInFirstWeek(void) const;
-
-    /**
-     * Gets the minimum value for the given time field. e.g., for Gregorian
-     * DAY_OF_MONTH, 1.
-     *
-     * @param field  The given time field.
-     * @return       The minimum value for the given time field.
-     * @deprecated ICU 2.6. Use getMinimum(UCalendarDateFields field) instead.
-     */
-    virtual int32_t getMinimum(EDateFields field) const;
-
-    /**
-     * Gets the minimum value for the given time field. e.g., for Gregorian
-     * DAY_OF_MONTH, 1.
-     *
-     * @param field  The given time field.
-     * @return       The minimum value for the given time field.
-     * @stable ICU 2.6.
-     */
-    virtual int32_t getMinimum(UCalendarDateFields field) const;
-
-    /**
-     * Gets the maximum value for the given time field. e.g. for Gregorian DAY_OF_MONTH,
-     * 31.
-     *
-     * @param field  The given time field.
-     * @return       The maximum value for the given time field.
-     * @deprecated ICU 2.6. Use getMaximum(UCalendarDateFields field) instead.
-     */
-    virtual int32_t getMaximum(EDateFields field) const;
-
-    /**
-     * Gets the maximum value for the given time field. e.g. for Gregorian DAY_OF_MONTH,
-     * 31.
-     *
-     * @param field  The given time field.
-     * @return       The maximum value for the given time field.
-     * @stable ICU 2.6.
-     */
-    virtual int32_t getMaximum(UCalendarDateFields field) const;
-
-    /**
-     * Gets the highest minimum value for the given field if varies. Otherwise same as
-     * getMinimum(). For Gregorian, no difference.
-     *
-     * @param field  The given time field.
-     * @return       The highest minimum value for the given time field.
-     * @deprecated ICU 2.6. Use getGreatestMinimum(UCalendarDateFields field) instead.
-     */
-    virtual int32_t getGreatestMinimum(EDateFields field) const;
-
-    /**
-     * Gets the highest minimum value for the given field if varies. Otherwise same as
-     * getMinimum(). For Gregorian, no difference.
-     *
-     * @param field  The given time field.
-     * @return       The highest minimum value for the given time field.
-     * @stable ICU 2.6.
-     */
-    virtual int32_t getGreatestMinimum(UCalendarDateFields field) const;
-
-    /**
-     * Gets the lowest maximum value for the given field if varies. Otherwise same as
-     * getMaximum(). e.g., for Gregorian DAY_OF_MONTH, 28.
-     *
-     * @param field  The given time field.
-     * @return       The lowest maximum value for the given time field.
-     * @deprecated ICU 2.6. Use getLeastMaximum(UCalendarDateFields field) instead.
-     */
-    virtual int32_t getLeastMaximum(EDateFields field) const;
-
-    /**
-     * Gets the lowest maximum value for the given field if varies. Otherwise same as
-     * getMaximum(). e.g., for Gregorian DAY_OF_MONTH, 28.
-     *
-     * @param field  The given time field.
-     * @return       The lowest maximum value for the given time field.
-     * @stable ICU 2.6.
-     */
-    virtual int32_t getLeastMaximum(UCalendarDateFields field) const;
-
-    /**
-     * Return the minimum value that this field could have, given the current date.
-     * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum().
-     *
-     * The version of this function on Calendar uses an iterative algorithm to determine the
-     * actual minimum value for the field.  There is almost always a more efficient way to
-     * accomplish this (in most cases, you can simply return getMinimum()).  GregorianCalendar
-     * overrides this function with a more efficient implementation.
-     *
-     * @param field    the field to determine the minimum of
-     * @param status   Fill-in parameter which receives the status of this operation.
-     * @return         the minimum of the given field for the current date of this Calendar
-     * @deprecated ICU 2.6. Use getActualMinimum(UCalendarDateFields field, UErrorCode& status) instead.
-     */
-    int32_t getActualMinimum(EDateFields field, UErrorCode& status) const;
-
-    /**
-     * Return the minimum value that this field could have, given the current date.
-     * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum().
-     *
-     * The version of this function on Calendar uses an iterative algorithm to determine the
-     * actual minimum value for the field.  There is almost always a more efficient way to
-     * accomplish this (in most cases, you can simply return getMinimum()).  GregorianCalendar
-     * overrides this function with a more efficient implementation.
-     *
-     * @param field    the field to determine the minimum of
-     * @param status   Fill-in parameter which receives the status of this operation.
-     * @return         the minimum of the given field for the current date of this Calendar
-     * @stable ICU 2.6.
-     */
-    virtual int32_t getActualMinimum(UCalendarDateFields field, UErrorCode& status) const;
-
-    /**
-     * Return the maximum value that this field could have, given the current date.
-     * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual
-     * maximum would be 28; for "Feb 3, 1996" it s 29.  Similarly for a Hebrew calendar,
-     * for some years the actual maximum for MONTH is 12, and for others 13.
-     *
-     * The version of this function on Calendar uses an iterative algorithm to determine the
-     * actual maximum value for the field.  There is almost always a more efficient way to
-     * accomplish this (in most cases, you can simply return getMaximum()).  GregorianCalendar
-     * overrides this function with a more efficient implementation.
-     *
-     * @param field    the field to determine the maximum of
-     * @param status   Fill-in parameter which receives the status of this operation.
-     * @return         the maximum of the given field for the current date of this Calendar
-     * @deprecated ICU 2.6. Use getActualMaximum(UCalendarDateFields field, UErrorCode& status) instead.
-     */
-    int32_t getActualMaximum(EDateFields field, UErrorCode& status) const;
-
-    /**
-     * Return the maximum value that this field could have, given the current date.
-     * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual
-     * maximum would be 28; for "Feb 3, 1996" it s 29.  Similarly for a Hebrew calendar,
-     * for some years the actual maximum for MONTH is 12, and for others 13.
-     *
-     * The version of this function on Calendar uses an iterative algorithm to determine the
-     * actual maximum value for the field.  There is almost always a more efficient way to
-     * accomplish this (in most cases, you can simply return getMaximum()).  GregorianCalendar
-     * overrides this function with a more efficient implementation.
-     *
-     * @param field    the field to determine the maximum of
-     * @param status   Fill-in parameter which receives the status of this operation.
-     * @return         the maximum of the given field for the current date of this Calendar
-     * @stable ICU 2.6.
-     */
-    virtual int32_t getActualMaximum(UCalendarDateFields field, UErrorCode& status) const;
-
-    /**
-     * Gets the value for a given time field. Recalculate the current time field values
-     * if the time value has been changed by a call to setTime(). Return zero for unset
-     * fields if any fields have been explicitly set by a call to set(). To force a
-     * recomputation of all fields regardless of the previous state, call complete().
-     * This method is semantically const, but may alter the object in memory.
-     *
-     * @param field  The given time field.
-     * @param status Fill-in parameter which receives the status of the operation.
-     * @return       The value for the given time field, or zero if the field is unset,
-     *               and set() has been called for any other field.
-     * @deprecated ICU 2.6. Use get(UCalendarDateFields field, UErrorCode& status) instead.
-     */
-    int32_t get(EDateFields field, UErrorCode& status) const;
-
-    /**
-     * Gets the value for a given time field. Recalculate the current time field values
-     * if the time value has been changed by a call to setTime(). Return zero for unset
-     * fields if any fields have been explicitly set by a call to set(). To force a
-     * recomputation of all fields regardless of the previous state, call complete().
-     * This method is semantically const, but may alter the object in memory.
-     *
-     * @param field  The given time field.
-     * @param status Fill-in parameter which receives the status of the operation.
-     * @return       The value for the given time field, or zero if the field is unset,
-     *               and set() has been called for any other field.
-     * @stable ICU 2.6.
-     */
-    int32_t get(UCalendarDateFields field, UErrorCode& status) const;
-
-    /**
-     * Determines if the given time field has a value set. This can affect in the
-     * resolving of time in Calendar. Unset fields have a value of zero, by definition.
-     *
-     * @param field  The given time field.
-     * @return   True if the given time field has a value set; false otherwise.
-     * @deprecated ICU 2.6. Use isSet(UCalendarDateFields field) instead.
-     */
-    UBool isSet(EDateFields field) const;
-
-    /**
-     * Determines if the given time field has a value set. This can affect in the
-     * resolving of time in Calendar. Unset fields have a value of zero, by definition.
-     *
-     * @param field  The given time field.
-     * @return   True if the given time field has a value set; false otherwise.
-     * @stable ICU 2.6.
-     */
-    UBool isSet(UCalendarDateFields field) const;
-
-    /**
-     * Sets the given time field with the given value.
-     *
-     * @param field  The given time field.
-     * @param value  The value to be set for the given time field.
-     * @deprecated ICU 2.6. Use set(UCalendarDateFields field, int32_t value) instead.
-     */
-    void set(EDateFields field, int32_t value);
-
-    /**
-     * Sets the given time field with the given value.
-     *
-     * @param field  The given time field.
-     * @param value  The value to be set for the given time field.
-     * @stable ICU 2.6.
-     */
-    void set(UCalendarDateFields field, int32_t value);
-
-    /**
-     * Sets the values for the fields YEAR, MONTH, and DATE. Other field values are
-     * retained; call clear() first if this is not desired.
-     *
-     * @param year   The value used to set the YEAR time field.
-     * @param month  The value used to set the MONTH time field. Month value is 0-based.
-     *               e.g., 0 for January.
-     * @param date   The value used to set the DATE time field.
-     * @stable ICU 2.0
-     */
-    void set(int32_t year, int32_t month, int32_t date);
-
-    /**
-     * Sets the values for the fields YEAR, MONTH, DATE, HOUR_OF_DAY, and MINUTE. Other
-     * field values are retained; call clear() first if this is not desired.
-     *
-     * @param year    The value used to set the YEAR time field.
-     * @param month   The value used to set the MONTH time field. Month value is
-     *                0-based. E.g., 0 for January.
-     * @param date    The value used to set the DATE time field.
-     * @param hour    The value used to set the HOUR_OF_DAY time field.
-     * @param minute  The value used to set the MINUTE time field.
-     * @stable ICU 2.0
-     */
-    void set(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute);
-
-    /**
-     * Sets the values for the fields YEAR, MONTH, DATE, HOUR_OF_DAY, MINUTE, and SECOND.
-     * Other field values are retained; call clear() first if this is not desired.
-     *
-     * @param year    The value used to set the YEAR time field.
-     * @param month   The value used to set the MONTH time field. Month value is
-     *                0-based. E.g., 0 for January.
-     * @param date    The value used to set the DATE time field.
-     * @param hour    The value used to set the HOUR_OF_DAY time field.
-     * @param minute  The value used to set the MINUTE time field.
-     * @param second  The value used to set the SECOND time field.
-     * @stable ICU 2.0
-     */
-    void set(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute, int32_t second);
-
-    /**
-     * Clears the values of all the time fields, making them both unset and assigning
-     * them a value of zero. The field values will be determined during the next
-     * resolving of time into time fields.
-     * @stable ICU 2.0
-     */
-    void clear(void);
-
-    /**
-     * Clears the value in the given time field, both making it unset and assigning it a
-     * value of zero. This field value will be determined during the next resolving of
-     * time into time fields.
-     *
-     * @param field  The time field to be cleared.
-     * @deprecated ICU 2.6. Use clear(UCalendarDateFields field) instead.
-     */
-    void clear(EDateFields field);
-
-    /**
-     * Clears the value in the given time field, both making it unset and assigning it a
-     * value of zero. This field value will be determined during the next resolving of
-     * time into time fields.
-     *
-     * @param field  The time field to be cleared.
-     * @stable ICU 2.6.
-     */
-    void clear(UCalendarDateFields field);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual method. This method is to
-     * implement a simple version of RTTI, since not all C++ compilers support genuine
-     * RTTI. Polymorphic operator==() and clone() methods call this method.
-     * <P>
-     * Concrete subclasses of Calendar must implement getDynamicClassID() and also a
-     * static method and data member:
-     *
-     *      static UClassID getStaticClassID() { return (UClassID)&amp;fgClassID; }
-     *      static char fgClassID;
-     *
-     * @return   The class ID for this object. All objects of a given class have the
-     *           same class ID. Objects of other classes have different class IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const = 0;
-
-    /**
-     * Returns the resource key string used for this calendar type.
-     * For example, prepending "Eras_" to this string could return "Eras_japanese"
-     * or "Eras_gregorian".
-     *
-     * @returns static string, for example, "gregorian" or "japanese"
-     * @internal
-     */
-    virtual const char * getType() const = 0;
-
-protected:
-
-     /**
-      * Constructs a Calendar with the default time zone as returned by
-      * TimeZone::createInstance(), and the default locale.
-      *
-      * @param success  Indicates the status of Calendar object construction. Returns
-      *                 U_ZERO_ERROR if constructed successfully.
-     * @stable ICU 2.0
-      */
-    Calendar(UErrorCode& success);
-
-    /**
-     * Copy constructor
-     *
-     * @param source    Calendar object to be copied from
-     * @stable ICU 2.0
-     */
-    Calendar(const Calendar& source);
-
-    /**
-     * Default assignment operator
-     *
-     * @param right    Calendar object to be copied
-     * @stable ICU 2.0
-     */
-    Calendar& operator=(const Calendar& right);
-
-    /**
-     * Constructs a Calendar with the given time zone and locale. Clients are no longer
-     * responsible for deleting the given time zone object after it's adopted.
-     *
-     * @param zone     The given time zone.
-     * @param aLocale  The given locale.
-     * @param success  Indicates the status of Calendar object construction. Returns
-     *                 U_ZERO_ERROR if constructed successfully.
-     * @stable ICU 2.0
-     */
-    Calendar(TimeZone* zone, const Locale& aLocale, UErrorCode& success);
-
-    /**
-     * Constructs a Calendar with the given time zone and locale.
-     *
-     * @param zone     The given time zone.
-     * @param aLocale  The given locale.
-     * @param success  Indicates the status of Calendar object construction. Returns
-     *                 U_ZERO_ERROR if constructed successfully.
-     * @stable ICU 2.0
-     */
-    Calendar(const TimeZone& zone, const Locale& aLocale, UErrorCode& success);
-
-    /**
-     * Converts Calendar's time field values to GMT as milliseconds.
-     *
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @stable ICU 2.0
-     */
-    virtual void computeTime(UErrorCode& status);
-
-    /**
-     * Converts GMT as milliseconds to time field values. This allows you to sync up the
-     * time field values with a new time that is set for the calendar.  This method
-     * does NOT recompute the time first; to recompute the time, then the fields, use
-     * the method complete().
-     *
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @stable ICU 2.0
-     */
-    virtual void computeFields(UErrorCode& status);
-
-    /**
-     * Gets this Calendar's current time as a long.
-     *
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @return the current time as UTC milliseconds from the epoch.
-     * @stable ICU 2.0
-     */
-    double getTimeInMillis(UErrorCode& status) const;
-
-    /**
-     * Sets this Calendar's current time from the given long value.
-     * @param millis  the new time in UTC milliseconds from the epoch.
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @stable ICU 2.0
-     */
-    void setTimeInMillis( double millis, UErrorCode& status );
-
-    /**
-     * Recomputes the current time from currently set fields, and then fills in any
-     * unset fields in the time field list.
-     *
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     * @stable ICU 2.0
-     */
-    void complete(UErrorCode& status);
-
-    /**
-     * Gets the value for a given time field. Subclasses can use this function to get
-     * field values without forcing recomputation of time.
-     *
-     * @param field  The given time field.
-     * @return       The value for the given time field.
-     * @deprecated ICU 2.6. Use internalGet(UCalendarDateFields field) instead.
-     */
-    inline int32_t internalGet(EDateFields field) const {return fFields[field];}
-
-    /**
-     * Gets the value for a given time field. Subclasses can use this function to get
-     * field values without forcing recomputation of time. If the field's stamp is UNSET,
-     * the defaultValue is used.
-     *
-     * @param field  The given time field.
-     * @param defaultValue a default value used if the field is unset.
-     * @return       The value for the given time field.
-     * @internal
-     */
-    inline int32_t internalGet(UCalendarDateFields field, int32_t defaultValue) const {return fStamp[field]>kUnset ? fFields[field] : defaultValue;}
-
-    /**
-     * Gets the value for a given time field. Subclasses can use this function to get
-     * field values without forcing recomputation of time.
-     *
-     * @param field  The given time field.
-     * @return       The value for the given time field.
-     * @internal
-     */
-    inline int32_t internalGet(UCalendarDateFields field) const {return fFields[field];}
-
-    /**
-     * Sets the value for a given time field.  This is a fast internal method for
-     * subclasses.  It does not affect the areFieldsInSync, isTimeSet, or areAllFieldsSet
-     * flags.
-     *
-     * @param field    The given time field.
-     * @param value    The value for the given time field.
-     * @deprecated ICU 2.6. Use internalSet(UCalendarDateFields field, int32_t value) instead.
-     */
-    void internalSet(EDateFields field, int32_t value);
-
-    /**
-     * Sets the value for a given time field.  This is a fast internal method for
-     * subclasses.  It does not affect the areFieldsInSync, isTimeSet, or areAllFieldsSet
-     * flags.
-     *
-     * @param field    The given time field.
-     * @param value    The value for the given time field.
-     * @stable ICU 2.6.
-     */
-    inline void internalSet(UCalendarDateFields field, int32_t value);
-
-    /**
-     * Prepare this calendar for computing the actual minimum or maximum.
-     * This method modifies this calendar's fields; it is called on a
-     * temporary calendar.
-     * @internal
-     */
-    virtual void prepareGetActual(UCalendarDateFields field, UBool isMinimum, UErrorCode &status);
-
-    /**
-     * Limit enums. Not in sync with UCalendarLimitType (refers to internal fields).
-     * @internal
-     */
-    enum ELimitType {
-      UCAL_LIMIT_MINIMUM = 0,
-      UCAL_LIMIT_GREATEST_MINIMUM,
-      UCAL_LIMIT_LEAST_MAXIMUM,
-      UCAL_LIMIT_MAXIMUM,
-      UCAL_LIMIT_COUNT
-    };
-
-    /**
-     * Subclass API for defining limits of different types.
-     * Subclasses must implement this method to return limits for the
-     * following fields:
-     *
-     * <pre>UCAL_ERA
-     * UCAL_YEAR
-     * UCAL_MONTH
-     * UCAL_WEEK_OF_YEAR
-     * UCAL_WEEK_OF_MONTH
-     * UCAL_DATE (DAY_OF_MONTH on Java)
-     * UCAL_DAY_OF_YEAR
-     * UCAL_DAY_OF_WEEK_IN_MONTH
-     * UCAL_YEAR_WOY
-     * UCAL_EXTENDED_YEAR</pre>
-     *
-     * @param field one of the above field numbers
-     * @param limitType one of <code>MINIMUM</code>, <code>GREATEST_MINIMUM</code>,
-     * <code>LEAST_MAXIMUM</code>, or <code>MAXIMUM</code>
-     * @internal
-     */
-    virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const = 0;
-
-    /**
-     * Return a limit for a field.
-     * @param field the field, from <code>0..UCAL_MAX_FIELD</code>
-     * @param limitType the type specifier for the limit
-     * @see #ELimitType
-     * @internal
-     */
-    virtual int32_t getLimit(UCalendarDateFields field, ELimitType limitType) const;
-
-
-    /**
-     * Return the Julian day number of day before the first day of the
-     * given month in the given extended year.  Subclasses should override
-     * this method to implement their calendar system.
-     * @param eyear the extended year
-     * @param month the zero-based month, or 0 if useMonth is false
-     * @param useMonth if false, compute the day before the first day of
-     * the given year, otherwise, compute the day before the first day of
-     * the given month
-     * @return the Julian day number of the day before the first
-     * day of the given month and year
-     * @internal
-     */
-    virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month,
-                                                   UBool useMonth) const  = 0;
-
-    /**
-     * Return the number of days in the given month of the given extended
-     * year of this calendar system.  Subclasses should override this
-     * method if they can provide a more correct or more efficient
-     * implementation than the default implementation in Calendar.
-     * @internal
-     */
-    virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const ;
-
-    /**
-     * Return the number of days in the given extended year of this
-     * calendar system.  Subclasses should override this method if they can
-     * provide a more correct or more efficient implementation than the
-     * default implementation in Calendar.
-     * @stable ICU 2.0
-     */
-    virtual int32_t handleGetYearLength(int32_t eyear) const;
-
-
-    /**
-     * Return the extended year defined by the current fields.  This will
-     * use the UCAL_EXTENDED_YEAR field or the UCAL_YEAR and supra-year fields (such
-     * as UCAL_ERA) specific to the calendar system, depending on which set of
-     * fields is newer.
-     * @return the extended year
-     * @internal
-     */
-    virtual int32_t handleGetExtendedYear() = 0;
-
-    /**
-     * Subclasses may override this.  This method calls
-     * handleGetMonthLength() to obtain the calendar-specific month
-     * length.
-     * @param bestField which field to use to calculate the date
-     * @return julian day specified by calendar fields.
-     * @internal
-     */
-    virtual int32_t handleComputeJulianDay(UCalendarDateFields bestField);
-
-    /**
-     * Subclasses must override this to convert from week fields
-     * (YEAR_WOY and WEEK_OF_YEAR) to an extended year in the case
-     * where YEAR, EXTENDED_YEAR are not set.
-     * The Calendar implementation assumes yearWoy is in extended gregorian form
-     * @internal
-     * @return the extended year, UCAL_EXTENDED_YEAR
-     */
-    virtual int32_t handleGetExtendedYearFromWeekFields(int32_t yearWoy, int32_t woy);
-
-    /**
-     * Compute the Julian day from fields.  Will determine whether to use
-     * the JULIAN_DAY field directly, or other fields.
-     * @return the julian day
-     * @internal
-     */
-    int32_t computeJulianDay();
-
-    /**
-     * Compute the milliseconds in the day from the fields.  This is a
-     * value from 0 to 23:59:59.999 inclusive, unless fields are out of
-     * range, in which case it can be an arbitrary value.  This value
-     * reflects local zone wall time.
-     * @internal
-     */
-    int32_t computeMillisInDay();
-
-    /**
-     * This method can assume EXTENDED_YEAR has been set.
-     * @param millis milliseconds of the date fields
-     * @param millisInDay milliseconds of the time fields; may be out
-     * or range.
-     * @param ec Output param set to failure code on function return
-     *          when this function fails.
-     * @internal
-     */
-    int32_t computeZoneOffset(double millis, int32_t millisInDay, UErrorCode &ec);
-
-
-    /**
-     * Determine the best stamp in a range.
-     * @param start first enum to look at
-     * @param end last enum to look at
-     * @param bestSoFar stamp prior to function call
-     * @return the stamp value of the best stamp
-     * @internal
-     */
-    int32_t newestStamp(UCalendarDateFields start, UCalendarDateFields end, int32_t bestSoFar) const;
-
-    /**
-     * Values for field resolution tables
-     * @see #resolveFields
-     * @internal
-     */
-    enum {
-      /** Marker for end of resolve set (row or group). */
-      kResolveSTOP = -1,
-      /** Value to be bitwised "ORed" against resolve table field values for remapping.  Example: (UCAL_DATE | kResolveRemap) in 1st column will cause 'UCAL_DATE' to be returned, but will not examine the value of UCAL_DATE.  */
-      kResolveRemap = 32
-    };
-
-    /**
-     * Precedence table for Dates
-     * @see #resolveFields
-     * @internal
-     */
-    static const UFieldResolutionTable kDatePrecedence[];
-
-    /**
-     * Precedence table for Year
-     * @see #resolveFields
-     * @internal
-     */
-    static const UFieldResolutionTable kYearPrecedence[];
-
-    /**
-     * Precedence table for Day of Week
-     * @see #resolveFields
-     * @internal
-     */
-    static const UFieldResolutionTable kDOWPrecedence[];
-
-    /**
-     * Given a precedence table, return the newest field combination in
-     * the table, or UCAL_FIELD_COUNT if none is found.
-     *
-     * <p>The precedence table is a 3-dimensional array of integers.  It
-     * may be thought of as an array of groups.  Each group is an array of
-     * lines.  Each line is an array of field numbers.  Within a line, if
-     * all fields are set, then the time stamp of the line is taken to be
-     * the stamp of the most recently set field.  If any field of a line is
-     * unset, then the line fails to match.  Within a group, the line with
-     * the newest time stamp is selected.  The first field of the line is
-     * returned to indicate which line matched.
-     *
-     * <p>In some cases, it may be desirable to map a line to field that
-     * whose stamp is NOT examined.  For example, if the best field is
-     * DAY_OF_WEEK then the DAY_OF_WEEK_IN_MONTH algorithm may be used.  In
-     * order to do this, insert the value <code>kResolveRemap | F</code> at
-     * the start of the line, where <code>F</code> is the desired return
-     * field value.  This field will NOT be examined; it only determines
-     * the return value if the other fields in the line are the newest.
-     *
-     * <p>If all lines of a group contain at least one unset field, then no
-     * line will match, and the group as a whole will fail to match.  In
-     * that case, the next group will be processed.  If all groups fail to
-     * match, then UCAL_FIELD_COUNT is returned.
-     * @internal
-     */
-    UCalendarDateFields resolveFields(const UFieldResolutionTable *precedenceTable);
-
-
-    /**
-     * @internal
-     */
-    virtual const UFieldResolutionTable* getFieldResolutionTable() const;
-
-    /**
-     * Return the field that is newer, either defaultField, or
-     * alternateField.  If neither is newer or neither is set, return defaultField.
-     * @internal
-     */
-    UCalendarDateFields newerField(UCalendarDateFields defaultField, UCalendarDateFields alternateField) const;
-
-
-private:
-    /**
-     * Helper function for calculating limits by trial and error
-     * @param field The field being investigated
-     * @param startValue starting (least max) value of field
-     * @param endValue ending (greatest max) value of field
-     * @param status return type
-     * @internal
-     */
-    int32_t getActualHelper(UCalendarDateFields field, int32_t startValue, int32_t endValue, UErrorCode &status) const;
-
-
-protected:
-    /**
-     * The flag which indicates if the current time is set in the calendar.
-     * @stable ICU 2.0
-     */
-    UBool      fIsTimeSet;
-
-    /**
-     * True if the fields are in sync with the currently set time of this Calendar.
-     * If false, then the next attempt to get the value of a field will
-     * force a recomputation of all fields from the current value of the time
-     * field.
-     * <P>
-     * This should really be named areFieldsInSync, but the old name is retained
-     * for backward compatibility.
-     * @stable ICU 2.0
-     */
-    UBool      fAreFieldsSet;
-
-    /**
-     * True if all of the fields have been set.  This is initially false, and set to
-     * true by computeFields().
-     * @stable ICU 2.0
-     */
-    UBool      fAreAllFieldsSet;
-
-    /**
-     * True if all fields have been virtually set, but have not yet been
-     * computed.  This occurs only in setTimeInMillis().  A calendar set
-     * to this state will compute all fields from the time if it becomes
-     * necessary, but otherwise will delay such computation.
-     * @stable ICU 3.0
-     */
-    UBool fAreFieldsVirtuallySet;
-
-    /**
-     * Get the current time without recomputing.
-     *
-     * @return     the current time without recomputing.
-     * @stable ICU 2.0
-     */
-    UDate        internalGetTime(void) const     { return fTime; }
-
-    /**
-     * Set the current time without affecting flags or fields.
-     *
-     * @param time    The time to be set
-     * @return        the current time without recomputing.
-     * @stable ICU 2.0
-     */
-    void        internalSetTime(UDate time)     { fTime = time; }
-
-    /**
-     * The time fields containing values into which the millis is computed.
-     * @stable ICU 2.0
-     */
-    int32_t     fFields[UCAL_FIELD_COUNT];
-
-    /**
-     * The flags which tell if a specified time field for the calendar is set.
-     * @deprecated ICU 2.8 use (fStamp[n]!=kUnset)
-     */
-    UBool      fIsSet[UCAL_FIELD_COUNT];
-
-    /** Special values of stamp[]
-     * @stable ICU 2.0
-     */
-    enum {
-        kUnset                 = 0,
-        kInternallySet,
-        kMinimumUserStamp
-    };
-
-    /**
-     * Pseudo-time-stamps which specify when each field was set. There
-     * are two special values, UNSET and INTERNALLY_SET. Values from
-     * MINIMUM_USER_SET to Integer.MAX_VALUE are legal user set values.
-     * @stable ICU 2.0
-     */
-    int32_t        fStamp[UCAL_FIELD_COUNT];
-
-    /**
-     * Subclasses may override this method to compute several fields
-     * specific to each calendar system.  These are:
-     *
-     * <ul><li>ERA
-     * <li>YEAR
-     * <li>MONTH
-     * <li>DAY_OF_MONTH
-     * <li>DAY_OF_YEAR
-     * <li>EXTENDED_YEAR</ul>
-     *
-     * Subclasses can refer to the DAY_OF_WEEK and DOW_LOCAL fields, which
-     * will be set when this method is called.  Subclasses can also call
-     * the getGregorianXxx() methods to obtain Gregorian calendar
-     * equivalents for the given Julian day.
-     *
-     * <p>In addition, subclasses should compute any subclass-specific
-     * fields, that is, fields from BASE_FIELD_COUNT to
-     * getFieldCount() - 1.
-     *
-     * <p>The default implementation in <code>Calendar</code> implements
-     * a pure proleptic Gregorian calendar.
-     * @internal
-     */
-    virtual void handleComputeFields(int32_t julianDay, UErrorCode &status);
-
-    /**
-     * Return the extended year on the Gregorian calendar as computed by
-     * <code>computeGregorianFields()</code>.
-     * @internal
-     */
-    int32_t getGregorianYear() const {
-        return fGregorianYear;
-    }
-
-    /**
-     * Return the month (0-based) on the Gregorian calendar as computed by
-     * <code>computeGregorianFields()</code>.
-     * @internal
-     */
-    int32_t getGregorianMonth() const {
-        return fGregorianMonth;
-    }
-
-    /**
-     * Return the day of year (1-based) on the Gregorian calendar as
-     * computed by <code>computeGregorianFields()</code>.
-     * @internal
-     */
-    int32_t getGregorianDayOfYear() const {
-        return fGregorianDayOfYear;
-    }
-
-    /**
-     * Return the day of month (1-based) on the Gregorian calendar as
-     * computed by <code>computeGregorianFields()</code>.
-     * @internal
-     */
-    int32_t getGregorianDayOfMonth() const {
-      return fGregorianDayOfMonth;
-    }
-
-    /**
-     * Called by computeJulianDay.  Returns the default month (0-based) for the year,
-     * taking year and era into account.  Defaults to 0 for Gregorian, which doesn't care.
-     * @internal
-     * @internal
-     */
-    virtual int32_t getDefaultMonthInYear() ;
-
-
-    /**
-     * Called by computeJulianDay.  Returns the default day (1-based) for the month,
-     * taking currently-set year and era into account.  Defaults to 1 for Gregorian.
-     * @internal
-     */
-    virtual int32_t getDefaultDayInMonth(int32_t /*month*/);
-
-    //-------------------------------------------------------------------------
-    // Protected utility methods for use by subclasses.  These are very handy
-    // for implementing add, roll, and computeFields.
-    //-------------------------------------------------------------------------
-
-    /**
-     * Adjust the specified field so that it is within
-     * the allowable range for the date to which this calendar is set.
-     * For example, in a Gregorian calendar pinning the {@link #UCalendarDateFields DAY_OF_MONTH}
-     * field for a calendar set to April 31 would cause it to be set
-     * to April 30.
-     * <p>
-     * <b>Subclassing:</b>
-     * <br>
-     * This utility method is intended for use by subclasses that need to implement
-     * their own overrides of {@link #roll roll} and {@link #add add}.
-     * <p>
-     * <b>Note:</b>
-     * <code>pinField</code> is implemented in terms of
-     * {@link #getActualMinimum getActualMinimum}
-     * and {@link #getActualMaximum getActualMaximum}.  If either of those methods uses
-     * a slow, iterative algorithm for a particular field, it would be
-     * unwise to attempt to call <code>pinField</code> for that field.  If you
-     * really do need to do so, you should override this method to do
-     * something more efficient for that field.
-     * <p>
-     * @param field The calendar field whose value should be pinned.
-     * @param status Output param set to failure code on function return
-     *          when this function fails.
-     *
-     * @see #getActualMinimum
-     * @see #getActualMaximum
-     * @stable ICU 2.0
-     */
-    virtual void pinField(UCalendarDateFields field, UErrorCode& status);
-
-    /**
-     * Return the week number of a day, within a period. This may be the week number in
-     * a year or the week number in a month. Usually this will be a value >= 1, but if
-     * some initial days of the period are excluded from week 1, because
-     * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} is > 1, then
-     * the week number will be zero for those
-     * initial days. This method requires the day number and day of week for some
-     * known date in the period in order to determine the day of week
-     * on the desired day.
-     * <p>
-     * <b>Subclassing:</b>
-     * <br>
-     * This method is intended for use by subclasses in implementing their
-     * {@link #computeTime computeTime} and/or {@link #computeFields computeFields} methods.
-     * It is often useful in {@link #getActualMinimum getActualMinimum} and
-     * {@link #getActualMaximum getActualMaximum} as well.
-     * <p>
-     * This variant is handy for computing the week number of some other
-     * day of a period (often the first or last day of the period) when its day
-     * of the week is not known but the day number and day of week for some other
-     * day in the period (e.g. the current date) <em>is</em> known.
-     * <p>
-     * @param desiredDay    The {@link #UCalendarDateFields DAY_OF_YEAR} or
-     *              {@link #UCalendarDateFields DAY_OF_MONTH} whose week number is desired.
-     *              Should be 1 for the first day of the period.
-     *
-     * @param dayOfPeriod   The {@link #UCalendarDateFields DAY_OF_YEAR}
-     *              or {@link #UCalendarDateFields DAY_OF_MONTH} for a day in the period whose
-     *              {@link #UCalendarDateFields DAY_OF_WEEK} is specified by the
-     *              <code>knownDayOfWeek</code> parameter.
-     *              Should be 1 for first day of period.
-     *
-     * @param dayOfWeek  The {@link #UCalendarDateFields DAY_OF_WEEK} for the day
-     *              corresponding to the <code>knownDayOfPeriod</code> parameter.
-     *              1-based with 1=Sunday.
-     *
-     * @return      The week number (one-based), or zero if the day falls before
-     *              the first week because
-     *              {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek}
-     *              is more than one.
-     *
-     * @stable ICU 2.8
-     */
-    int32_t weekNumber(int32_t desiredDay, int32_t dayOfPeriod, int32_t dayOfWeek);
-
-
-    /**
-     * Return the week number of a day, within a period. This may be the week number in
-     * a year, or the week number in a month. Usually this will be a value >= 1, but if
-     * some initial days of the period are excluded from week 1, because
-     * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} is > 1,
-     * then the week number will be zero for those
-     * initial days. This method requires the day of week for the given date in order to
-     * determine the result.
-     * <p>
-     * <b>Subclassing:</b>
-     * <br>
-     * This method is intended for use by subclasses in implementing their
-     * {@link #computeTime computeTime} and/or {@link #computeFields computeFields} methods.
-     * It is often useful in {@link #getActualMinimum getActualMinimum} and
-     * {@link #getActualMaximum getActualMaximum} as well.
-     * <p>
-     * @param dayOfPeriod   The {@link #UCalendarDateFields DAY_OF_YEAR} or
-     *                      {@link #UCalendarDateFields DAY_OF_MONTH} whose week number is desired.
-     *                      Should be 1 for the first day of the period.
-     *
-     * @param dayOfWeek     The {@link #UCalendarDateFields DAY_OF_WEEK} for the day
-     *                      corresponding to the <code>dayOfPeriod</code> parameter.
-     *                      1-based with 1=Sunday.
-     *
-     * @return      The week number (one-based), or zero if the day falls before
-     *              the first week because
-     *              {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek}
-     *              is more than one.
-     * @internal
-     */
-    inline int32_t weekNumber(int32_t dayOfPeriod, int32_t dayOfWeek);
-
-    /**
-     * returns the local DOW, valid range 0..6
-     * @internal
-     */
-    int32_t getLocalDOW();
-
-private:
-
-    /**
-     * The next available value for fStamp[]
-     */
-    int32_t fNextStamp;// = MINIMUM_USER_STAMP;
-
-    /**
-     * The current time set for the calendar.
-     */
-    UDate        fTime;
-
-    /**
-     * @see   #setLenient
-     */
-    UBool      fLenient;
-
-    /**
-     * Time zone affects the time calculation done by Calendar. Calendar subclasses use
-     * the time zone data to produce the local time.
-     */
-    TimeZone*   fZone;
-
-    /**
-     * Both firstDayOfWeek and minimalDaysInFirstWeek are locale-dependent. They are
-     * used to figure out the week count for a specific date for a given locale. These
-     * must be set when a Calendar is constructed. For example, in US locale,
-     * firstDayOfWeek is SUNDAY; minimalDaysInFirstWeek is 1. They are used to figure
-     * out the week count for a specific date for a given locale. These must be set when
-     * a Calendar is constructed.
-     */
-    UCalendarDaysOfWeek fFirstDayOfWeek;
-    uint8_t     fMinimalDaysInFirstWeek;
-
-    /**
-     * Sets firstDayOfWeek and minimalDaysInFirstWeek. Called at Calendar construction
-     * time.
-     *
-     * @param desiredLocale  The given locale.
-     * @param type           The calendar type identifier, e.g: gregorian, buddhist, etc.
-     * @param success        Indicates the status of setting the week count data from
-     *                       the resource for the given locale. Returns U_ZERO_ERROR if
-     *                       constructed successfully.
-     */
-    void        setWeekCountData(const Locale& desiredLocale, const char *type, UErrorCode& success);
-
-    /**
-     * Recompute the time and update the status fields isTimeSet
-     * and areFieldsSet.  Callers should check isTimeSet and only
-     * call this method if isTimeSet is false.
-     *
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid or restricted by
-     *                leniency, this will be set to an error status.
-     */
-    void updateTime(UErrorCode& status);
-
-    /**
-     * The Gregorian year, as computed by computeGregorianFields() and
-     * returned by getGregorianYear().
-     * @see #computeGregorianFields
-     */
-    int32_t fGregorianYear;
-
-    /**
-     * The Gregorian month, as computed by computeGregorianFields() and
-     * returned by getGregorianMonth().
-     * @see #computeGregorianFields
-     */
-    int32_t fGregorianMonth;
-
-    /**
-     * The Gregorian day of the year, as computed by
-     * computeGregorianFields() and returned by getGregorianDayOfYear().
-     * @see #computeGregorianFields
-     */
-    int32_t fGregorianDayOfYear;
-
-    /**
-     * The Gregorian day of the month, as computed by
-     * computeGregorianFields() and returned by getGregorianDayOfMonth().
-     * @see #computeGregorianFields
-     */
-    int32_t fGregorianDayOfMonth;
-
-    /* calculations */
-
-    /**
-     * Compute the Gregorian calendar year, month, and day of month from
-     * the given Julian day.  These values are not stored in fields, but in
-     * member variables gregorianXxx.  Also compute the DAY_OF_WEEK and
-     * DOW_LOCAL fields.
-     */
-    void computeGregorianAndDOWFields(int32_t julianDay, UErrorCode &ec);
-
-	protected:
-
-    /**
-     * Compute the Gregorian calendar year, month, and day of month from the
-     * Julian day.  These values are not stored in fields, but in member
-     * variables gregorianXxx.  They are used for time zone computations and by
-     * subclasses that are Gregorian derivatives.  Subclasses may call this
-     * method to perform a Gregorian calendar millis->fields computation.
-     * To perform a Gregorian calendar fields->millis computation, call
-     * computeGregorianMonthStart().
-     * @see #computeGregorianMonthStart
-     */
-    void computeGregorianFields(int32_t julianDay, UErrorCode &ec);
-
-	private:
-
-    /**
-     * Compute the fields WEEK_OF_YEAR, YEAR_WOY, WEEK_OF_MONTH,
-     * DAY_OF_WEEK_IN_MONTH, and DOW_LOCAL from EXTENDED_YEAR, YEAR,
-     * DAY_OF_WEEK, and DAY_OF_YEAR.  The latter fields are computed by the
-     * subclass based on the calendar system.
-     *
-     * <p>The YEAR_WOY field is computed simplistically.  It is equal to YEAR
-     * most of the time, but at the year boundary it may be adjusted to YEAR-1
-     * or YEAR+1 to reflect the overlap of a week into an adjacent year.  In
-     * this case, a simple increment or decrement is performed on YEAR, even
-     * though this may yield an invalid YEAR value.  For instance, if the YEAR
-     * is part of a calendar system with an N-year cycle field CYCLE, then
-     * incrementing the YEAR may involve incrementing CYCLE and setting YEAR
-     * back to 0 or 1.  This is not handled by this code, and in fact cannot be
-     * simply handled without having subclasses define an entire parallel set of
-     * fields for fields larger than or equal to a year.  This additional
-     * complexity is not warranted, since the intention of the YEAR_WOY field is
-     * to support ISO 8601 notation, so it will typically be used with a
-     * proleptic Gregorian calendar, which has no field larger than a year.
-     */
-    void computeWeekFields(UErrorCode &ec);
-
-
-    /**
-     * Ensure that each field is within its valid range by calling {@link
-     * #validateField(int, int&)} on each field that has been set.  This method
-     * should only be called if this calendar is not lenient.
-     * @see #isLenient
-     * @see #validateField(int, int&)
-     * @internal
-     */
-    void validateFields(UErrorCode &status);
-
-    /**
-     * Validate a single field of this calendar.  Subclasses should
-     * override this method to validate any calendar-specific fields.
-     * Generic fields can be handled by
-     * <code>Calendar.validateField()</code>.
-     * @see #validateField(int, int, int, int&)
-     * @internal
-     */
-    virtual void validateField(UCalendarDateFields field, UErrorCode &status);
-
-    /**
-     * Validate a single field of this calendar given its minimum and
-     * maximum allowed value.  If the field is out of range,
-     * <code>U_ILLEGAL_ARGUMENT_ERROR</code> will be set.  Subclasses may
-     * use this method in their implementation of {@link
-     * #validateField(int, int&)}.
-     * @internal
-     */
-    void validateField(UCalendarDateFields field, int32_t min, int32_t max, UErrorCode& status);
-
- protected:
-    /**
-     * Convert a quasi Julian date to the day of the week. The Julian date used here is
-     * not a true Julian date, since it is measured from midnight, not noon. Return
-     * value is one-based.
-     *
-     * @param julian  The given Julian date number.
-     * @return   Day number from 1..7 (SUN..SAT).
-     * @internal
-     */
-    static uint8_t julianDayToDayOfWeek(double julian);
-
- private:
-    char validLocale[ULOC_FULLNAME_CAPACITY];
-    char actualLocale[ULOC_FULLNAME_CAPACITY];
-
- public:
-#if !UCONFIG_NO_SERVICE
-    /**
-     * INTERNAL FOR 2.6 --  Registration.
-     */
-
-    /**
-     * Return a StringEnumeration over the locales available at the time of the call,
-     * including registered locales.
-     * @return a StringEnumeration over the locales available at the time of the call
-     * @internal
-     */
-    static StringEnumeration* getAvailableLocales(void);
-
-    /**
-     * Register a new Calendar factory.  The factory will be adopted.
-     * INTERNAL in 2.6
-     * @param toAdopt the factory instance to be adopted
-     * @param status the in/out status code, no special meanings are assigned
-     * @return a registry key that can be used to unregister this factory
-     * @internal
-     */
-    static URegistryKey registerFactory(ICUServiceFactory* toAdopt, UErrorCode& status);
-
-    /**
-     * Unregister a previously-registered CalendarFactory using the key returned from the
-     * register call.  Key becomes invalid after a successful call and should not be used again.
-     * The CalendarFactory corresponding to the key will be deleted.
-     * INTERNAL in 2.6
-     * @param key the registry key returned by a previous call to registerFactory
-     * @param status the in/out status code, no special meanings are assigned
-     * @return TRUE if the factory for the key was successfully unregistered
-     * @internal
-     */
-    static UBool unregister(URegistryKey key, UErrorCode& status);
-
-    /**
-     * Multiple Calendar Implementation
-     * @internal
-     */
-    friend class CalendarFactory;
-
-    /**
-     * Multiple Calendar Implementation
-     * @internal
-     */
-    friend class CalendarService;
-
-    /**
-     * Multiple Calendar Implementation
-     * @internal
-     */
-    friend class DefaultCalendarFactory;
-#endif /* !UCONFIG_NO_SERVICE */
-
-    /**
-     * @internal
-     * @return TRUE if this calendar has a default century (i.e. 03 -> 2003)
-     */
-    virtual UBool haveDefaultCentury() const = 0;
-
-    /**
-     * @internal
-     * @return the start of the default century, as a UDate
-     */
-    virtual UDate defaultCenturyStart() const = 0;
-    /**
-     * @internal
-     * @return the beginning year of the default century, as a year
-     */
-    virtual int32_t defaultCenturyStartYear() const = 0;
-
-    /** Get the locale for this calendar object. You can choose between valid and actual locale.
-     *  @param type type of the locale we're looking for (valid or actual)
-     *  @param status error code for the operation
-     *  @return the locale
-     *  @stable ICU 2.8
-     */
-    Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const;
-
-    /** Get the locale for this calendar object. You can choose between valid and actual locale.
-     *  @param type type of the locale we're looking for (valid or actual)
-     *  @param status error code for the operation
-     *  @return the locale
-     *  @internal
-     */
-    const char* getLocaleID(ULocDataLocaleType type, UErrorCode &status) const;
-
-};
-
-// -------------------------------------
-
-inline Calendar*
-Calendar::createInstance(TimeZone* zone, UErrorCode& errorCode)
-{
-    // since the Locale isn't specified, use the default locale
-    return createInstance(zone, Locale::getDefault(), errorCode);
-}
-
-// -------------------------------------
-
-inline void
-Calendar::roll(UCalendarDateFields field, UBool up, UErrorCode& status)
-{
-    roll(field, (int32_t)(up ? +1 : -1), status);
-}
-
-inline void
-Calendar::roll(EDateFields field, UBool up, UErrorCode& status)
-{
-    roll((UCalendarDateFields) field, up, status);
-}
-
-
-// -------------------------------------
-
-/**
- * Fast method for subclasses.  The caller must maintain fUserSetDSTOffset and
- * fUserSetZoneOffset, as well as the isSet[] array.
- */
-
-inline void
-Calendar::internalSet(UCalendarDateFields field, int32_t value)
-{
-    fFields[field] = value;
-    fStamp[field] = kInternallySet;
-    fIsSet[field]     = TRUE; // Remove later
-}
-
-inline int32_t  Calendar::weekNumber(int32_t dayOfPeriod, int32_t dayOfWeek)
-{
-  return weekNumber(dayOfPeriod, dayOfPeriod, dayOfWeek);
-}
-
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _CALENDAR

Copied: MacRuby/trunk/icu-1060/unicode/calendar.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/calendar.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/calendar.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/calendar.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,2170 @@
+/*
+********************************************************************************
+*   Copyright (C) 1997-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+********************************************************************************
+*
+* File CALENDAR.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   04/22/97    aliu        Expanded and corrected comments and other header
+*                           contents.
+*   05/01/97    aliu        Made equals(), before(), after() arguments const.
+*   05/20/97    aliu        Replaced fAreFieldsSet with fAreFieldsInSync and
+*                           fAreAllFieldsSet.
+*   07/27/98    stephen     Sync up with JDK 1.2
+*   11/15/99    weiv        added YEAR_WOY and DOW_LOCAL
+*                           to EDateFields
+*    8/19/2002  srl         Removed Javaisms
+*   11/07/2003  srl         Update, clean up documentation.
+********************************************************************************
+*/
+
+#ifndef CALENDAR_H
+#define CALENDAR_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Calendar object
+ */
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/uobject.h"
+#include "unicode/locid.h"
+#include "unicode/timezone.h"
+#include "unicode/ucal.h"
+#include "unicode/umisc.h"
+
+U_NAMESPACE_BEGIN
+
+class ICUServiceFactory;
+
+/**
+ * @internal
+ */
+typedef int32_t UFieldResolutionTable[12][8];
+
+/**
+ * <code>Calendar</code> is an abstract base class for converting between
+ * a <code>UDate</code> object and a set of integer fields such as
+ * <code>YEAR</code>, <code>MONTH</code>, <code>DAY</code>, <code>HOUR</code>,
+ * and so on. (A <code>UDate</code> object represents a specific instant in
+ * time with millisecond precision. See UDate
+ * for information about the <code>UDate</code> class.)
+ *
+ * <p>
+ * Subclasses of <code>Calendar</code> interpret a <code>UDate</code>
+ * according to the rules of a specific calendar system.
+ * The most commonly used subclass of <code>Calendar</code> is
+ * <code>GregorianCalendar</code>. Other subclasses could represent
+ * the various types of lunar calendars in use in many parts of the world.
+ *
+ * <p>
+ * <b>NOTE</b>: (ICU 2.6) The subclass interface should be considered unstable
+ * - it WILL change.
+ *
+ * <p>
+ * Like other locale-sensitive classes, <code>Calendar</code> provides a
+ * static method, <code>createInstance</code>, for getting a generally useful
+ * object of this type. <code>Calendar</code>'s <code>createInstance</code> method
+ * returns the appropriate <code>Calendar</code> subclass whose
+ * time fields have been initialized with the current date and time:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * Calendar *rightNow = Calendar::createInstance(errCode);
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <p>
+ * A <code>Calendar</code> object can produce all the time field values
+ * needed to implement the date-time formatting for a particular language
+ * and calendar style (for example, Japanese-Gregorian, Japanese-Traditional).
+ *
+ * <p>
+ * When computing a <code>UDate</code> from time fields, two special circumstances
+ * may arise: there may be insufficient information to compute the
+ * <code>UDate</code> (such as only year and month but no day in the month),
+ * or there may be inconsistent information (such as "Tuesday, July 15, 1996"
+ * -- July 15, 1996 is actually a Monday).
+ *
+ * <p>
+ * <strong>Insufficient information.</strong> The calendar will use default
+ * information to specify the missing fields. This may vary by calendar; for
+ * the Gregorian calendar, the default for a field is the same as that of the
+ * start of the epoch: i.e., YEAR = 1970, MONTH = JANUARY, DATE = 1, etc.
+ *
+ * <p>
+ * <strong>Inconsistent information.</strong> If fields conflict, the calendar
+ * will give preference to fields set more recently. For example, when
+ * determining the day, the calendar will look for one of the following
+ * combinations of fields.  The most recent combination, as determined by the
+ * most recently set single field, will be used.
+ *
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * MONTH + DAY_OF_MONTH
+ * MONTH + WEEK_OF_MONTH + DAY_OF_WEEK
+ * MONTH + DAY_OF_WEEK_IN_MONTH + DAY_OF_WEEK
+ * DAY_OF_YEAR
+ * DAY_OF_WEEK + WEEK_OF_YEAR
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * For the time of day:
+ *
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * HOUR_OF_DAY
+ * AM_PM + HOUR
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <p>
+ * <strong>Note:</strong> for some non-Gregorian calendars, different
+ * fields may be necessary for complete disambiguation. For example, a full
+ * specification of the historial Arabic astronomical calendar requires year,
+ * month, day-of-month <em>and</em> day-of-week in some cases.
+ *
+ * <p>
+ * <strong>Note:</strong> There are certain possible ambiguities in
+ * interpretation of certain singular times, which are resolved in the
+ * following ways:
+ * <ol>
+ *     <li> 24:00:00 "belongs" to the following day. That is,
+ *          23:59 on Dec 31, 1969 &lt; 24:00 on Jan 1, 1970 &lt; 24:01:00 on Jan 1, 1970
+ *
+ *     <li> Although historically not precise, midnight also belongs to "am",
+ *          and noon belongs to "pm", so on the same day,
+ *          12:00 am (midnight) &lt; 12:01 am, and 12:00 pm (noon) &lt; 12:01 pm
+ * </ol>
+ *
+ * <p>
+ * The date or time format strings are not part of the definition of a
+ * calendar, as those must be modifiable or overridable by the user at
+ * runtime. Use {@link DateFormat}
+ * to format dates.
+ *
+ * <p>
+ * <code>Calendar</code> provides an API for field "rolling", where fields
+ * can be incremented or decremented, but wrap around. For example, rolling the
+ * month up in the date <code>December 12, <b>1996</b></code> results in
+ * <code>January 12, <b>1996</b></code>.
+ *
+ * <p>
+ * <code>Calendar</code> also provides a date arithmetic function for
+ * adding the specified (signed) amount of time to a particular time field.
+ * For example, subtracting 5 days from the date <code>September 12, 1996</code>
+ * results in <code>September 7, 1996</code>.
+ *
+ * @stable ICU 2.0
+ */
+class U_I18N_API Calendar : public UObject {
+public:
+
+    /**
+     * Field IDs for date and time. Used to specify date/time fields. ERA is calendar
+     * specific. Example ranges given are for illustration only; see specific Calendar
+     * subclasses for actual ranges.
+     * @deprecated ICU 2.6. Use C enum UCalendarDateFields defined in ucal.h
+     */
+    enum EDateFields {
+#ifndef U_HIDE_DEPRECATED_API
+        ERA,                  // Example: 0..1
+        YEAR,                 // Example: 1..big number
+        MONTH,                // Example: 0..11
+        WEEK_OF_YEAR,         // Example: 1..53
+        WEEK_OF_MONTH,        // Example: 1..4
+        DATE,                 // Example: 1..31
+        DAY_OF_YEAR,          // Example: 1..365
+        DAY_OF_WEEK,          // Example: 1..7
+        DAY_OF_WEEK_IN_MONTH, // Example: 1..4, may be specified as -1
+        AM_PM,                // Example: 0..1
+        HOUR,                 // Example: 0..11
+        HOUR_OF_DAY,          // Example: 0..23
+        MINUTE,               // Example: 0..59
+        SECOND,               // Example: 0..59
+        MILLISECOND,          // Example: 0..999
+        ZONE_OFFSET,          // Example: -12*U_MILLIS_PER_HOUR..12*U_MILLIS_PER_HOUR
+        DST_OFFSET,           // Example: 0 or U_MILLIS_PER_HOUR
+        YEAR_WOY,             // 'Y' Example: 1..big number - Year of Week of Year
+        DOW_LOCAL,            // 'e' Example: 1..7 - Day of Week / Localized
+		
+		EXTENDED_YEAR,
+		JULIAN_DAY,
+		MILLISECONDS_IN_DAY,
+		IS_LEAP_MONTH,
+
+        FIELD_COUNT = UCAL_FIELD_COUNT // See ucal.h for other fields.
+#endif /* U_HIDE_DEPRECATED_API */
+    };
+
+    /**
+     * Useful constant for days of week. Note: Calendar day-of-week is 1-based. Clients
+     * who create locale resources for the field of first-day-of-week should be aware of
+     * this. For instance, in US locale, first-day-of-week is set to 1, i.e., SUNDAY.
+     * @deprecated ICU 2.6. Use C enum UCalendarDaysOfWeek defined in ucal.h
+     */
+    enum EDaysOfWeek {
+#ifndef U_HIDE_DEPRECATED_API
+        SUNDAY = 1,
+        MONDAY,
+        TUESDAY,
+        WEDNESDAY,
+        THURSDAY,
+        FRIDAY,
+        SATURDAY
+#endif /* U_HIDE_DEPRECATED_API */
+    };
+
+    /**
+     * Useful constants for month. Note: Calendar month is 0-based.
+     * @deprecated ICU 2.6. Use C enum UCalendarMonths defined in ucal.h
+     */
+    enum EMonths {
+#ifndef U_HIDE_DEPRECATED_API
+        JANUARY,
+        FEBRUARY,
+        MARCH,
+        APRIL,
+        MAY,
+        JUNE,
+        JULY,
+        AUGUST,
+        SEPTEMBER,
+        OCTOBER,
+        NOVEMBER,
+        DECEMBER,
+        UNDECIMBER
+#endif /* U_HIDE_DEPRECATED_API */
+    };
+
+    /**
+     * Useful constants for hour in 12-hour clock. Used in GregorianCalendar.
+     * @deprecated ICU 2.6. Use C enum UCalendarAMPMs defined in ucal.h
+     */
+    enum EAmpm {
+#ifndef U_HIDE_DEPRECATED_API
+        AM,
+        PM
+#endif /* U_HIDE_DEPRECATED_API */
+    };
+
+    /**
+     * destructor
+     * @stable ICU 2.0
+     */
+    virtual ~Calendar();
+
+    /**
+     * Create and return a polymorphic copy of this calendar.
+     *
+     * @return    a polymorphic copy of this calendar.
+     * @stable ICU 2.0
+     */
+    virtual Calendar* clone(void) const = 0;
+
+    /**
+     * Creates a Calendar using the default timezone and locale. Clients are responsible
+     * for deleting the object returned.
+     *
+     * @param success  Indicates the success/failure of Calendar creation. Filled in
+     *                 with U_ZERO_ERROR if created successfully, set to a failure result
+     *                 otherwise. U_MISSING_RESOURCE_ERROR will be returned if the resource data
+     *                 requests a calendar type which has not been installed.
+     * @return         A Calendar if created successfully. NULL otherwise.
+     * @stable ICU 2.0
+     */
+    static Calendar* U_EXPORT2 createInstance(UErrorCode& success);
+
+    /**
+     * Creates a Calendar using the given timezone and the default locale.
+     * The Calendar takes ownership of zoneToAdopt; the
+     * client must not delete it.
+     *
+     * @param zoneToAdopt  The given timezone to be adopted.
+     * @param success      Indicates the success/failure of Calendar creation. Filled in
+     *                     with U_ZERO_ERROR if created successfully, set to a failure result
+     *                     otherwise.
+     * @return             A Calendar if created successfully. NULL otherwise.
+     * @stable ICU 2.0
+     */
+    static Calendar* U_EXPORT2 createInstance(TimeZone* zoneToAdopt, UErrorCode& success);
+
+    /**
+     * Creates a Calendar using the given timezone and the default locale.  The TimeZone
+     * is _not_ adopted; the client is still responsible for deleting it.
+     *
+     * @param zone  The timezone.
+     * @param success      Indicates the success/failure of Calendar creation. Filled in
+     *                     with U_ZERO_ERROR if created successfully, set to a failure result
+     *                     otherwise.
+     * @return             A Calendar if created successfully. NULL otherwise.
+     * @stable ICU 2.0
+     */
+    static Calendar* U_EXPORT2 createInstance(const TimeZone& zone, UErrorCode& success);
+
+    /**
+     * Creates a Calendar using the default timezone and the given locale.
+     *
+     * @param aLocale  The given locale.
+     * @param success  Indicates the success/failure of Calendar creation. Filled in
+     *                 with U_ZERO_ERROR if created successfully, set to a failure result
+     *                 otherwise.
+     * @return         A Calendar if created successfully. NULL otherwise.
+     * @stable ICU 2.0
+     */
+    static Calendar* U_EXPORT2 createInstance(const Locale& aLocale, UErrorCode& success);
+
+    /**
+     * Creates a Calendar using the given timezone and given locale.
+     * The Calendar takes ownership of zoneToAdopt; the
+     * client must not delete it.
+     *
+     * @param zoneToAdopt  The given timezone to be adopted.
+     * @param aLocale      The given locale.
+     * @param success      Indicates the success/failure of Calendar creation. Filled in
+     *                     with U_ZERO_ERROR if created successfully, set to a failure result
+     *                     otherwise.
+     * @return             A Calendar if created successfully. NULL otherwise.
+     * @stable ICU 2.0
+     */
+    static Calendar* U_EXPORT2 createInstance(TimeZone* zoneToAdopt, const Locale& aLocale, UErrorCode& success);
+
+    /**
+     * Gets a Calendar using the given timezone and given locale.  The TimeZone
+     * is _not_ adopted; the client is still responsible for deleting it.
+     *
+     * @param zoneToAdopt  The given timezone to be adopted.
+     * @param aLocale      The given locale.
+     * @param success      Indicates the success/failure of Calendar creation. Filled in
+     *                     with U_ZERO_ERROR if created successfully, set to a failure result
+     *                     otherwise.
+     * @return             A Calendar if created successfully. NULL otherwise.
+     * @stable ICU 2.0
+     */
+    static Calendar* U_EXPORT2 createInstance(const TimeZone& zoneToAdopt, const Locale& aLocale, UErrorCode& success);
+
+    /**
+     * Returns a list of the locales for which Calendars are installed.
+     *
+     * @param count  Number of locales returned.
+     * @return       An array of Locale objects representing the set of locales for which
+     *               Calendars are installed.  The system retains ownership of this list;
+     *               the caller must NOT delete it. Does not include user-registered Calendars.
+     * @stable ICU 2.0
+     */
+    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+
+    /**
+     * Returns the current UTC (GMT) time measured in milliseconds since 0:00:00 on 1/1/70
+     * (derived from the system time).
+     *
+     * @return   The current UTC time in milliseconds.
+     * @stable ICU 2.0
+     */
+    static UDate U_EXPORT2 getNow(void);
+
+    /**
+     * Gets this Calendar's time as milliseconds. May involve recalculation of time due
+     * to previous calls to set time field values. The time specified is non-local UTC
+     * (GMT) time. Although this method is const, this object may actually be changed
+     * (semantically const).
+     *
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @return        The current time in UTC (GMT) time, or zero if the operation
+     *                failed.
+     * @stable ICU 2.0
+     */
+    inline UDate getTime(UErrorCode& status) const { return getTimeInMillis(status); }
+
+    /**
+     * Sets this Calendar's current time with the given UDate. The time specified should
+     * be in non-local UTC (GMT) time.
+     *
+     * @param date  The given UDate in UTC (GMT) time.
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @stable ICU 2.0
+     */
+    inline void setTime(UDate date, UErrorCode& status) { setTimeInMillis(date, status); }
+
+    /**
+     * Compares the equality of two Calendar objects. Objects of different subclasses
+     * are considered unequal. This comparison is very exacting; two Calendar objects
+     * must be in exactly the same state to be considered equal. To compare based on the
+     * represented time, use equals() instead.
+     *
+     * @param that  The Calendar object to be compared with.
+     * @return      True if the given Calendar is the same as this Calendar; false
+     *              otherwise.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const Calendar& that) const;
+
+    /**
+     * Compares the inequality of two Calendar objects.
+     *
+     * @param that  The Calendar object to be compared with.
+     * @return      True if the given Calendar is not the same as this Calendar; false
+     *              otherwise.
+     * @stable ICU 2.0
+     */
+    UBool operator!=(const Calendar& that) const {return !operator==(that);}
+
+    /**
+     * Returns TRUE if the given Calendar object is equivalent to this
+     * one.  An equivalent Calendar will behave exactly as this one
+     * does, but it may be set to a different time.  By contrast, for
+     * the operator==() method to return TRUE, the other Calendar must
+     * be set to the same time.
+     *
+     * @param other the Calendar to be compared with this Calendar
+     * @stable ICU 2.4
+     */
+    virtual UBool isEquivalentTo(const Calendar& other) const;
+
+    /**
+     * Compares the Calendar time, whereas Calendar::operator== compares the equality of
+     * Calendar objects.
+     *
+     * @param when    The Calendar to be compared with this Calendar. Although this is a
+     *                const parameter, the object may be modified physically
+     *                (semantically const).
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @return        True if the current time of this Calendar is equal to the time of
+     *                Calendar when; false otherwise.
+     * @stable ICU 2.0
+     */
+    UBool equals(const Calendar& when, UErrorCode& status) const;
+
+    /**
+     * Returns true if this Calendar's current time is before "when"'s current time.
+     *
+     * @param when    The Calendar to be compared with this Calendar. Although this is a
+     *                const parameter, the object may be modified physically
+     *                (semantically const).
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @return        True if the current time of this Calendar is before the time of
+     *                Calendar when; false otherwise.
+     * @stable ICU 2.0
+     */
+    UBool before(const Calendar& when, UErrorCode& status) const;
+
+    /**
+     * Returns true if this Calendar's current time is after "when"'s current time.
+     *
+     * @param when    The Calendar to be compared with this Calendar. Although this is a
+     *                const parameter, the object may be modified physically
+     *                (semantically const).
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @return        True if the current time of this Calendar is after the time of
+     *                Calendar when; false otherwise.
+     * @stable ICU 2.0
+     */
+    UBool after(const Calendar& when, UErrorCode& status) const;
+
+    /**
+     * UDate Arithmetic function. Adds the specified (signed) amount of time to the given
+     * time field, based on the calendar's rules. For example, to subtract 5 days from
+     * the current time of the calendar, call add(Calendar::DATE, -5). When adding on
+     * the month or Calendar::MONTH field, other fields like date might conflict and
+     * need to be changed. For instance, adding 1 month on the date 01/31/96 will result
+     * in 02/29/96.
+     *
+     * @param field   Specifies which date field to modify.
+     * @param amount  The amount of time to be added to the field, in the natural unit
+     *                for that field (e.g., days for the day fields, hours for the hour
+     *                field.)
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @deprecated ICU 2.6. use add(UCalendarDateFields field, int32_t amount, UErrorCode& status) instead.
+     */
+    virtual void add(EDateFields field, int32_t amount, UErrorCode& status);
+
+    /**
+     * UDate Arithmetic function. Adds the specified (signed) amount of time to the given
+     * time field, based on the calendar's rules. For example, to subtract 5 days from
+     * the current time of the calendar, call add(Calendar::DATE, -5). When adding on
+     * the month or Calendar::MONTH field, other fields like date might conflict and
+     * need to be changed. For instance, adding 1 month on the date 01/31/96 will result
+     * in 02/29/96.
+     *
+     * @param field   Specifies which date field to modify.
+     * @param amount  The amount of time to be added to the field, in the natural unit
+     *                for that field (e.g., days for the day fields, hours for the hour
+     *                field.)
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @stable ICU 2.6.
+     */
+    virtual void add(UCalendarDateFields field, int32_t amount, UErrorCode& status);
+
+    /**
+     * Time Field Rolling function. Rolls (up/down) a single unit of time on the given
+     * time field. For example, to roll the current date up by one day, call
+     * roll(Calendar::DATE, true). When rolling on the year or Calendar::YEAR field, it
+     * will roll the year value in the range between getMinimum(Calendar::YEAR) and the
+     * value returned by getMaximum(Calendar::YEAR). When rolling on the month or
+     * Calendar::MONTH field, other fields like date might conflict and, need to be
+     * changed. For instance, rolling the month up on the date 01/31/96 will result in
+     * 02/29/96. Rolling up always means rolling forward in time; e.g., rolling the year
+     * up on "100 BC" will result in "99 BC", for Gregorian calendar. When rolling on the
+     * hour-in-day or Calendar::HOUR_OF_DAY field, it will roll the hour value in the range
+     * between 0 and 23, which is zero-based.
+     * <P>
+     * NOTE: Do not use this method -- use roll(EDateFields, int, UErrorCode&) instead.
+     *
+     * @param field   The time field.
+     * @param up      Indicates if the value of the specified time field is to be rolled
+     *                up or rolled down. Use true if rolling up, false otherwise.
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @deprecated ICU 2.6. Use roll(UCalendarDateFields field, UBool up, UErrorCode& status) instead.
+     */
+    inline void roll(EDateFields field, UBool up, UErrorCode& status);
+
+    /**
+     * Time Field Rolling function. Rolls (up/down) a single unit of time on the given
+     * time field. For example, to roll the current date up by one day, call
+     * roll(Calendar::DATE, true). When rolling on the year or Calendar::YEAR field, it
+     * will roll the year value in the range between getMinimum(Calendar::YEAR) and the
+     * value returned by getMaximum(Calendar::YEAR). When rolling on the month or
+     * Calendar::MONTH field, other fields like date might conflict and, need to be
+     * changed. For instance, rolling the month up on the date 01/31/96 will result in
+     * 02/29/96. Rolling up always means rolling forward in time; e.g., rolling the year
+     * up on "100 BC" will result in "99 BC", for Gregorian calendar. When rolling on the
+     * hour-in-day or Calendar::HOUR_OF_DAY field, it will roll the hour value in the range
+     * between 0 and 23, which is zero-based.
+     * <P>
+     * NOTE: Do not use this method -- use roll(UCalendarDateFields, int, UErrorCode&) instead.
+     *
+     * @param field   The time field.
+     * @param up      Indicates if the value of the specified time field is to be rolled
+     *                up or rolled down. Use true if rolling up, false otherwise.
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @stable ICU 2.6.
+     */
+    inline void roll(UCalendarDateFields field, UBool up, UErrorCode& status);
+
+    /**
+     * Time Field Rolling function. Rolls by the given amount on the given
+     * time field. For example, to roll the current date up by one day, call
+     * roll(Calendar::DATE, +1, status). When rolling on the month or
+     * Calendar::MONTH field, other fields like date might conflict and, need to be
+     * changed. For instance, rolling the month up on the date 01/31/96 will result in
+     * 02/29/96.  Rolling by a positive value always means rolling forward in time;
+     * e.g., rolling the year by +1 on "100 BC" will result in "99 BC", for Gregorian
+     * calendar. When rolling on the hour-in-day or Calendar::HOUR_OF_DAY field, it will
+     * roll the hour value in the range between 0 and 23, which is zero-based.
+     * <P>
+     * The only difference between roll() and add() is that roll() does not change
+     * the value of more significant fields when it reaches the minimum or maximum
+     * of its range, whereas add() does.
+     *
+     * @param field   The time field.
+     * @param amount  Indicates amount to roll.
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid, this will be set to
+     *                an error status.
+     * @deprecated ICU 2.6. Use roll(UCalendarDateFields field, int32_t amount, UErrorCode& status) instead.
+     */
+    virtual void roll(EDateFields field, int32_t amount, UErrorCode& status);
+
+    /**
+     * Time Field Rolling function. Rolls by the given amount on the given
+     * time field. For example, to roll the current date up by one day, call
+     * roll(Calendar::DATE, +1, status). When rolling on the month or
+     * Calendar::MONTH field, other fields like date might conflict and, need to be
+     * changed. For instance, rolling the month up on the date 01/31/96 will result in
+     * 02/29/96.  Rolling by a positive value always means rolling forward in time;
+     * e.g., rolling the year by +1 on "100 BC" will result in "99 BC", for Gregorian
+     * calendar. When rolling on the hour-in-day or Calendar::HOUR_OF_DAY field, it will
+     * roll the hour value in the range between 0 and 23, which is zero-based.
+     * <P>
+     * The only difference between roll() and add() is that roll() does not change
+     * the value of more significant fields when it reaches the minimum or maximum
+     * of its range, whereas add() does.
+     *
+     * @param field   The time field.
+     * @param amount  Indicates amount to roll.
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid, this will be set to
+     *                an error status.
+     * @stable ICU 2.6.
+     */
+    virtual void roll(UCalendarDateFields field, int32_t amount, UErrorCode& status);
+
+    /**
+     * Return the difference between the given time and the time this
+     * calendar object is set to.  If this calendar is set
+     * <em>before</em> the given time, the returned value will be
+     * positive.  If this calendar is set <em>after</em> the given
+     * time, the returned value will be negative.  The
+     * <code>field</code> parameter specifies the units of the return
+     * value.  For example, if <code>fieldDifference(when,
+     * Calendar::MONTH)</code> returns 3, then this calendar is set to
+     * 3 months before <code>when</code>, and possibly some addition
+     * time less than one month.
+     *
+     * <p>As a side effect of this call, this calendar is advanced
+     * toward <code>when</code> by the given amount.  That is, calling
+     * this method has the side effect of calling <code>add(field,
+     * n)</code>, where <code>n</code> is the return value.
+     *
+     * <p>Usage: To use this method, call it first with the largest
+     * field of interest, then with progressively smaller fields.  For
+     * example:
+     *
+     * <pre>
+     * int y = cal->fieldDifference(when, Calendar::YEAR, err);
+     * int m = cal->fieldDifference(when, Calendar::MONTH, err);
+     * int d = cal->fieldDifference(when, Calendar::DATE, err);</pre>
+     *
+     * computes the difference between <code>cal</code> and
+     * <code>when</code> in years, months, and days.
+     *
+     * <p>Note: <code>fieldDifference()</code> is
+     * <em>asymmetrical</em>.  That is, in the following code:
+     *
+     * <pre>
+     * cal->setTime(date1, err);
+     * int m1 = cal->fieldDifference(date2, Calendar::MONTH, err);
+     * int d1 = cal->fieldDifference(date2, Calendar::DATE, err);
+     * cal->setTime(date2, err);
+     * int m2 = cal->fieldDifference(date1, Calendar::MONTH, err);
+     * int d2 = cal->fieldDifference(date1, Calendar::DATE, err);</pre>
+     *
+     * one might expect that <code>m1 == -m2 && d1 == -d2</code>.
+     * However, this is not generally the case, because of
+     * irregularities in the underlying calendar system (e.g., the
+     * Gregorian calendar has a varying number of days per month).
+     *
+     * @param when the date to compare this calendar's time to
+     * @param field the field in which to compute the result
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid, this will be set to
+     *                an error status.
+     * @return the difference, either positive or negative, between
+     * this calendar's time and <code>when</code>, in terms of
+     * <code>field</code>.
+     * @deprecated ICU 2.6. Use fieldDifference(UDate when, UCalendarDateFields field, UErrorCode& status).
+     */
+    virtual int32_t fieldDifference(UDate when, EDateFields field, UErrorCode& status);
+
+    /**
+     * Return the difference between the given time and the time this
+     * calendar object is set to.  If this calendar is set
+     * <em>before</em> the given time, the returned value will be
+     * positive.  If this calendar is set <em>after</em> the given
+     * time, the returned value will be negative.  The
+     * <code>field</code> parameter specifies the units of the return
+     * value.  For example, if <code>fieldDifference(when,
+     * Calendar::MONTH)</code> returns 3, then this calendar is set to
+     * 3 months before <code>when</code>, and possibly some addition
+     * time less than one month.
+     *
+     * <p>As a side effect of this call, this calendar is advanced
+     * toward <code>when</code> by the given amount.  That is, calling
+     * this method has the side effect of calling <code>add(field,
+     * n)</code>, where <code>n</code> is the return value.
+     *
+     * <p>Usage: To use this method, call it first with the largest
+     * field of interest, then with progressively smaller fields.  For
+     * example:
+     *
+     * <pre>
+     * int y = cal->fieldDifference(when, Calendar::YEAR, err);
+     * int m = cal->fieldDifference(when, Calendar::MONTH, err);
+     * int d = cal->fieldDifference(when, Calendar::DATE, err);</pre>
+     *
+     * computes the difference between <code>cal</code> and
+     * <code>when</code> in years, months, and days.
+     *
+     * <p>Note: <code>fieldDifference()</code> is
+     * <em>asymmetrical</em>.  That is, in the following code:
+     *
+     * <pre>
+     * cal->setTime(date1, err);
+     * int m1 = cal->fieldDifference(date2, Calendar::MONTH, err);
+     * int d1 = cal->fieldDifference(date2, Calendar::DATE, err);
+     * cal->setTime(date2, err);
+     * int m2 = cal->fieldDifference(date1, Calendar::MONTH, err);
+     * int d2 = cal->fieldDifference(date1, Calendar::DATE, err);</pre>
+     *
+     * one might expect that <code>m1 == -m2 && d1 == -d2</code>.
+     * However, this is not generally the case, because of
+     * irregularities in the underlying calendar system (e.g., the
+     * Gregorian calendar has a varying number of days per month).
+     *
+     * @param when the date to compare this calendar's time to
+     * @param field the field in which to compute the result
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid, this will be set to
+     *                an error status.
+     * @return the difference, either positive or negative, between
+     * this calendar's time and <code>when</code>, in terms of
+     * <code>field</code>.
+     * @stable ICU 2.6.
+     */
+    virtual int32_t fieldDifference(UDate when, UCalendarDateFields field, UErrorCode& status);
+
+    /**
+     * Sets the calendar's time zone to be the one passed in. The Calendar takes ownership
+     * of the TimeZone; the caller is no longer responsible for deleting it.  If the
+     * given time zone is NULL, this function has no effect.
+     *
+     * @param value  The given time zone.
+     * @stable ICU 2.0
+     */
+    void adoptTimeZone(TimeZone* value);
+
+    /**
+     * Sets the calendar's time zone to be the same as the one passed in. The TimeZone
+     * passed in is _not_ adopted; the client is still responsible for deleting it.
+     *
+     * @param zone  The given time zone.
+     * @stable ICU 2.0
+     */
+    void setTimeZone(const TimeZone& zone);
+
+    /**
+     * Returns a reference to the time zone owned by this calendar. The returned reference
+     * is only valid until clients make another call to adoptTimeZone or setTimeZone,
+     * or this Calendar is destroyed.
+     *
+     * @return   The time zone object associated with this calendar.
+     * @stable ICU 2.0
+     */
+    const TimeZone& getTimeZone(void) const;
+
+    /**
+     * Returns the time zone owned by this calendar. The caller owns the returned object
+     * and must delete it when done.  After this call, the new time zone associated
+     * with this Calendar is the default TimeZone as returned by TimeZone::createDefault().
+     *
+     * @return   The time zone object which was associated with this calendar.
+     * @stable ICU 2.0
+     */
+    TimeZone* orphanTimeZone(void);
+
+    /**
+     * Queries if the current date for this Calendar is in Daylight Savings Time.
+     *
+     * @param status Fill-in parameter which receives the status of this operation.
+     * @return   True if the current date for this Calendar is in Daylight Savings Time,
+     *           false, otherwise.
+     * @stable ICU 2.0
+     */
+    virtual UBool inDaylightTime(UErrorCode& status) const = 0;
+
+    /**
+     * Specifies whether or not date/time interpretation is to be lenient. With lenient
+     * interpretation, a date such as "February 942, 1996" will be treated as being
+     * equivalent to the 941st day after February 1, 1996. With strict interpretation,
+     * such dates will cause an error when computing time from the time field values
+     * representing the dates.
+     *
+     * @param lenient  True specifies date/time interpretation to be lenient.
+     *
+     * @see            DateFormat#setLenient
+     * @stable ICU 2.0
+     */
+    void setLenient(UBool lenient);
+
+    /**
+     * Tells whether date/time interpretation is to be lenient.
+     *
+     * @return   True tells that date/time interpretation is to be lenient.
+     * @stable ICU 2.0
+     */
+    UBool isLenient(void) const;
+
+    /**
+     * Sets what the first day of the week is; e.g., Sunday in US, Monday in France.
+     *
+     * @param value  The given first day of the week.
+     * @deprecated ICU 2.6. Use setFirstDayOfWeek(UCalendarDaysOfWeek value) instead.
+     */
+    void setFirstDayOfWeek(EDaysOfWeek value);
+
+    /**
+     * Sets what the first day of the week is; e.g., Sunday in US, Monday in France.
+     *
+     * @param value  The given first day of the week.
+     * @stable ICU 2.6.
+     */
+    void setFirstDayOfWeek(UCalendarDaysOfWeek value);
+
+    /**
+     * Gets what the first day of the week is; e.g., Sunday in US, Monday in France.
+     *
+     * @return   The first day of the week.
+     * @deprecated ICU 2.6 use the overload with error code
+     */
+    EDaysOfWeek getFirstDayOfWeek(void) const;
+
+    /**
+     * Gets what the first day of the week is; e.g., Sunday in US, Monday in France.
+     *
+     * @param status error code
+     * @return   The first day of the week.
+     * @stable ICU 2.6
+     */
+    UCalendarDaysOfWeek getFirstDayOfWeek(UErrorCode &status) const;
+
+    /**
+     * Sets what the minimal days required in the first week of the year are; For
+     * example, if the first week is defined as one that contains the first day of the
+     * first month of a year, call the method with value 1. If it must be a full week,
+     * use value 7.
+     *
+     * @param value  The given minimal days required in the first week of the year.
+     * @stable ICU 2.0
+     */
+    void setMinimalDaysInFirstWeek(uint8_t value);
+
+    /**
+     * Gets what the minimal days required in the first week of the year are; e.g., if
+     * the first week is defined as one that contains the first day of the first month
+     * of a year, getMinimalDaysInFirstWeek returns 1. If the minimal days required must
+     * be a full week, getMinimalDaysInFirstWeek returns 7.
+     *
+     * @return   The minimal days required in the first week of the year.
+     * @stable ICU 2.0
+     */
+    uint8_t getMinimalDaysInFirstWeek(void) const;
+
+    /**
+     * Gets the minimum value for the given time field. e.g., for Gregorian
+     * DAY_OF_MONTH, 1.
+     *
+     * @param field  The given time field.
+     * @return       The minimum value for the given time field.
+     * @deprecated ICU 2.6. Use getMinimum(UCalendarDateFields field) instead.
+     */
+    virtual int32_t getMinimum(EDateFields field) const;
+
+    /**
+     * Gets the minimum value for the given time field. e.g., for Gregorian
+     * DAY_OF_MONTH, 1.
+     *
+     * @param field  The given time field.
+     * @return       The minimum value for the given time field.
+     * @stable ICU 2.6.
+     */
+    virtual int32_t getMinimum(UCalendarDateFields field) const;
+
+    /**
+     * Gets the maximum value for the given time field. e.g. for Gregorian DAY_OF_MONTH,
+     * 31.
+     *
+     * @param field  The given time field.
+     * @return       The maximum value for the given time field.
+     * @deprecated ICU 2.6. Use getMaximum(UCalendarDateFields field) instead.
+     */
+    virtual int32_t getMaximum(EDateFields field) const;
+
+    /**
+     * Gets the maximum value for the given time field. e.g. for Gregorian DAY_OF_MONTH,
+     * 31.
+     *
+     * @param field  The given time field.
+     * @return       The maximum value for the given time field.
+     * @stable ICU 2.6.
+     */
+    virtual int32_t getMaximum(UCalendarDateFields field) const;
+
+    /**
+     * Gets the highest minimum value for the given field if varies. Otherwise same as
+     * getMinimum(). For Gregorian, no difference.
+     *
+     * @param field  The given time field.
+     * @return       The highest minimum value for the given time field.
+     * @deprecated ICU 2.6. Use getGreatestMinimum(UCalendarDateFields field) instead.
+     */
+    virtual int32_t getGreatestMinimum(EDateFields field) const;
+
+    /**
+     * Gets the highest minimum value for the given field if varies. Otherwise same as
+     * getMinimum(). For Gregorian, no difference.
+     *
+     * @param field  The given time field.
+     * @return       The highest minimum value for the given time field.
+     * @stable ICU 2.6.
+     */
+    virtual int32_t getGreatestMinimum(UCalendarDateFields field) const;
+
+    /**
+     * Gets the lowest maximum value for the given field if varies. Otherwise same as
+     * getMaximum(). e.g., for Gregorian DAY_OF_MONTH, 28.
+     *
+     * @param field  The given time field.
+     * @return       The lowest maximum value for the given time field.
+     * @deprecated ICU 2.6. Use getLeastMaximum(UCalendarDateFields field) instead.
+     */
+    virtual int32_t getLeastMaximum(EDateFields field) const;
+
+    /**
+     * Gets the lowest maximum value for the given field if varies. Otherwise same as
+     * getMaximum(). e.g., for Gregorian DAY_OF_MONTH, 28.
+     *
+     * @param field  The given time field.
+     * @return       The lowest maximum value for the given time field.
+     * @stable ICU 2.6.
+     */
+    virtual int32_t getLeastMaximum(UCalendarDateFields field) const;
+
+    /**
+     * Return the minimum value that this field could have, given the current date.
+     * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum().
+     *
+     * The version of this function on Calendar uses an iterative algorithm to determine the
+     * actual minimum value for the field.  There is almost always a more efficient way to
+     * accomplish this (in most cases, you can simply return getMinimum()).  GregorianCalendar
+     * overrides this function with a more efficient implementation.
+     *
+     * @param field    the field to determine the minimum of
+     * @param status   Fill-in parameter which receives the status of this operation.
+     * @return         the minimum of the given field for the current date of this Calendar
+     * @deprecated ICU 2.6. Use getActualMinimum(UCalendarDateFields field, UErrorCode& status) instead.
+     */
+    int32_t getActualMinimum(EDateFields field, UErrorCode& status) const;
+
+    /**
+     * Return the minimum value that this field could have, given the current date.
+     * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum().
+     *
+     * The version of this function on Calendar uses an iterative algorithm to determine the
+     * actual minimum value for the field.  There is almost always a more efficient way to
+     * accomplish this (in most cases, you can simply return getMinimum()).  GregorianCalendar
+     * overrides this function with a more efficient implementation.
+     *
+     * @param field    the field to determine the minimum of
+     * @param status   Fill-in parameter which receives the status of this operation.
+     * @return         the minimum of the given field for the current date of this Calendar
+     * @stable ICU 2.6.
+     */
+    virtual int32_t getActualMinimum(UCalendarDateFields field, UErrorCode& status) const;
+
+    /**
+     * Return the maximum value that this field could have, given the current date.
+     * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual
+     * maximum would be 28; for "Feb 3, 1996" it s 29.  Similarly for a Hebrew calendar,
+     * for some years the actual maximum for MONTH is 12, and for others 13.
+     *
+     * The version of this function on Calendar uses an iterative algorithm to determine the
+     * actual maximum value for the field.  There is almost always a more efficient way to
+     * accomplish this (in most cases, you can simply return getMaximum()).  GregorianCalendar
+     * overrides this function with a more efficient implementation.
+     *
+     * @param field    the field to determine the maximum of
+     * @param status   Fill-in parameter which receives the status of this operation.
+     * @return         the maximum of the given field for the current date of this Calendar
+     * @deprecated ICU 2.6. Use getActualMaximum(UCalendarDateFields field, UErrorCode& status) instead.
+     */
+    int32_t getActualMaximum(EDateFields field, UErrorCode& status) const;
+
+    /**
+     * Return the maximum value that this field could have, given the current date.
+     * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual
+     * maximum would be 28; for "Feb 3, 1996" it s 29.  Similarly for a Hebrew calendar,
+     * for some years the actual maximum for MONTH is 12, and for others 13.
+     *
+     * The version of this function on Calendar uses an iterative algorithm to determine the
+     * actual maximum value for the field.  There is almost always a more efficient way to
+     * accomplish this (in most cases, you can simply return getMaximum()).  GregorianCalendar
+     * overrides this function with a more efficient implementation.
+     *
+     * @param field    the field to determine the maximum of
+     * @param status   Fill-in parameter which receives the status of this operation.
+     * @return         the maximum of the given field for the current date of this Calendar
+     * @stable ICU 2.6.
+     */
+    virtual int32_t getActualMaximum(UCalendarDateFields field, UErrorCode& status) const;
+
+    /**
+     * Gets the value for a given time field. Recalculate the current time field values
+     * if the time value has been changed by a call to setTime(). Return zero for unset
+     * fields if any fields have been explicitly set by a call to set(). To force a
+     * recomputation of all fields regardless of the previous state, call complete().
+     * This method is semantically const, but may alter the object in memory.
+     *
+     * @param field  The given time field.
+     * @param status Fill-in parameter which receives the status of the operation.
+     * @return       The value for the given time field, or zero if the field is unset,
+     *               and set() has been called for any other field.
+     * @deprecated ICU 2.6. Use get(UCalendarDateFields field, UErrorCode& status) instead.
+     */
+    int32_t get(EDateFields field, UErrorCode& status) const;
+
+    /**
+     * Gets the value for a given time field. Recalculate the current time field values
+     * if the time value has been changed by a call to setTime(). Return zero for unset
+     * fields if any fields have been explicitly set by a call to set(). To force a
+     * recomputation of all fields regardless of the previous state, call complete().
+     * This method is semantically const, but may alter the object in memory.
+     *
+     * @param field  The given time field.
+     * @param status Fill-in parameter which receives the status of the operation.
+     * @return       The value for the given time field, or zero if the field is unset,
+     *               and set() has been called for any other field.
+     * @stable ICU 2.6.
+     */
+    int32_t get(UCalendarDateFields field, UErrorCode& status) const;
+
+    /**
+     * Determines if the given time field has a value set. This can affect in the
+     * resolving of time in Calendar. Unset fields have a value of zero, by definition.
+     *
+     * @param field  The given time field.
+     * @return   True if the given time field has a value set; false otherwise.
+     * @deprecated ICU 2.6. Use isSet(UCalendarDateFields field) instead.
+     */
+    UBool isSet(EDateFields field) const;
+
+    /**
+     * Determines if the given time field has a value set. This can affect in the
+     * resolving of time in Calendar. Unset fields have a value of zero, by definition.
+     *
+     * @param field  The given time field.
+     * @return   True if the given time field has a value set; false otherwise.
+     * @stable ICU 2.6.
+     */
+    UBool isSet(UCalendarDateFields field) const;
+
+    /**
+     * Sets the given time field with the given value.
+     *
+     * @param field  The given time field.
+     * @param value  The value to be set for the given time field.
+     * @deprecated ICU 2.6. Use set(UCalendarDateFields field, int32_t value) instead.
+     */
+    void set(EDateFields field, int32_t value);
+
+    /**
+     * Sets the given time field with the given value.
+     *
+     * @param field  The given time field.
+     * @param value  The value to be set for the given time field.
+     * @stable ICU 2.6.
+     */
+    void set(UCalendarDateFields field, int32_t value);
+
+    /**
+     * Sets the values for the fields YEAR, MONTH, and DATE. Other field values are
+     * retained; call clear() first if this is not desired.
+     *
+     * @param year   The value used to set the YEAR time field.
+     * @param month  The value used to set the MONTH time field. Month value is 0-based.
+     *               e.g., 0 for January.
+     * @param date   The value used to set the DATE time field.
+     * @stable ICU 2.0
+     */
+    void set(int32_t year, int32_t month, int32_t date);
+
+    /**
+     * Sets the values for the fields YEAR, MONTH, DATE, HOUR_OF_DAY, and MINUTE. Other
+     * field values are retained; call clear() first if this is not desired.
+     *
+     * @param year    The value used to set the YEAR time field.
+     * @param month   The value used to set the MONTH time field. Month value is
+     *                0-based. E.g., 0 for January.
+     * @param date    The value used to set the DATE time field.
+     * @param hour    The value used to set the HOUR_OF_DAY time field.
+     * @param minute  The value used to set the MINUTE time field.
+     * @stable ICU 2.0
+     */
+    void set(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute);
+
+    /**
+     * Sets the values for the fields YEAR, MONTH, DATE, HOUR_OF_DAY, MINUTE, and SECOND.
+     * Other field values are retained; call clear() first if this is not desired.
+     *
+     * @param year    The value used to set the YEAR time field.
+     * @param month   The value used to set the MONTH time field. Month value is
+     *                0-based. E.g., 0 for January.
+     * @param date    The value used to set the DATE time field.
+     * @param hour    The value used to set the HOUR_OF_DAY time field.
+     * @param minute  The value used to set the MINUTE time field.
+     * @param second  The value used to set the SECOND time field.
+     * @stable ICU 2.0
+     */
+    void set(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute, int32_t second);
+
+    /**
+     * Clears the values of all the time fields, making them both unset and assigning
+     * them a value of zero. The field values will be determined during the next
+     * resolving of time into time fields.
+     * @stable ICU 2.0
+     */
+    void clear(void);
+
+    /**
+     * Clears the value in the given time field, both making it unset and assigning it a
+     * value of zero. This field value will be determined during the next resolving of
+     * time into time fields.
+     *
+     * @param field  The time field to be cleared.
+     * @deprecated ICU 2.6. Use clear(UCalendarDateFields field) instead.
+     */
+    void clear(EDateFields field);
+
+    /**
+     * Clears the value in the given time field, both making it unset and assigning it a
+     * value of zero. This field value will be determined during the next resolving of
+     * time into time fields.
+     *
+     * @param field  The time field to be cleared.
+     * @stable ICU 2.6.
+     */
+    void clear(UCalendarDateFields field);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual method. This method is to
+     * implement a simple version of RTTI, since not all C++ compilers support genuine
+     * RTTI. Polymorphic operator==() and clone() methods call this method.
+     * <P>
+     * Concrete subclasses of Calendar must implement getDynamicClassID() and also a
+     * static method and data member:
+     *
+     *      static UClassID getStaticClassID() { return (UClassID)&amp;fgClassID; }
+     *      static char fgClassID;
+     *
+     * @return   The class ID for this object. All objects of a given class have the
+     *           same class ID. Objects of other classes have different class IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const = 0;
+
+    /**
+     * Returns the resource key string used for this calendar type.
+     * For example, prepending "Eras_" to this string could return "Eras_japanese"
+     * or "Eras_gregorian".
+     *
+     * @returns static string, for example, "gregorian" or "japanese"
+     * @internal
+     */
+    virtual const char * getType() const = 0;
+
+protected:
+
+     /**
+      * Constructs a Calendar with the default time zone as returned by
+      * TimeZone::createInstance(), and the default locale.
+      *
+      * @param success  Indicates the status of Calendar object construction. Returns
+      *                 U_ZERO_ERROR if constructed successfully.
+     * @stable ICU 2.0
+      */
+    Calendar(UErrorCode& success);
+
+    /**
+     * Copy constructor
+     *
+     * @param source    Calendar object to be copied from
+     * @stable ICU 2.0
+     */
+    Calendar(const Calendar& source);
+
+    /**
+     * Default assignment operator
+     *
+     * @param right    Calendar object to be copied
+     * @stable ICU 2.0
+     */
+    Calendar& operator=(const Calendar& right);
+
+    /**
+     * Constructs a Calendar with the given time zone and locale. Clients are no longer
+     * responsible for deleting the given time zone object after it's adopted.
+     *
+     * @param zone     The given time zone.
+     * @param aLocale  The given locale.
+     * @param success  Indicates the status of Calendar object construction. Returns
+     *                 U_ZERO_ERROR if constructed successfully.
+     * @stable ICU 2.0
+     */
+    Calendar(TimeZone* zone, const Locale& aLocale, UErrorCode& success);
+
+    /**
+     * Constructs a Calendar with the given time zone and locale.
+     *
+     * @param zone     The given time zone.
+     * @param aLocale  The given locale.
+     * @param success  Indicates the status of Calendar object construction. Returns
+     *                 U_ZERO_ERROR if constructed successfully.
+     * @stable ICU 2.0
+     */
+    Calendar(const TimeZone& zone, const Locale& aLocale, UErrorCode& success);
+
+    /**
+     * Converts Calendar's time field values to GMT as milliseconds.
+     *
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @stable ICU 2.0
+     */
+    virtual void computeTime(UErrorCode& status);
+
+    /**
+     * Converts GMT as milliseconds to time field values. This allows you to sync up the
+     * time field values with a new time that is set for the calendar.  This method
+     * does NOT recompute the time first; to recompute the time, then the fields, use
+     * the method complete().
+     *
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @stable ICU 2.0
+     */
+    virtual void computeFields(UErrorCode& status);
+
+    /**
+     * Gets this Calendar's current time as a long.
+     *
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @return the current time as UTC milliseconds from the epoch.
+     * @stable ICU 2.0
+     */
+    double getTimeInMillis(UErrorCode& status) const;
+
+    /**
+     * Sets this Calendar's current time from the given long value.
+     * @param millis  the new time in UTC milliseconds from the epoch.
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @stable ICU 2.0
+     */
+    void setTimeInMillis( double millis, UErrorCode& status );
+
+    /**
+     * Recomputes the current time from currently set fields, and then fills in any
+     * unset fields in the time field list.
+     *
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     * @stable ICU 2.0
+     */
+    void complete(UErrorCode& status);
+
+    /**
+     * Gets the value for a given time field. Subclasses can use this function to get
+     * field values without forcing recomputation of time.
+     *
+     * @param field  The given time field.
+     * @return       The value for the given time field.
+     * @deprecated ICU 2.6. Use internalGet(UCalendarDateFields field) instead.
+     */
+    inline int32_t internalGet(EDateFields field) const {return fFields[field];}
+
+    /**
+     * Gets the value for a given time field. Subclasses can use this function to get
+     * field values without forcing recomputation of time. If the field's stamp is UNSET,
+     * the defaultValue is used.
+     *
+     * @param field  The given time field.
+     * @param defaultValue a default value used if the field is unset.
+     * @return       The value for the given time field.
+     * @internal
+     */
+    inline int32_t internalGet(UCalendarDateFields field, int32_t defaultValue) const {return fStamp[field]>kUnset ? fFields[field] : defaultValue;}
+
+    /**
+     * Gets the value for a given time field. Subclasses can use this function to get
+     * field values without forcing recomputation of time.
+     *
+     * @param field  The given time field.
+     * @return       The value for the given time field.
+     * @internal
+     */
+    inline int32_t internalGet(UCalendarDateFields field) const {return fFields[field];}
+
+    /**
+     * Sets the value for a given time field.  This is a fast internal method for
+     * subclasses.  It does not affect the areFieldsInSync, isTimeSet, or areAllFieldsSet
+     * flags.
+     *
+     * @param field    The given time field.
+     * @param value    The value for the given time field.
+     * @deprecated ICU 2.6. Use internalSet(UCalendarDateFields field, int32_t value) instead.
+     */
+    void internalSet(EDateFields field, int32_t value);
+
+    /**
+     * Sets the value for a given time field.  This is a fast internal method for
+     * subclasses.  It does not affect the areFieldsInSync, isTimeSet, or areAllFieldsSet
+     * flags.
+     *
+     * @param field    The given time field.
+     * @param value    The value for the given time field.
+     * @stable ICU 2.6.
+     */
+    inline void internalSet(UCalendarDateFields field, int32_t value);
+
+    /**
+     * Prepare this calendar for computing the actual minimum or maximum.
+     * This method modifies this calendar's fields; it is called on a
+     * temporary calendar.
+     * @internal
+     */
+    virtual void prepareGetActual(UCalendarDateFields field, UBool isMinimum, UErrorCode &status);
+
+    /**
+     * Limit enums. Not in sync with UCalendarLimitType (refers to internal fields).
+     * @internal
+     */
+    enum ELimitType {
+      UCAL_LIMIT_MINIMUM = 0,
+      UCAL_LIMIT_GREATEST_MINIMUM,
+      UCAL_LIMIT_LEAST_MAXIMUM,
+      UCAL_LIMIT_MAXIMUM,
+      UCAL_LIMIT_COUNT
+    };
+
+    /**
+     * Subclass API for defining limits of different types.
+     * Subclasses must implement this method to return limits for the
+     * following fields:
+     *
+     * <pre>UCAL_ERA
+     * UCAL_YEAR
+     * UCAL_MONTH
+     * UCAL_WEEK_OF_YEAR
+     * UCAL_WEEK_OF_MONTH
+     * UCAL_DATE (DAY_OF_MONTH on Java)
+     * UCAL_DAY_OF_YEAR
+     * UCAL_DAY_OF_WEEK_IN_MONTH
+     * UCAL_YEAR_WOY
+     * UCAL_EXTENDED_YEAR</pre>
+     *
+     * @param field one of the above field numbers
+     * @param limitType one of <code>MINIMUM</code>, <code>GREATEST_MINIMUM</code>,
+     * <code>LEAST_MAXIMUM</code>, or <code>MAXIMUM</code>
+     * @internal
+     */
+    virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const = 0;
+
+    /**
+     * Return a limit for a field.
+     * @param field the field, from <code>0..UCAL_MAX_FIELD</code>
+     * @param limitType the type specifier for the limit
+     * @see #ELimitType
+     * @internal
+     */
+    virtual int32_t getLimit(UCalendarDateFields field, ELimitType limitType) const;
+
+
+    /**
+     * Return the Julian day number of day before the first day of the
+     * given month in the given extended year.  Subclasses should override
+     * this method to implement their calendar system.
+     * @param eyear the extended year
+     * @param month the zero-based month, or 0 if useMonth is false
+     * @param useMonth if false, compute the day before the first day of
+     * the given year, otherwise, compute the day before the first day of
+     * the given month
+     * @return the Julian day number of the day before the first
+     * day of the given month and year
+     * @internal
+     */
+    virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month,
+                                                   UBool useMonth) const  = 0;
+
+    /**
+     * Return the number of days in the given month of the given extended
+     * year of this calendar system.  Subclasses should override this
+     * method if they can provide a more correct or more efficient
+     * implementation than the default implementation in Calendar.
+     * @internal
+     */
+    virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const ;
+
+    /**
+     * Return the number of days in the given extended year of this
+     * calendar system.  Subclasses should override this method if they can
+     * provide a more correct or more efficient implementation than the
+     * default implementation in Calendar.
+     * @stable ICU 2.0
+     */
+    virtual int32_t handleGetYearLength(int32_t eyear) const;
+
+
+    /**
+     * Return the extended year defined by the current fields.  This will
+     * use the UCAL_EXTENDED_YEAR field or the UCAL_YEAR and supra-year fields (such
+     * as UCAL_ERA) specific to the calendar system, depending on which set of
+     * fields is newer.
+     * @return the extended year
+     * @internal
+     */
+    virtual int32_t handleGetExtendedYear() = 0;
+
+    /**
+     * Subclasses may override this.  This method calls
+     * handleGetMonthLength() to obtain the calendar-specific month
+     * length.
+     * @param bestField which field to use to calculate the date
+     * @return julian day specified by calendar fields.
+     * @internal
+     */
+    virtual int32_t handleComputeJulianDay(UCalendarDateFields bestField);
+
+    /**
+     * Subclasses must override this to convert from week fields
+     * (YEAR_WOY and WEEK_OF_YEAR) to an extended year in the case
+     * where YEAR, EXTENDED_YEAR are not set.
+     * The Calendar implementation assumes yearWoy is in extended gregorian form
+     * @internal
+     * @return the extended year, UCAL_EXTENDED_YEAR
+     */
+    virtual int32_t handleGetExtendedYearFromWeekFields(int32_t yearWoy, int32_t woy);
+
+    /**
+     * Compute the Julian day from fields.  Will determine whether to use
+     * the JULIAN_DAY field directly, or other fields.
+     * @return the julian day
+     * @internal
+     */
+    int32_t computeJulianDay();
+
+    /**
+     * Compute the milliseconds in the day from the fields.  This is a
+     * value from 0 to 23:59:59.999 inclusive, unless fields are out of
+     * range, in which case it can be an arbitrary value.  This value
+     * reflects local zone wall time.
+     * @internal
+     */
+    int32_t computeMillisInDay();
+
+    /**
+     * This method can assume EXTENDED_YEAR has been set.
+     * @param millis milliseconds of the date fields
+     * @param millisInDay milliseconds of the time fields; may be out
+     * or range.
+     * @param ec Output param set to failure code on function return
+     *          when this function fails.
+     * @internal
+     */
+    int32_t computeZoneOffset(double millis, int32_t millisInDay, UErrorCode &ec);
+
+
+    /**
+     * Determine the best stamp in a range.
+     * @param start first enum to look at
+     * @param end last enum to look at
+     * @param bestSoFar stamp prior to function call
+     * @return the stamp value of the best stamp
+     * @internal
+     */
+    int32_t newestStamp(UCalendarDateFields start, UCalendarDateFields end, int32_t bestSoFar) const;
+
+    /**
+     * Values for field resolution tables
+     * @see #resolveFields
+     * @internal
+     */
+    enum {
+      /** Marker for end of resolve set (row or group). */
+      kResolveSTOP = -1,
+      /** Value to be bitwised "ORed" against resolve table field values for remapping.  Example: (UCAL_DATE | kResolveRemap) in 1st column will cause 'UCAL_DATE' to be returned, but will not examine the value of UCAL_DATE.  */
+      kResolveRemap = 32
+    };
+
+    /**
+     * Precedence table for Dates
+     * @see #resolveFields
+     * @internal
+     */
+    static const UFieldResolutionTable kDatePrecedence[];
+
+    /**
+     * Precedence table for Year
+     * @see #resolveFields
+     * @internal
+     */
+    static const UFieldResolutionTable kYearPrecedence[];
+
+    /**
+     * Precedence table for Day of Week
+     * @see #resolveFields
+     * @internal
+     */
+    static const UFieldResolutionTable kDOWPrecedence[];
+
+    /**
+     * Given a precedence table, return the newest field combination in
+     * the table, or UCAL_FIELD_COUNT if none is found.
+     *
+     * <p>The precedence table is a 3-dimensional array of integers.  It
+     * may be thought of as an array of groups.  Each group is an array of
+     * lines.  Each line is an array of field numbers.  Within a line, if
+     * all fields are set, then the time stamp of the line is taken to be
+     * the stamp of the most recently set field.  If any field of a line is
+     * unset, then the line fails to match.  Within a group, the line with
+     * the newest time stamp is selected.  The first field of the line is
+     * returned to indicate which line matched.
+     *
+     * <p>In some cases, it may be desirable to map a line to field that
+     * whose stamp is NOT examined.  For example, if the best field is
+     * DAY_OF_WEEK then the DAY_OF_WEEK_IN_MONTH algorithm may be used.  In
+     * order to do this, insert the value <code>kResolveRemap | F</code> at
+     * the start of the line, where <code>F</code> is the desired return
+     * field value.  This field will NOT be examined; it only determines
+     * the return value if the other fields in the line are the newest.
+     *
+     * <p>If all lines of a group contain at least one unset field, then no
+     * line will match, and the group as a whole will fail to match.  In
+     * that case, the next group will be processed.  If all groups fail to
+     * match, then UCAL_FIELD_COUNT is returned.
+     * @internal
+     */
+    UCalendarDateFields resolveFields(const UFieldResolutionTable *precedenceTable);
+
+
+    /**
+     * @internal
+     */
+    virtual const UFieldResolutionTable* getFieldResolutionTable() const;
+
+    /**
+     * Return the field that is newer, either defaultField, or
+     * alternateField.  If neither is newer or neither is set, return defaultField.
+     * @internal
+     */
+    UCalendarDateFields newerField(UCalendarDateFields defaultField, UCalendarDateFields alternateField) const;
+
+
+private:
+    /**
+     * Helper function for calculating limits by trial and error
+     * @param field The field being investigated
+     * @param startValue starting (least max) value of field
+     * @param endValue ending (greatest max) value of field
+     * @param status return type
+     * @internal
+     */
+    int32_t getActualHelper(UCalendarDateFields field, int32_t startValue, int32_t endValue, UErrorCode &status) const;
+
+
+protected:
+    /**
+     * The flag which indicates if the current time is set in the calendar.
+     * @stable ICU 2.0
+     */
+    UBool      fIsTimeSet;
+
+    /**
+     * True if the fields are in sync with the currently set time of this Calendar.
+     * If false, then the next attempt to get the value of a field will
+     * force a recomputation of all fields from the current value of the time
+     * field.
+     * <P>
+     * This should really be named areFieldsInSync, but the old name is retained
+     * for backward compatibility.
+     * @stable ICU 2.0
+     */
+    UBool      fAreFieldsSet;
+
+    /**
+     * True if all of the fields have been set.  This is initially false, and set to
+     * true by computeFields().
+     * @stable ICU 2.0
+     */
+    UBool      fAreAllFieldsSet;
+
+    /**
+     * True if all fields have been virtually set, but have not yet been
+     * computed.  This occurs only in setTimeInMillis().  A calendar set
+     * to this state will compute all fields from the time if it becomes
+     * necessary, but otherwise will delay such computation.
+     * @stable ICU 3.0
+     */
+    UBool fAreFieldsVirtuallySet;
+
+    /**
+     * Get the current time without recomputing.
+     *
+     * @return     the current time without recomputing.
+     * @stable ICU 2.0
+     */
+    UDate        internalGetTime(void) const     { return fTime; }
+
+    /**
+     * Set the current time without affecting flags or fields.
+     *
+     * @param time    The time to be set
+     * @return        the current time without recomputing.
+     * @stable ICU 2.0
+     */
+    void        internalSetTime(UDate time)     { fTime = time; }
+
+    /**
+     * The time fields containing values into which the millis is computed.
+     * @stable ICU 2.0
+     */
+    int32_t     fFields[UCAL_FIELD_COUNT];
+
+    /**
+     * The flags which tell if a specified time field for the calendar is set.
+     * @deprecated ICU 2.8 use (fStamp[n]!=kUnset)
+     */
+    UBool      fIsSet[UCAL_FIELD_COUNT];
+
+    /** Special values of stamp[]
+     * @stable ICU 2.0
+     */
+    enum {
+        kUnset                 = 0,
+        kInternallySet,
+        kMinimumUserStamp
+    };
+
+    /**
+     * Pseudo-time-stamps which specify when each field was set. There
+     * are two special values, UNSET and INTERNALLY_SET. Values from
+     * MINIMUM_USER_SET to Integer.MAX_VALUE are legal user set values.
+     * @stable ICU 2.0
+     */
+    int32_t        fStamp[UCAL_FIELD_COUNT];
+
+    /**
+     * Subclasses may override this method to compute several fields
+     * specific to each calendar system.  These are:
+     *
+     * <ul><li>ERA
+     * <li>YEAR
+     * <li>MONTH
+     * <li>DAY_OF_MONTH
+     * <li>DAY_OF_YEAR
+     * <li>EXTENDED_YEAR</ul>
+     *
+     * Subclasses can refer to the DAY_OF_WEEK and DOW_LOCAL fields, which
+     * will be set when this method is called.  Subclasses can also call
+     * the getGregorianXxx() methods to obtain Gregorian calendar
+     * equivalents for the given Julian day.
+     *
+     * <p>In addition, subclasses should compute any subclass-specific
+     * fields, that is, fields from BASE_FIELD_COUNT to
+     * getFieldCount() - 1.
+     *
+     * <p>The default implementation in <code>Calendar</code> implements
+     * a pure proleptic Gregorian calendar.
+     * @internal
+     */
+    virtual void handleComputeFields(int32_t julianDay, UErrorCode &status);
+
+    /**
+     * Return the extended year on the Gregorian calendar as computed by
+     * <code>computeGregorianFields()</code>.
+     * @internal
+     */
+    int32_t getGregorianYear() const {
+        return fGregorianYear;
+    }
+
+    /**
+     * Return the month (0-based) on the Gregorian calendar as computed by
+     * <code>computeGregorianFields()</code>.
+     * @internal
+     */
+    int32_t getGregorianMonth() const {
+        return fGregorianMonth;
+    }
+
+    /**
+     * Return the day of year (1-based) on the Gregorian calendar as
+     * computed by <code>computeGregorianFields()</code>.
+     * @internal
+     */
+    int32_t getGregorianDayOfYear() const {
+        return fGregorianDayOfYear;
+    }
+
+    /**
+     * Return the day of month (1-based) on the Gregorian calendar as
+     * computed by <code>computeGregorianFields()</code>.
+     * @internal
+     */
+    int32_t getGregorianDayOfMonth() const {
+      return fGregorianDayOfMonth;
+    }
+
+    /**
+     * Called by computeJulianDay.  Returns the default month (0-based) for the year,
+     * taking year and era into account.  Defaults to 0 for Gregorian, which doesn't care.
+     * @internal
+     * @internal
+     */
+    virtual int32_t getDefaultMonthInYear() ;
+
+
+    /**
+     * Called by computeJulianDay.  Returns the default day (1-based) for the month,
+     * taking currently-set year and era into account.  Defaults to 1 for Gregorian.
+     * @internal
+     */
+    virtual int32_t getDefaultDayInMonth(int32_t /*month*/);
+
+    //-------------------------------------------------------------------------
+    // Protected utility methods for use by subclasses.  These are very handy
+    // for implementing add, roll, and computeFields.
+    //-------------------------------------------------------------------------
+
+    /**
+     * Adjust the specified field so that it is within
+     * the allowable range for the date to which this calendar is set.
+     * For example, in a Gregorian calendar pinning the {@link #UCalendarDateFields DAY_OF_MONTH}
+     * field for a calendar set to April 31 would cause it to be set
+     * to April 30.
+     * <p>
+     * <b>Subclassing:</b>
+     * <br>
+     * This utility method is intended for use by subclasses that need to implement
+     * their own overrides of {@link #roll roll} and {@link #add add}.
+     * <p>
+     * <b>Note:</b>
+     * <code>pinField</code> is implemented in terms of
+     * {@link #getActualMinimum getActualMinimum}
+     * and {@link #getActualMaximum getActualMaximum}.  If either of those methods uses
+     * a slow, iterative algorithm for a particular field, it would be
+     * unwise to attempt to call <code>pinField</code> for that field.  If you
+     * really do need to do so, you should override this method to do
+     * something more efficient for that field.
+     * <p>
+     * @param field The calendar field whose value should be pinned.
+     * @param status Output param set to failure code on function return
+     *          when this function fails.
+     *
+     * @see #getActualMinimum
+     * @see #getActualMaximum
+     * @stable ICU 2.0
+     */
+    virtual void pinField(UCalendarDateFields field, UErrorCode& status);
+
+    /**
+     * Return the week number of a day, within a period. This may be the week number in
+     * a year or the week number in a month. Usually this will be a value >= 1, but if
+     * some initial days of the period are excluded from week 1, because
+     * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} is > 1, then
+     * the week number will be zero for those
+     * initial days. This method requires the day number and day of week for some
+     * known date in the period in order to determine the day of week
+     * on the desired day.
+     * <p>
+     * <b>Subclassing:</b>
+     * <br>
+     * This method is intended for use by subclasses in implementing their
+     * {@link #computeTime computeTime} and/or {@link #computeFields computeFields} methods.
+     * It is often useful in {@link #getActualMinimum getActualMinimum} and
+     * {@link #getActualMaximum getActualMaximum} as well.
+     * <p>
+     * This variant is handy for computing the week number of some other
+     * day of a period (often the first or last day of the period) when its day
+     * of the week is not known but the day number and day of week for some other
+     * day in the period (e.g. the current date) <em>is</em> known.
+     * <p>
+     * @param desiredDay    The {@link #UCalendarDateFields DAY_OF_YEAR} or
+     *              {@link #UCalendarDateFields DAY_OF_MONTH} whose week number is desired.
+     *              Should be 1 for the first day of the period.
+     *
+     * @param dayOfPeriod   The {@link #UCalendarDateFields DAY_OF_YEAR}
+     *              or {@link #UCalendarDateFields DAY_OF_MONTH} for a day in the period whose
+     *              {@link #UCalendarDateFields DAY_OF_WEEK} is specified by the
+     *              <code>knownDayOfWeek</code> parameter.
+     *              Should be 1 for first day of period.
+     *
+     * @param dayOfWeek  The {@link #UCalendarDateFields DAY_OF_WEEK} for the day
+     *              corresponding to the <code>knownDayOfPeriod</code> parameter.
+     *              1-based with 1=Sunday.
+     *
+     * @return      The week number (one-based), or zero if the day falls before
+     *              the first week because
+     *              {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek}
+     *              is more than one.
+     *
+     * @stable ICU 2.8
+     */
+    int32_t weekNumber(int32_t desiredDay, int32_t dayOfPeriod, int32_t dayOfWeek);
+
+
+    /**
+     * Return the week number of a day, within a period. This may be the week number in
+     * a year, or the week number in a month. Usually this will be a value >= 1, but if
+     * some initial days of the period are excluded from week 1, because
+     * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} is > 1,
+     * then the week number will be zero for those
+     * initial days. This method requires the day of week for the given date in order to
+     * determine the result.
+     * <p>
+     * <b>Subclassing:</b>
+     * <br>
+     * This method is intended for use by subclasses in implementing their
+     * {@link #computeTime computeTime} and/or {@link #computeFields computeFields} methods.
+     * It is often useful in {@link #getActualMinimum getActualMinimum} and
+     * {@link #getActualMaximum getActualMaximum} as well.
+     * <p>
+     * @param dayOfPeriod   The {@link #UCalendarDateFields DAY_OF_YEAR} or
+     *                      {@link #UCalendarDateFields DAY_OF_MONTH} whose week number is desired.
+     *                      Should be 1 for the first day of the period.
+     *
+     * @param dayOfWeek     The {@link #UCalendarDateFields DAY_OF_WEEK} for the day
+     *                      corresponding to the <code>dayOfPeriod</code> parameter.
+     *                      1-based with 1=Sunday.
+     *
+     * @return      The week number (one-based), or zero if the day falls before
+     *              the first week because
+     *              {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek}
+     *              is more than one.
+     * @internal
+     */
+    inline int32_t weekNumber(int32_t dayOfPeriod, int32_t dayOfWeek);
+
+    /**
+     * returns the local DOW, valid range 0..6
+     * @internal
+     */
+    int32_t getLocalDOW();
+
+private:
+
+    /**
+     * The next available value for fStamp[]
+     */
+    int32_t fNextStamp;// = MINIMUM_USER_STAMP;
+
+    /**
+     * The current time set for the calendar.
+     */
+    UDate        fTime;
+
+    /**
+     * @see   #setLenient
+     */
+    UBool      fLenient;
+
+    /**
+     * Time zone affects the time calculation done by Calendar. Calendar subclasses use
+     * the time zone data to produce the local time.
+     */
+    TimeZone*   fZone;
+
+    /**
+     * Both firstDayOfWeek and minimalDaysInFirstWeek are locale-dependent. They are
+     * used to figure out the week count for a specific date for a given locale. These
+     * must be set when a Calendar is constructed. For example, in US locale,
+     * firstDayOfWeek is SUNDAY; minimalDaysInFirstWeek is 1. They are used to figure
+     * out the week count for a specific date for a given locale. These must be set when
+     * a Calendar is constructed.
+     */
+    UCalendarDaysOfWeek fFirstDayOfWeek;
+    uint8_t     fMinimalDaysInFirstWeek;
+
+    /**
+     * Sets firstDayOfWeek and minimalDaysInFirstWeek. Called at Calendar construction
+     * time.
+     *
+     * @param desiredLocale  The given locale.
+     * @param type           The calendar type identifier, e.g: gregorian, buddhist, etc.
+     * @param success        Indicates the status of setting the week count data from
+     *                       the resource for the given locale. Returns U_ZERO_ERROR if
+     *                       constructed successfully.
+     */
+    void        setWeekCountData(const Locale& desiredLocale, const char *type, UErrorCode& success);
+
+    /**
+     * Recompute the time and update the status fields isTimeSet
+     * and areFieldsSet.  Callers should check isTimeSet and only
+     * call this method if isTimeSet is false.
+     *
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid or restricted by
+     *                leniency, this will be set to an error status.
+     */
+    void updateTime(UErrorCode& status);
+
+    /**
+     * The Gregorian year, as computed by computeGregorianFields() and
+     * returned by getGregorianYear().
+     * @see #computeGregorianFields
+     */
+    int32_t fGregorianYear;
+
+    /**
+     * The Gregorian month, as computed by computeGregorianFields() and
+     * returned by getGregorianMonth().
+     * @see #computeGregorianFields
+     */
+    int32_t fGregorianMonth;
+
+    /**
+     * The Gregorian day of the year, as computed by
+     * computeGregorianFields() and returned by getGregorianDayOfYear().
+     * @see #computeGregorianFields
+     */
+    int32_t fGregorianDayOfYear;
+
+    /**
+     * The Gregorian day of the month, as computed by
+     * computeGregorianFields() and returned by getGregorianDayOfMonth().
+     * @see #computeGregorianFields
+     */
+    int32_t fGregorianDayOfMonth;
+
+    /* calculations */
+
+    /**
+     * Compute the Gregorian calendar year, month, and day of month from
+     * the given Julian day.  These values are not stored in fields, but in
+     * member variables gregorianXxx.  Also compute the DAY_OF_WEEK and
+     * DOW_LOCAL fields.
+     */
+    void computeGregorianAndDOWFields(int32_t julianDay, UErrorCode &ec);
+
+	protected:
+
+    /**
+     * Compute the Gregorian calendar year, month, and day of month from the
+     * Julian day.  These values are not stored in fields, but in member
+     * variables gregorianXxx.  They are used for time zone computations and by
+     * subclasses that are Gregorian derivatives.  Subclasses may call this
+     * method to perform a Gregorian calendar millis->fields computation.
+     * To perform a Gregorian calendar fields->millis computation, call
+     * computeGregorianMonthStart().
+     * @see #computeGregorianMonthStart
+     */
+    void computeGregorianFields(int32_t julianDay, UErrorCode &ec);
+
+	private:
+
+    /**
+     * Compute the fields WEEK_OF_YEAR, YEAR_WOY, WEEK_OF_MONTH,
+     * DAY_OF_WEEK_IN_MONTH, and DOW_LOCAL from EXTENDED_YEAR, YEAR,
+     * DAY_OF_WEEK, and DAY_OF_YEAR.  The latter fields are computed by the
+     * subclass based on the calendar system.
+     *
+     * <p>The YEAR_WOY field is computed simplistically.  It is equal to YEAR
+     * most of the time, but at the year boundary it may be adjusted to YEAR-1
+     * or YEAR+1 to reflect the overlap of a week into an adjacent year.  In
+     * this case, a simple increment or decrement is performed on YEAR, even
+     * though this may yield an invalid YEAR value.  For instance, if the YEAR
+     * is part of a calendar system with an N-year cycle field CYCLE, then
+     * incrementing the YEAR may involve incrementing CYCLE and setting YEAR
+     * back to 0 or 1.  This is not handled by this code, and in fact cannot be
+     * simply handled without having subclasses define an entire parallel set of
+     * fields for fields larger than or equal to a year.  This additional
+     * complexity is not warranted, since the intention of the YEAR_WOY field is
+     * to support ISO 8601 notation, so it will typically be used with a
+     * proleptic Gregorian calendar, which has no field larger than a year.
+     */
+    void computeWeekFields(UErrorCode &ec);
+
+
+    /**
+     * Ensure that each field is within its valid range by calling {@link
+     * #validateField(int, int&)} on each field that has been set.  This method
+     * should only be called if this calendar is not lenient.
+     * @see #isLenient
+     * @see #validateField(int, int&)
+     * @internal
+     */
+    void validateFields(UErrorCode &status);
+
+    /**
+     * Validate a single field of this calendar.  Subclasses should
+     * override this method to validate any calendar-specific fields.
+     * Generic fields can be handled by
+     * <code>Calendar.validateField()</code>.
+     * @see #validateField(int, int, int, int&)
+     * @internal
+     */
+    virtual void validateField(UCalendarDateFields field, UErrorCode &status);
+
+    /**
+     * Validate a single field of this calendar given its minimum and
+     * maximum allowed value.  If the field is out of range,
+     * <code>U_ILLEGAL_ARGUMENT_ERROR</code> will be set.  Subclasses may
+     * use this method in their implementation of {@link
+     * #validateField(int, int&)}.
+     * @internal
+     */
+    void validateField(UCalendarDateFields field, int32_t min, int32_t max, UErrorCode& status);
+
+ protected:
+    /**
+     * Convert a quasi Julian date to the day of the week. The Julian date used here is
+     * not a true Julian date, since it is measured from midnight, not noon. Return
+     * value is one-based.
+     *
+     * @param julian  The given Julian date number.
+     * @return   Day number from 1..7 (SUN..SAT).
+     * @internal
+     */
+    static uint8_t julianDayToDayOfWeek(double julian);
+
+ private:
+    char validLocale[ULOC_FULLNAME_CAPACITY];
+    char actualLocale[ULOC_FULLNAME_CAPACITY];
+
+ public:
+#if !UCONFIG_NO_SERVICE
+    /**
+     * INTERNAL FOR 2.6 --  Registration.
+     */
+
+    /**
+     * Return a StringEnumeration over the locales available at the time of the call,
+     * including registered locales.
+     * @return a StringEnumeration over the locales available at the time of the call
+     * @internal
+     */
+    static StringEnumeration* getAvailableLocales(void);
+
+    /**
+     * Register a new Calendar factory.  The factory will be adopted.
+     * INTERNAL in 2.6
+     * @param toAdopt the factory instance to be adopted
+     * @param status the in/out status code, no special meanings are assigned
+     * @return a registry key that can be used to unregister this factory
+     * @internal
+     */
+    static URegistryKey registerFactory(ICUServiceFactory* toAdopt, UErrorCode& status);
+
+    /**
+     * Unregister a previously-registered CalendarFactory using the key returned from the
+     * register call.  Key becomes invalid after a successful call and should not be used again.
+     * The CalendarFactory corresponding to the key will be deleted.
+     * INTERNAL in 2.6
+     * @param key the registry key returned by a previous call to registerFactory
+     * @param status the in/out status code, no special meanings are assigned
+     * @return TRUE if the factory for the key was successfully unregistered
+     * @internal
+     */
+    static UBool unregister(URegistryKey key, UErrorCode& status);
+
+    /**
+     * Multiple Calendar Implementation
+     * @internal
+     */
+    friend class CalendarFactory;
+
+    /**
+     * Multiple Calendar Implementation
+     * @internal
+     */
+    friend class CalendarService;
+
+    /**
+     * Multiple Calendar Implementation
+     * @internal
+     */
+    friend class DefaultCalendarFactory;
+#endif /* !UCONFIG_NO_SERVICE */
+
+    /**
+     * @internal
+     * @return TRUE if this calendar has a default century (i.e. 03 -> 2003)
+     */
+    virtual UBool haveDefaultCentury() const = 0;
+
+    /**
+     * @internal
+     * @return the start of the default century, as a UDate
+     */
+    virtual UDate defaultCenturyStart() const = 0;
+    /**
+     * @internal
+     * @return the beginning year of the default century, as a year
+     */
+    virtual int32_t defaultCenturyStartYear() const = 0;
+
+    /** Get the locale for this calendar object. You can choose between valid and actual locale.
+     *  @param type type of the locale we're looking for (valid or actual)
+     *  @param status error code for the operation
+     *  @return the locale
+     *  @stable ICU 2.8
+     */
+    Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const;
+
+    /** Get the locale for this calendar object. You can choose between valid and actual locale.
+     *  @param type type of the locale we're looking for (valid or actual)
+     *  @param status error code for the operation
+     *  @return the locale
+     *  @internal
+     */
+    const char* getLocaleID(ULocDataLocaleType type, UErrorCode &status) const;
+
+};
+
+// -------------------------------------
+
+inline Calendar*
+Calendar::createInstance(TimeZone* zone, UErrorCode& errorCode)
+{
+    // since the Locale isn't specified, use the default locale
+    return createInstance(zone, Locale::getDefault(), errorCode);
+}
+
+// -------------------------------------
+
+inline void
+Calendar::roll(UCalendarDateFields field, UBool up, UErrorCode& status)
+{
+    roll(field, (int32_t)(up ? +1 : -1), status);
+}
+
+inline void
+Calendar::roll(EDateFields field, UBool up, UErrorCode& status)
+{
+    roll((UCalendarDateFields) field, up, status);
+}
+
+
+// -------------------------------------
+
+/**
+ * Fast method for subclasses.  The caller must maintain fUserSetDSTOffset and
+ * fUserSetZoneOffset, as well as the isSet[] array.
+ */
+
+inline void
+Calendar::internalSet(UCalendarDateFields field, int32_t value)
+{
+    fFields[field] = value;
+    fStamp[field] = kInternallySet;
+    fIsSet[field]     = TRUE; // Remove later
+}
+
+inline int32_t  Calendar::weekNumber(int32_t dayOfPeriod, int32_t dayOfWeek)
+{
+  return weekNumber(dayOfPeriod, dayOfPeriod, dayOfWeek);
+}
+
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _CALENDAR

Deleted: MacRuby/trunk/icu-1060/unicode/caniter.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/caniter.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/caniter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,201 +0,0 @@
-/*
- *******************************************************************************
- * Copyright (C) 1996-2006, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                                *
- *******************************************************************************
- */
-
-#ifndef CANITER_H
-#define CANITER_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-
-/**
- * \file
- * \brief C++ API: Canonical Iterator
- */
- 
-/** Should permutation skip characters with combining class zero
- *  Should be either TRUE or FALSE. This is a compile time option
- *  @stable ICU 2.4
- */
-#ifndef CANITER_SKIP_ZEROES
-#define CANITER_SKIP_ZEROES TRUE
-#endif
-
-U_NAMESPACE_BEGIN
-
-class Hashtable;
-
-/**
- * This class allows one to iterate through all the strings that are canonically equivalent to a given
- * string. For example, here are some sample results:
-Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
-1: \\u0041\\u030A\\u0064\\u0307\\u0327
- = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
-2: \\u0041\\u030A\\u0064\\u0327\\u0307
- = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
-3: \\u0041\\u030A\\u1E0B\\u0327
- = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
-4: \\u0041\\u030A\\u1E11\\u0307
- = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
-5: \\u00C5\\u0064\\u0307\\u0327
- = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
-6: \\u00C5\\u0064\\u0327\\u0307
- = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
-7: \\u00C5\\u1E0B\\u0327
- = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
-8: \\u00C5\\u1E11\\u0307
- = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
-9: \\u212B\\u0064\\u0307\\u0327
- = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
-10: \\u212B\\u0064\\u0327\\u0307
- = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
-11: \\u212B\\u1E0B\\u0327
- = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
-12: \\u212B\\u1E11\\u0307
- = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
- *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
- * since it has not been optimized for that situation.
- * Note, CanonicalIterator is not intended to be subclassed.
- * @author M. Davis
- * @author C++ port by V. Weinstein
- * @stable ICU 2.4
- */
-class U_COMMON_API CanonicalIterator : public UObject {
-public:
-    /**
-     * Construct a CanonicalIterator object
-     * @param source    string to get results for
-     * @param status    Fill-in parameter which receives the status of this operation.
-     * @stable ICU 2.4
-     */
-    CanonicalIterator(const UnicodeString &source, UErrorCode &status);
-
-    /** Destructor
-     *  Cleans pieces
-     * @stable ICU 2.4
-     */
-    virtual ~CanonicalIterator();
-
-    /**
-     * Gets the NFD form of the current source we are iterating over.
-     * @return gets the source: NOTE: it is the NFD form of source
-     * @stable ICU 2.4
-     */
-    UnicodeString getSource();
-
-    /**
-     * Resets the iterator so that one can start again from the beginning.
-     * @stable ICU 2.4
-     */
-    void reset();
-
-    /**
-     * Get the next canonically equivalent string.
-     * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
-     * @return the next string that is canonically equivalent. A bogus string is returned when
-     * the iteration is done.
-     * @stable ICU 2.4
-     */
-    UnicodeString next();
-
-    /**
-     * Set a new source for this iterator. Allows object reuse.
-     * @param newSource     the source string to iterate against. This allows the same iterator to be used
-     *                     while changing the source string, saving object creation.
-     * @param status        Fill-in parameter which receives the status of this operation.
-     * @stable ICU 2.4
-     */
-    void setSource(const UnicodeString &newSource, UErrorCode &status);
-
-    /**
-     * Dumb recursive implementation of permutation.
-     * TODO: optimize
-     * @param source     the string to find permutations for
-     * @param skipZeros  determine if skip zeros
-     * @param result     the results in a set.
-     * @param status       Fill-in parameter which receives the status of this operation.
-     * @internal
-     */
-    static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.2
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.2
-     */
-    virtual UClassID getDynamicClassID() const;
-
-private:
-    // ===================== PRIVATES ==============================
-    // private default constructor
-    CanonicalIterator();
-
-
-    /**
-     * Copy constructor. Private for now.
-     * @internal
-     */
-    CanonicalIterator(const CanonicalIterator& other);
-
-    /**
-     * Assignment operator. Private for now.
-     * @internal
-     */
-    CanonicalIterator& operator=(const CanonicalIterator& other);
-
-    // fields
-    UnicodeString source;
-    UBool done;
-
-    // 2 dimensional array holds the pieces of the string with
-    // their different canonically equivalent representations
-    UnicodeString **pieces;
-    int32_t pieces_length;
-    int32_t *pieces_lengths;
-
-    // current is used in iterating to combine pieces
-    int32_t *current;
-    int32_t current_length;
-
-    // transient fields
-    UnicodeString buffer;
-
-    // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
-    UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
-
-    //Set getEquivalents2(String segment);
-    Hashtable *getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status);
-    //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
-
-    /**
-     * See if the decomposition of cp2 is at segment starting at segmentPos
-     * (with canonical rearrangment!)
-     * If so, take the remainder, and return the equivalents
-     */
-    //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
-    Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
-    //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
-
-    void cleanPieces();
-
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/caniter.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/caniter.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/caniter.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/caniter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,201 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2006, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+
+#ifndef CANITER_H
+#define CANITER_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: Canonical Iterator
+ */
+ 
+/** Should permutation skip characters with combining class zero
+ *  Should be either TRUE or FALSE. This is a compile time option
+ *  @stable ICU 2.4
+ */
+#ifndef CANITER_SKIP_ZEROES
+#define CANITER_SKIP_ZEROES TRUE
+#endif
+
+U_NAMESPACE_BEGIN
+
+class Hashtable;
+
+/**
+ * This class allows one to iterate through all the strings that are canonically equivalent to a given
+ * string. For example, here are some sample results:
+Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+1: \\u0041\\u030A\\u0064\\u0307\\u0327
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+2: \\u0041\\u030A\\u0064\\u0327\\u0307
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+3: \\u0041\\u030A\\u1E0B\\u0327
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+4: \\u0041\\u030A\\u1E11\\u0307
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+5: \\u00C5\\u0064\\u0307\\u0327
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+6: \\u00C5\\u0064\\u0327\\u0307
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+7: \\u00C5\\u1E0B\\u0327
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+8: \\u00C5\\u1E11\\u0307
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+9: \\u212B\\u0064\\u0307\\u0327
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+10: \\u212B\\u0064\\u0327\\u0307
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+11: \\u212B\\u1E0B\\u0327
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+12: \\u212B\\u1E11\\u0307
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+ *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
+ * since it has not been optimized for that situation.
+ * Note, CanonicalIterator is not intended to be subclassed.
+ * @author M. Davis
+ * @author C++ port by V. Weinstein
+ * @stable ICU 2.4
+ */
+class U_COMMON_API CanonicalIterator : public UObject {
+public:
+    /**
+     * Construct a CanonicalIterator object
+     * @param source    string to get results for
+     * @param status    Fill-in parameter which receives the status of this operation.
+     * @stable ICU 2.4
+     */
+    CanonicalIterator(const UnicodeString &source, UErrorCode &status);
+
+    /** Destructor
+     *  Cleans pieces
+     * @stable ICU 2.4
+     */
+    virtual ~CanonicalIterator();
+
+    /**
+     * Gets the NFD form of the current source we are iterating over.
+     * @return gets the source: NOTE: it is the NFD form of source
+     * @stable ICU 2.4
+     */
+    UnicodeString getSource();
+
+    /**
+     * Resets the iterator so that one can start again from the beginning.
+     * @stable ICU 2.4
+     */
+    void reset();
+
+    /**
+     * Get the next canonically equivalent string.
+     * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
+     * @return the next string that is canonically equivalent. A bogus string is returned when
+     * the iteration is done.
+     * @stable ICU 2.4
+     */
+    UnicodeString next();
+
+    /**
+     * Set a new source for this iterator. Allows object reuse.
+     * @param newSource     the source string to iterate against. This allows the same iterator to be used
+     *                     while changing the source string, saving object creation.
+     * @param status        Fill-in parameter which receives the status of this operation.
+     * @stable ICU 2.4
+     */
+    void setSource(const UnicodeString &newSource, UErrorCode &status);
+
+    /**
+     * Dumb recursive implementation of permutation.
+     * TODO: optimize
+     * @param source     the string to find permutations for
+     * @param skipZeros  determine if skip zeros
+     * @param result     the results in a set.
+     * @param status       Fill-in parameter which receives the status of this operation.
+     * @internal
+     */
+    static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const;
+
+private:
+    // ===================== PRIVATES ==============================
+    // private default constructor
+    CanonicalIterator();
+
+
+    /**
+     * Copy constructor. Private for now.
+     * @internal
+     */
+    CanonicalIterator(const CanonicalIterator& other);
+
+    /**
+     * Assignment operator. Private for now.
+     * @internal
+     */
+    CanonicalIterator& operator=(const CanonicalIterator& other);
+
+    // fields
+    UnicodeString source;
+    UBool done;
+
+    // 2 dimensional array holds the pieces of the string with
+    // their different canonically equivalent representations
+    UnicodeString **pieces;
+    int32_t pieces_length;
+    int32_t *pieces_lengths;
+
+    // current is used in iterating to combine pieces
+    int32_t *current;
+    int32_t current_length;
+
+    // transient fields
+    UnicodeString buffer;
+
+    // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
+    UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
+
+    //Set getEquivalents2(String segment);
+    Hashtable *getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status);
+    //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
+
+    /**
+     * See if the decomposition of cp2 is at segment starting at segmentPos
+     * (with canonical rearrangment!)
+     * If so, take the remainder, and return the equivalents
+     */
+    //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
+    Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
+    //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
+
+    void cleanPieces();
+
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/chariter.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/chariter.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/chariter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,716 +0,0 @@
-/*
-********************************************************************
-*
-*   Copyright (C) 1997-2005, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-********************************************************************
-*/
-
-#ifndef CHARITER_H
-#define CHARITER_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-/**
- * \file
- * \brief C++ API: Character Iterator
- */
- 
-U_NAMESPACE_BEGIN
-/**
- * Abstract class that defines an API for forward-only iteration
- * on text objects.
- * This is a minimal interface for iteration without random access
- * or backwards iteration. It is especially useful for wrapping
- * streams with converters into an object for collation or
- * normalization.
- *
- * <p>Characters can be accessed in two ways: as code units or as
- * code points.
- * Unicode code points are 21-bit integers and are the scalar values
- * of Unicode characters. ICU uses the type UChar32 for them.
- * Unicode code units are the storage units of a given
- * Unicode/UCS Transformation Format (a character encoding scheme).
- * With UTF-16, all code points can be represented with either one
- * or two code units ("surrogates").
- * String storage is typically based on code units, while properties
- * of characters are typically determined using code point values.
- * Some processes may be designed to work with sequences of code units,
- * or it may be known that all characters that are important to an
- * algorithm can be represented with single code units.
- * Other processes will need to use the code point access functions.</p>
- *
- * <p>ForwardCharacterIterator provides nextPostInc() to access
- * a code unit and advance an internal position into the text object,
- * similar to a <code>return text[position++]</code>.<br>
- * It provides next32PostInc() to access a code point and advance an internal
- * position.</p>
- *
- * <p>next32PostInc() assumes that the current position is that of
- * the beginning of a code point, i.e., of its first code unit.
- * After next32PostInc(), this will be true again.
- * In general, access to code units and code points in the same
- * iteration loop should not be mixed. In UTF-16, if the current position
- * is on a second code unit (Low Surrogate), then only that code unit
- * is returned even by next32PostInc().</p>
- *
- * <p>For iteration with either function, there are two ways to
- * check for the end of the iteration. When there are no more
- * characters in the text object:
- * <ul>
- * <li>The hasNext() function returns FALSE.</li>
- * <li>nextPostInc() and next32PostInc() return DONE
- *     when one attempts to read beyond the end of the text object.</li>
- * </ul>
- *
- * Example:
- * \code 
- * void function1(ForwardCharacterIterator &it) {
- *     UChar32 c;
- *     while(it.hasNext()) {
- *         c=it.next32PostInc();
- *         // use c
- *     }
- * }
- *
- * void function1(ForwardCharacterIterator &it) {
- *     UChar c;
- *     while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
- *         // use c
- *      }
- *  }
- * \endcode
- * </p>
- *
- * @stable ICU 2.0
- */
-class U_COMMON_API ForwardCharacterIterator : public UObject {
-public:
-    /**
-     * Value returned by most of ForwardCharacterIterator's functions
-     * when the iterator has reached the limits of its iteration.
-     * @stable ICU 2.0
-     */
-    enum { DONE = 0xffff };
-    
-    /**
-     * Destructor.  
-     * @stable ICU 2.0
-     */
-    virtual ~ForwardCharacterIterator();
-    
-    /**
-     * Returns true when both iterators refer to the same
-     * character in the same character-storage object.  
-     * @param that The ForwardCharacterIterator to be compared for equality
-     * @return true when both iterators refer to the same
-     * character in the same character-storage object
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
-    
-    /**
-     * Returns true when the iterators refer to different
-     * text-storage objects, or to different characters in the
-     * same text-storage object.  
-     * @param that The ForwardCharacterIterator to be compared for inequality
-     * @return true when the iterators refer to different
-     * text-storage objects, or to different characters in the
-     * same text-storage object
-     * @stable ICU 2.0
-     */
-    inline UBool operator!=(const ForwardCharacterIterator& that) const;
-    
-    /**
-     * Generates a hash code for this iterator.  
-     * @return the hash code.
-     * @stable ICU 2.0
-     */
-    virtual int32_t hashCode(void) const = 0;
-    
-    /**
-     * Returns a UClassID for this ForwardCharacterIterator ("poor man's
-     * RTTI").<P> Despite the fact that this function is public,
-     * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! 
-     * @return a UClassID for this ForwardCharacterIterator 
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const = 0;
-    
-    /**
-     * Gets the current code unit for returning and advances to the next code unit
-     * in the iteration range
-     * (toward endIndex()).  If there are
-     * no more code units to return, returns DONE.
-     * @return the current code unit.
-     * @stable ICU 2.0
-     */
-    virtual UChar         nextPostInc(void) = 0;
-    
-    /**
-     * Gets the current code point for returning and advances to the next code point
-     * in the iteration range
-     * (toward endIndex()).  If there are
-     * no more code points to return, returns DONE.
-     * @return the current code point.
-     * @stable ICU 2.0
-     */
-    virtual UChar32       next32PostInc(void) = 0;
-    
-    /**
-     * Returns FALSE if there are no more code units or code points
-     * at or after the current position in the iteration range.
-     * This is used with nextPostInc() or next32PostInc() in forward
-     * iteration.
-     * @returns FALSE if there are no more code units or code points
-     * at or after the current position in the iteration range.
-     * @stable ICU 2.0
-     */
-    virtual UBool        hasNext() = 0;
-    
-protected:
-    /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
-    ForwardCharacterIterator();
-    
-    /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
-    ForwardCharacterIterator(const ForwardCharacterIterator &other);
-    
-    /**
-     * Assignment operator to be overridden in the implementing class.
-     * @stable ICU 2.0
-     */
-    ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
-};
-
-/**
- * Abstract class that defines an API for iteration
- * on text objects.
- * This is an interface for forward and backward iteration
- * and random access into a text object.
- *
- * <p>The API provides backward compatibility to the Java and older ICU
- * CharacterIterator classes but extends them significantly:
- * <ol>
- * <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
- * <li>While the old API functions provided forward iteration with
- *     "pre-increment" semantics, the new one also provides functions
- *     with "post-increment" semantics. They are more efficient and should
- *     be the preferred iterator functions for new implementations.
- *     The backward iteration always had "pre-decrement" semantics, which
- *     are efficient.</li>
- * <li>Just like ForwardCharacterIterator, it provides access to
- *     both code units and code points. Code point access versions are available
- *     for the old and the new iteration semantics.</li>
- * <li>There are new functions for setting and moving the current position
- *     without returning a character, for efficiency.</li>
- * </ol>
- *
- * See ForwardCharacterIterator for examples for using the new forward iteration
- * functions. For backward iteration, there is also a hasPrevious() function
- * that can be used analogously to hasNext().
- * The old functions work as before and are shown below.</p>
- *
- * <p>Examples for some of the new functions:</p>
- *
- * Forward iteration with hasNext():
- * \code
- * void forward1(CharacterIterator &it) {
- *     UChar32 c;
- *     for(it.setToStart(); it.hasNext();) {
- *         c=it.next32PostInc();
- *         // use c
- *     }
- *  }
- * \endcode
- * Forward iteration more similar to loops with the old forward iteration,
- * showing a way to convert simple for() loops:
- * \code
- * void forward2(CharacterIterator &it) {
- *     UChar c;
- *     for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
- *          // use c
- *      }
- * }
- * \endcode
- * Backward iteration with setToEnd() and hasPrevious():
- * \code
- *  void backward1(CharacterIterator &it) {
- *      UChar32 c;
- *      for(it.setToEnd(); it.hasPrevious();) {
- *         c=it.previous32();
- *          // use c
- *      }
- *  }
- * \endcode
- * Backward iteration with a more traditional for() loop:
- * \code
- * void backward2(CharacterIterator &it) {
- *     UChar c;
- *     for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
- *         // use c
- *      }
- *  }
- * \endcode
- *
- * Example for random access:
- * \code
- *  void random(CharacterIterator &it) {
- *      // set to the third code point from the beginning
- *      it.move32(3, CharacterIterator::kStart);
- *      // get a code point from here without moving the position
- *      UChar32 c=it.current32();
- *      // get the position
- *      int32_t pos=it.getIndex();
- *      // get the previous code unit
- *      UChar u=it.previous();
- *      // move back one more code unit
- *      it.move(-1, CharacterIterator::kCurrent);
- *      // set the position back to where it was
- *      // and read the same code point c and move beyond it
- *      it.setIndex(pos);
- *      if(c!=it.next32PostInc()) {
- *          exit(1); // CharacterIterator inconsistent
- *      }
- *  }
- * \endcode
- *
- * <p>Examples, especially for the old API:</p>
- *
- * Function processing characters, in this example simple output
- * <pre>
- * \code
- *  void processChar( UChar c )
- *  {
- *      cout << " " << c;
- *  }
- * \endcode
- * </pre>
- * Traverse the text from start to finish
- * <pre> 
- * \code
- *  void traverseForward(CharacterIterator& iter)
- *  {
- *      for(UChar c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
- *          processChar(c);
- *      }
- *  }
- * \endcode
- * </pre>
- * Traverse the text backwards, from end to start
- * <pre>
- * \code
- *  void traverseBackward(CharacterIterator& iter)
- *  {
- *      for(UChar c = iter.last(); c != CharacterIterator.DONE; c = iter.previous()) {
- *          processChar(c);
- *      }
- *  }
- * \endcode
- * </pre>
- * Traverse both forward and backward from a given position in the text. 
- * Calls to notBoundary() in this example represents some additional stopping criteria.
- * <pre>
- * \code
- * void traverseOut(CharacterIterator& iter, int32_t pos)
- * {
- *      UChar c;
- *      for (c = iter.setIndex(pos);
- *      c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
- *          c = iter.next()) {}
- *      int32_t end = iter.getIndex();
- *      for (c = iter.setIndex(pos);
- *          c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
- *          c = iter.previous()) {}
- *      int32_t start = iter.getIndex() + 1;
- *  
- *      cout << "start: " << start << " end: " << end << endl;
- *      for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
- *          processChar(c);
- *     }
- *  }
- * \endcode
- * </pre>
- * Creating a StringCharacterIterator and calling the test functions
- * <pre>
- * \code
- *  void CharacterIterator_Example( void )
- *   {
- *       cout << endl << "===== CharacterIterator_Example: =====" << endl;
- *       UnicodeString text("Ein kleiner Satz.");
- *       StringCharacterIterator iterator(text);
- *       cout << "----- traverseForward: -----------" << endl;
- *       traverseForward( iterator );
- *       cout << endl << endl << "----- traverseBackward: ----------" << endl;
- *       traverseBackward( iterator );
- *       cout << endl << endl << "----- traverseOut: ---------------" << endl;
- *       traverseOut( iterator, 7 );
- *       cout << endl << endl << "-----" << endl;
- *   }
- * \endcode
- * </pre>
- *
- * @stable ICU 2.0
- */
-class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
-public:
-    /**
-     * Origin enumeration for the move() and move32() functions.
-     * @stable ICU 2.0
-     */
-    enum EOrigin { kStart, kCurrent, kEnd };
-
-    /**
-     * Returns a pointer to a new CharacterIterator of the same
-     * concrete class as this one, and referring to the same
-     * character in the same text-storage object as this one.  The
-     * caller is responsible for deleting the new clone.  
-     * @return a pointer to a new CharacterIterator
-     * @stable ICU 2.0
-     */
-    virtual CharacterIterator* clone(void) const = 0;
-
-    /**
-     * Sets the iterator to refer to the first code unit in its
-     * iteration range, and returns that code unit.
-     * This can be used to begin an iteration with next().
-     * @return the first code unit in its iteration range.
-     * @stable ICU 2.0
-     */
-    virtual UChar         first(void) = 0;
-
-    /**
-     * Sets the iterator to refer to the first code unit in its
-     * iteration range, returns that code unit, and moves the position
-     * to the second code unit. This is an alternative to setToStart()
-     * for forward iteration with nextPostInc().
-     * @return the first code unit in its iteration range.
-     * @stable ICU 2.0
-     */
-    virtual UChar         firstPostInc(void);
-
-    /**
-     * Sets the iterator to refer to the first code point in its
-     * iteration range, and returns that code unit,
-     * This can be used to begin an iteration with next32().
-     * Note that an iteration with next32PostInc(), beginning with,
-     * e.g., setToStart() or firstPostInc(), is more efficient.
-     * @return the first code point in its iteration range.
-     * @stable ICU 2.0
-     */
-    virtual UChar32       first32(void) = 0;
-
-    /**
-     * Sets the iterator to refer to the first code point in its
-     * iteration range, returns that code point, and moves the position
-     * to the second code point. This is an alternative to setToStart()
-     * for forward iteration with next32PostInc().
-     * @return the first code point in its iteration range.
-     * @stable ICU 2.0
-     */
-    virtual UChar32       first32PostInc(void);
-
-    /**
-     * Sets the iterator to refer to the first code unit or code point in its
-     * iteration range. This can be used to begin a forward
-     * iteration with nextPostInc() or next32PostInc().
-     * @return the start position of the iteration range
-     * @stable ICU 2.0
-     */
-    inline int32_t    setToStart();
-
-    /**
-     * Sets the iterator to refer to the last code unit in its
-     * iteration range, and returns that code unit.
-     * This can be used to begin an iteration with previous().
-     * @return the last code unit.
-     * @stable ICU 2.0
-     */
-    virtual UChar         last(void) = 0;
-        
-    /**
-     * Sets the iterator to refer to the last code point in its
-     * iteration range, and returns that code unit.
-     * This can be used to begin an iteration with previous32().
-     * @return the last code point.
-     * @stable ICU 2.0
-     */
-    virtual UChar32       last32(void) = 0;
-
-    /**
-     * Sets the iterator to the end of its iteration range, just behind
-     * the last code unit or code point. This can be used to begin a backward
-     * iteration with previous() or previous32().
-     * @return the end position of the iteration range
-     * @stable ICU 2.0
-     */
-    inline int32_t    setToEnd();
-
-    /**
-     * Sets the iterator to refer to the "position"-th code unit
-     * in the text-storage object the iterator refers to, and
-     * returns that code unit.  
-     * @param position the "position"-th code unit in the text-storage object
-     * @return the "position"-th code unit.
-     * @stable ICU 2.0
-     */
-    virtual UChar         setIndex(int32_t position) = 0;
-
-    /**
-     * Sets the iterator to refer to the beginning of the code point
-     * that contains the "position"-th code unit
-     * in the text-storage object the iterator refers to, and
-     * returns that code point.
-     * The current position is adjusted to the beginning of the code point
-     * (its first code unit).
-     * @param position the "position"-th code unit in the text-storage object
-     * @return the "position"-th code point.
-     * @stable ICU 2.0
-     */
-    virtual UChar32       setIndex32(int32_t position) = 0;
-
-    /**
-     * Returns the code unit the iterator currently refers to. 
-     * @return the current code unit. 
-     * @stable ICU 2.0
-     */
-    virtual UChar         current(void) const = 0;
-        
-    /**
-     * Returns the code point the iterator currently refers to.  
-     * @return the current code point.
-     * @stable ICU 2.0
-     */
-    virtual UChar32       current32(void) const = 0;
-        
-    /**
-     * Advances to the next code unit in the iteration range
-     * (toward endIndex()), and returns that code unit.  If there are
-     * no more code units to return, returns DONE.
-     * @return the next code unit.
-     * @stable ICU 2.0
-     */
-    virtual UChar         next(void) = 0;
-        
-    /**
-     * Advances to the next code point in the iteration range
-     * (toward endIndex()), and returns that code point.  If there are
-     * no more code points to return, returns DONE.
-     * Note that iteration with "pre-increment" semantics is less
-     * efficient than iteration with "post-increment" semantics
-     * that is provided by next32PostInc().
-     * @return the next code point.
-     * @stable ICU 2.0
-     */
-    virtual UChar32       next32(void) = 0;
-        
-    /**
-     * Advances to the previous code unit in the iteration range
-     * (toward startIndex()), and returns that code unit.  If there are
-     * no more code units to return, returns DONE.  
-     * @return the previous code unit.
-     * @stable ICU 2.0
-     */
-    virtual UChar         previous(void) = 0;
-
-    /**
-     * Advances to the previous code point in the iteration range
-     * (toward startIndex()), and returns that code point.  If there are
-     * no more code points to return, returns DONE. 
-     * @return the previous code point. 
-     * @stable ICU 2.0
-     */
-    virtual UChar32       previous32(void) = 0;
-
-    /**
-     * Returns FALSE if there are no more code units or code points
-     * before the current position in the iteration range.
-     * This is used with previous() or previous32() in backward
-     * iteration.
-     * @return FALSE if there are no more code units or code points
-     * before the current position in the iteration range, return TRUE otherwise.
-     * @stable ICU 2.0
-     */
-    virtual UBool        hasPrevious() = 0;
-
-    /**
-     * Returns the numeric index in the underlying text-storage
-     * object of the character returned by first().  Since it's
-     * possible to create an iterator that iterates across only
-     * part of a text-storage object, this number isn't
-     * necessarily 0.  
-     * @returns the numeric index in the underlying text-storage
-     * object of the character returned by first().
-     * @stable ICU 2.0
-     */
-    inline int32_t       startIndex(void) const;
-        
-    /**
-     * Returns the numeric index in the underlying text-storage
-     * object of the position immediately BEYOND the character
-     * returned by last().  
-     * @return the numeric index in the underlying text-storage
-     * object of the position immediately BEYOND the character
-     * returned by last().
-     * @stable ICU 2.0
-     */
-    inline int32_t       endIndex(void) const;
-        
-    /**
-     * Returns the numeric index in the underlying text-storage
-     * object of the character the iterator currently refers to
-     * (i.e., the character returned by current()).  
-     * @return the numberic index in the text-storage object of 
-     * the character the iterator currently refers to
-     * @stable ICU 2.0
-     */
-    inline int32_t       getIndex(void) const;
-
-    /**
-     * Returns the length of the entire text in the underlying
-     * text-storage object.
-     * @return the length of the entire text in the text-storage object
-     * @stable ICU 2.0
-     */
-    inline int32_t           getLength() const;
-
-    /**
-     * Moves the current position relative to the start or end of the
-     * iteration range, or relative to the current position itself.
-     * The movement is expressed in numbers of code units forward
-     * or backward by specifying a positive or negative delta.
-     * @param delta the position relative to origin. A positive delta means forward;
-     * a negative delta means backward.
-     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
-     * @return the new position
-     * @stable ICU 2.0
-     */
-    virtual int32_t      move(int32_t delta, EOrigin origin) = 0;
-
-    /**
-     * Moves the current position relative to the start or end of the
-     * iteration range, or relative to the current position itself.
-     * The movement is expressed in numbers of code points forward
-     * or backward by specifying a positive or negative delta.
-     * @param delta the position relative to origin. A positive delta means forward;
-     * a negative delta means backward.
-     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
-     * @return the new position
-     * @stable ICU 2.0
-     */
-    virtual int32_t      move32(int32_t delta, EOrigin origin) = 0;
-
-    /**
-     * Copies the text under iteration into the UnicodeString
-     * referred to by "result".  
-     * @param result Receives a copy of the text under iteration.  
-     * @stable ICU 2.0
-     */
-    virtual void            getText(UnicodeString&  result) = 0;
-
-protected:
-    /**
-     * Empty constructor.
-     * @stable ICU 2.0
-     */
-    CharacterIterator();
-
-    /**
-     * Constructor, just setting the length field in this base class.
-     * @stable ICU 2.0
-     */
-    CharacterIterator(int32_t length);
-
-    /**
-     * Constructor, just setting the length and position fields in this base class.
-     * @stable ICU 2.0
-     */
-    CharacterIterator(int32_t length, int32_t position);
-
-    /**
-     * Constructor, just setting the length, start, end, and position fields in this base class.
-     * @stable ICU 2.0
-     */
-    CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
-  
-    /**
-     * Copy constructor.
-     *
-     * @param that The CharacterIterator to be copied
-     * @stable ICU 2.0
-     */
-    CharacterIterator(const CharacterIterator &that);
-
-    /**
-     * Assignment operator.  Sets this CharacterIterator to have the same behavior,
-     * as the one passed in.
-     * @param that The CharacterIterator passed in.
-     * @return the newly set CharacterIterator.
-     * @stable ICU 2.0
-     */
-    CharacterIterator &operator=(const CharacterIterator &that);
-
-    /**
-     * Base class text length field.
-     * Necessary this for correct getText() and hashCode().
-     * @stable ICU 2.0
-     */
-    int32_t textLength;
-
-    /**
-     * Base class field for the current position.
-     * @stable ICU 2.0
-     */
-    int32_t  pos;
-
-    /**
-     * Base class field for the start of the iteration range.
-     * @stable ICU 2.0
-     */
-    int32_t  begin;
-
-    /**
-     * Base class field for the end of the iteration range.
-     * @stable ICU 2.0
-     */
-    int32_t  end;
-};
-
-inline UBool
-ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
-    return !operator==(that);
-}
-
-inline int32_t
-CharacterIterator::setToStart() {
-    return move(0, kStart);
-}
-
-inline int32_t
-CharacterIterator::setToEnd() {
-    return move(0, kEnd);
-}
-
-inline int32_t
-CharacterIterator::startIndex(void) const {
-    return begin;
-}
-
-inline int32_t
-CharacterIterator::endIndex(void) const {
-    return end;
-}
-
-inline int32_t
-CharacterIterator::getIndex(void) const {
-    return pos;
-}
-
-inline int32_t
-CharacterIterator::getLength(void) const {
-    return textLength;
-}
-
-U_NAMESPACE_END
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/chariter.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/chariter.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/chariter.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/chariter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,716 @@
+/*
+********************************************************************
+*
+*   Copyright (C) 1997-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+********************************************************************
+*/
+
+#ifndef CHARITER_H
+#define CHARITER_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+/**
+ * \file
+ * \brief C++ API: Character Iterator
+ */
+ 
+U_NAMESPACE_BEGIN
+/**
+ * Abstract class that defines an API for forward-only iteration
+ * on text objects.
+ * This is a minimal interface for iteration without random access
+ * or backwards iteration. It is especially useful for wrapping
+ * streams with converters into an object for collation or
+ * normalization.
+ *
+ * <p>Characters can be accessed in two ways: as code units or as
+ * code points.
+ * Unicode code points are 21-bit integers and are the scalar values
+ * of Unicode characters. ICU uses the type UChar32 for them.
+ * Unicode code units are the storage units of a given
+ * Unicode/UCS Transformation Format (a character encoding scheme).
+ * With UTF-16, all code points can be represented with either one
+ * or two code units ("surrogates").
+ * String storage is typically based on code units, while properties
+ * of characters are typically determined using code point values.
+ * Some processes may be designed to work with sequences of code units,
+ * or it may be known that all characters that are important to an
+ * algorithm can be represented with single code units.
+ * Other processes will need to use the code point access functions.</p>
+ *
+ * <p>ForwardCharacterIterator provides nextPostInc() to access
+ * a code unit and advance an internal position into the text object,
+ * similar to a <code>return text[position++]</code>.<br>
+ * It provides next32PostInc() to access a code point and advance an internal
+ * position.</p>
+ *
+ * <p>next32PostInc() assumes that the current position is that of
+ * the beginning of a code point, i.e., of its first code unit.
+ * After next32PostInc(), this will be true again.
+ * In general, access to code units and code points in the same
+ * iteration loop should not be mixed. In UTF-16, if the current position
+ * is on a second code unit (Low Surrogate), then only that code unit
+ * is returned even by next32PostInc().</p>
+ *
+ * <p>For iteration with either function, there are two ways to
+ * check for the end of the iteration. When there are no more
+ * characters in the text object:
+ * <ul>
+ * <li>The hasNext() function returns FALSE.</li>
+ * <li>nextPostInc() and next32PostInc() return DONE
+ *     when one attempts to read beyond the end of the text object.</li>
+ * </ul>
+ *
+ * Example:
+ * \code 
+ * void function1(ForwardCharacterIterator &it) {
+ *     UChar32 c;
+ *     while(it.hasNext()) {
+ *         c=it.next32PostInc();
+ *         // use c
+ *     }
+ * }
+ *
+ * void function1(ForwardCharacterIterator &it) {
+ *     UChar c;
+ *     while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
+ *         // use c
+ *      }
+ *  }
+ * \endcode
+ * </p>
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API ForwardCharacterIterator : public UObject {
+public:
+    /**
+     * Value returned by most of ForwardCharacterIterator's functions
+     * when the iterator has reached the limits of its iteration.
+     * @stable ICU 2.0
+     */
+    enum { DONE = 0xffff };
+    
+    /**
+     * Destructor.  
+     * @stable ICU 2.0
+     */
+    virtual ~ForwardCharacterIterator();
+    
+    /**
+     * Returns true when both iterators refer to the same
+     * character in the same character-storage object.  
+     * @param that The ForwardCharacterIterator to be compared for equality
+     * @return true when both iterators refer to the same
+     * character in the same character-storage object
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
+    
+    /**
+     * Returns true when the iterators refer to different
+     * text-storage objects, or to different characters in the
+     * same text-storage object.  
+     * @param that The ForwardCharacterIterator to be compared for inequality
+     * @return true when the iterators refer to different
+     * text-storage objects, or to different characters in the
+     * same text-storage object
+     * @stable ICU 2.0
+     */
+    inline UBool operator!=(const ForwardCharacterIterator& that) const;
+    
+    /**
+     * Generates a hash code for this iterator.  
+     * @return the hash code.
+     * @stable ICU 2.0
+     */
+    virtual int32_t hashCode(void) const = 0;
+    
+    /**
+     * Returns a UClassID for this ForwardCharacterIterator ("poor man's
+     * RTTI").<P> Despite the fact that this function is public,
+     * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! 
+     * @return a UClassID for this ForwardCharacterIterator 
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const = 0;
+    
+    /**
+     * Gets the current code unit for returning and advances to the next code unit
+     * in the iteration range
+     * (toward endIndex()).  If there are
+     * no more code units to return, returns DONE.
+     * @return the current code unit.
+     * @stable ICU 2.0
+     */
+    virtual UChar         nextPostInc(void) = 0;
+    
+    /**
+     * Gets the current code point for returning and advances to the next code point
+     * in the iteration range
+     * (toward endIndex()).  If there are
+     * no more code points to return, returns DONE.
+     * @return the current code point.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       next32PostInc(void) = 0;
+    
+    /**
+     * Returns FALSE if there are no more code units or code points
+     * at or after the current position in the iteration range.
+     * This is used with nextPostInc() or next32PostInc() in forward
+     * iteration.
+     * @returns FALSE if there are no more code units or code points
+     * at or after the current position in the iteration range.
+     * @stable ICU 2.0
+     */
+    virtual UBool        hasNext() = 0;
+    
+protected:
+    /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
+    ForwardCharacterIterator();
+    
+    /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
+    ForwardCharacterIterator(const ForwardCharacterIterator &other);
+    
+    /**
+     * Assignment operator to be overridden in the implementing class.
+     * @stable ICU 2.0
+     */
+    ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
+};
+
+/**
+ * Abstract class that defines an API for iteration
+ * on text objects.
+ * This is an interface for forward and backward iteration
+ * and random access into a text object.
+ *
+ * <p>The API provides backward compatibility to the Java and older ICU
+ * CharacterIterator classes but extends them significantly:
+ * <ol>
+ * <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
+ * <li>While the old API functions provided forward iteration with
+ *     "pre-increment" semantics, the new one also provides functions
+ *     with "post-increment" semantics. They are more efficient and should
+ *     be the preferred iterator functions for new implementations.
+ *     The backward iteration always had "pre-decrement" semantics, which
+ *     are efficient.</li>
+ * <li>Just like ForwardCharacterIterator, it provides access to
+ *     both code units and code points. Code point access versions are available
+ *     for the old and the new iteration semantics.</li>
+ * <li>There are new functions for setting and moving the current position
+ *     without returning a character, for efficiency.</li>
+ * </ol>
+ *
+ * See ForwardCharacterIterator for examples for using the new forward iteration
+ * functions. For backward iteration, there is also a hasPrevious() function
+ * that can be used analogously to hasNext().
+ * The old functions work as before and are shown below.</p>
+ *
+ * <p>Examples for some of the new functions:</p>
+ *
+ * Forward iteration with hasNext():
+ * \code
+ * void forward1(CharacterIterator &it) {
+ *     UChar32 c;
+ *     for(it.setToStart(); it.hasNext();) {
+ *         c=it.next32PostInc();
+ *         // use c
+ *     }
+ *  }
+ * \endcode
+ * Forward iteration more similar to loops with the old forward iteration,
+ * showing a way to convert simple for() loops:
+ * \code
+ * void forward2(CharacterIterator &it) {
+ *     UChar c;
+ *     for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
+ *          // use c
+ *      }
+ * }
+ * \endcode
+ * Backward iteration with setToEnd() and hasPrevious():
+ * \code
+ *  void backward1(CharacterIterator &it) {
+ *      UChar32 c;
+ *      for(it.setToEnd(); it.hasPrevious();) {
+ *         c=it.previous32();
+ *          // use c
+ *      }
+ *  }
+ * \endcode
+ * Backward iteration with a more traditional for() loop:
+ * \code
+ * void backward2(CharacterIterator &it) {
+ *     UChar c;
+ *     for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
+ *         // use c
+ *      }
+ *  }
+ * \endcode
+ *
+ * Example for random access:
+ * \code
+ *  void random(CharacterIterator &it) {
+ *      // set to the third code point from the beginning
+ *      it.move32(3, CharacterIterator::kStart);
+ *      // get a code point from here without moving the position
+ *      UChar32 c=it.current32();
+ *      // get the position
+ *      int32_t pos=it.getIndex();
+ *      // get the previous code unit
+ *      UChar u=it.previous();
+ *      // move back one more code unit
+ *      it.move(-1, CharacterIterator::kCurrent);
+ *      // set the position back to where it was
+ *      // and read the same code point c and move beyond it
+ *      it.setIndex(pos);
+ *      if(c!=it.next32PostInc()) {
+ *          exit(1); // CharacterIterator inconsistent
+ *      }
+ *  }
+ * \endcode
+ *
+ * <p>Examples, especially for the old API:</p>
+ *
+ * Function processing characters, in this example simple output
+ * <pre>
+ * \code
+ *  void processChar( UChar c )
+ *  {
+ *      cout << " " << c;
+ *  }
+ * \endcode
+ * </pre>
+ * Traverse the text from start to finish
+ * <pre> 
+ * \code
+ *  void traverseForward(CharacterIterator& iter)
+ *  {
+ *      for(UChar c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
+ *          processChar(c);
+ *      }
+ *  }
+ * \endcode
+ * </pre>
+ * Traverse the text backwards, from end to start
+ * <pre>
+ * \code
+ *  void traverseBackward(CharacterIterator& iter)
+ *  {
+ *      for(UChar c = iter.last(); c != CharacterIterator.DONE; c = iter.previous()) {
+ *          processChar(c);
+ *      }
+ *  }
+ * \endcode
+ * </pre>
+ * Traverse both forward and backward from a given position in the text. 
+ * Calls to notBoundary() in this example represents some additional stopping criteria.
+ * <pre>
+ * \code
+ * void traverseOut(CharacterIterator& iter, int32_t pos)
+ * {
+ *      UChar c;
+ *      for (c = iter.setIndex(pos);
+ *      c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
+ *          c = iter.next()) {}
+ *      int32_t end = iter.getIndex();
+ *      for (c = iter.setIndex(pos);
+ *          c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
+ *          c = iter.previous()) {}
+ *      int32_t start = iter.getIndex() + 1;
+ *  
+ *      cout << "start: " << start << " end: " << end << endl;
+ *      for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
+ *          processChar(c);
+ *     }
+ *  }
+ * \endcode
+ * </pre>
+ * Creating a StringCharacterIterator and calling the test functions
+ * <pre>
+ * \code
+ *  void CharacterIterator_Example( void )
+ *   {
+ *       cout << endl << "===== CharacterIterator_Example: =====" << endl;
+ *       UnicodeString text("Ein kleiner Satz.");
+ *       StringCharacterIterator iterator(text);
+ *       cout << "----- traverseForward: -----------" << endl;
+ *       traverseForward( iterator );
+ *       cout << endl << endl << "----- traverseBackward: ----------" << endl;
+ *       traverseBackward( iterator );
+ *       cout << endl << endl << "----- traverseOut: ---------------" << endl;
+ *       traverseOut( iterator, 7 );
+ *       cout << endl << endl << "-----" << endl;
+ *   }
+ * \endcode
+ * </pre>
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
+public:
+    /**
+     * Origin enumeration for the move() and move32() functions.
+     * @stable ICU 2.0
+     */
+    enum EOrigin { kStart, kCurrent, kEnd };
+
+    /**
+     * Returns a pointer to a new CharacterIterator of the same
+     * concrete class as this one, and referring to the same
+     * character in the same text-storage object as this one.  The
+     * caller is responsible for deleting the new clone.  
+     * @return a pointer to a new CharacterIterator
+     * @stable ICU 2.0
+     */
+    virtual CharacterIterator* clone(void) const = 0;
+
+    /**
+     * Sets the iterator to refer to the first code unit in its
+     * iteration range, and returns that code unit.
+     * This can be used to begin an iteration with next().
+     * @return the first code unit in its iteration range.
+     * @stable ICU 2.0
+     */
+    virtual UChar         first(void) = 0;
+
+    /**
+     * Sets the iterator to refer to the first code unit in its
+     * iteration range, returns that code unit, and moves the position
+     * to the second code unit. This is an alternative to setToStart()
+     * for forward iteration with nextPostInc().
+     * @return the first code unit in its iteration range.
+     * @stable ICU 2.0
+     */
+    virtual UChar         firstPostInc(void);
+
+    /**
+     * Sets the iterator to refer to the first code point in its
+     * iteration range, and returns that code unit,
+     * This can be used to begin an iteration with next32().
+     * Note that an iteration with next32PostInc(), beginning with,
+     * e.g., setToStart() or firstPostInc(), is more efficient.
+     * @return the first code point in its iteration range.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       first32(void) = 0;
+
+    /**
+     * Sets the iterator to refer to the first code point in its
+     * iteration range, returns that code point, and moves the position
+     * to the second code point. This is an alternative to setToStart()
+     * for forward iteration with next32PostInc().
+     * @return the first code point in its iteration range.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       first32PostInc(void);
+
+    /**
+     * Sets the iterator to refer to the first code unit or code point in its
+     * iteration range. This can be used to begin a forward
+     * iteration with nextPostInc() or next32PostInc().
+     * @return the start position of the iteration range
+     * @stable ICU 2.0
+     */
+    inline int32_t    setToStart();
+
+    /**
+     * Sets the iterator to refer to the last code unit in its
+     * iteration range, and returns that code unit.
+     * This can be used to begin an iteration with previous().
+     * @return the last code unit.
+     * @stable ICU 2.0
+     */
+    virtual UChar         last(void) = 0;
+        
+    /**
+     * Sets the iterator to refer to the last code point in its
+     * iteration range, and returns that code unit.
+     * This can be used to begin an iteration with previous32().
+     * @return the last code point.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       last32(void) = 0;
+
+    /**
+     * Sets the iterator to the end of its iteration range, just behind
+     * the last code unit or code point. This can be used to begin a backward
+     * iteration with previous() or previous32().
+     * @return the end position of the iteration range
+     * @stable ICU 2.0
+     */
+    inline int32_t    setToEnd();
+
+    /**
+     * Sets the iterator to refer to the "position"-th code unit
+     * in the text-storage object the iterator refers to, and
+     * returns that code unit.  
+     * @param position the "position"-th code unit in the text-storage object
+     * @return the "position"-th code unit.
+     * @stable ICU 2.0
+     */
+    virtual UChar         setIndex(int32_t position) = 0;
+
+    /**
+     * Sets the iterator to refer to the beginning of the code point
+     * that contains the "position"-th code unit
+     * in the text-storage object the iterator refers to, and
+     * returns that code point.
+     * The current position is adjusted to the beginning of the code point
+     * (its first code unit).
+     * @param position the "position"-th code unit in the text-storage object
+     * @return the "position"-th code point.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       setIndex32(int32_t position) = 0;
+
+    /**
+     * Returns the code unit the iterator currently refers to. 
+     * @return the current code unit. 
+     * @stable ICU 2.0
+     */
+    virtual UChar         current(void) const = 0;
+        
+    /**
+     * Returns the code point the iterator currently refers to.  
+     * @return the current code point.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       current32(void) const = 0;
+        
+    /**
+     * Advances to the next code unit in the iteration range
+     * (toward endIndex()), and returns that code unit.  If there are
+     * no more code units to return, returns DONE.
+     * @return the next code unit.
+     * @stable ICU 2.0
+     */
+    virtual UChar         next(void) = 0;
+        
+    /**
+     * Advances to the next code point in the iteration range
+     * (toward endIndex()), and returns that code point.  If there are
+     * no more code points to return, returns DONE.
+     * Note that iteration with "pre-increment" semantics is less
+     * efficient than iteration with "post-increment" semantics
+     * that is provided by next32PostInc().
+     * @return the next code point.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       next32(void) = 0;
+        
+    /**
+     * Advances to the previous code unit in the iteration range
+     * (toward startIndex()), and returns that code unit.  If there are
+     * no more code units to return, returns DONE.  
+     * @return the previous code unit.
+     * @stable ICU 2.0
+     */
+    virtual UChar         previous(void) = 0;
+
+    /**
+     * Advances to the previous code point in the iteration range
+     * (toward startIndex()), and returns that code point.  If there are
+     * no more code points to return, returns DONE. 
+     * @return the previous code point. 
+     * @stable ICU 2.0
+     */
+    virtual UChar32       previous32(void) = 0;
+
+    /**
+     * Returns FALSE if there are no more code units or code points
+     * before the current position in the iteration range.
+     * This is used with previous() or previous32() in backward
+     * iteration.
+     * @return FALSE if there are no more code units or code points
+     * before the current position in the iteration range, return TRUE otherwise.
+     * @stable ICU 2.0
+     */
+    virtual UBool        hasPrevious() = 0;
+
+    /**
+     * Returns the numeric index in the underlying text-storage
+     * object of the character returned by first().  Since it's
+     * possible to create an iterator that iterates across only
+     * part of a text-storage object, this number isn't
+     * necessarily 0.  
+     * @returns the numeric index in the underlying text-storage
+     * object of the character returned by first().
+     * @stable ICU 2.0
+     */
+    inline int32_t       startIndex(void) const;
+        
+    /**
+     * Returns the numeric index in the underlying text-storage
+     * object of the position immediately BEYOND the character
+     * returned by last().  
+     * @return the numeric index in the underlying text-storage
+     * object of the position immediately BEYOND the character
+     * returned by last().
+     * @stable ICU 2.0
+     */
+    inline int32_t       endIndex(void) const;
+        
+    /**
+     * Returns the numeric index in the underlying text-storage
+     * object of the character the iterator currently refers to
+     * (i.e., the character returned by current()).  
+     * @return the numberic index in the text-storage object of 
+     * the character the iterator currently refers to
+     * @stable ICU 2.0
+     */
+    inline int32_t       getIndex(void) const;
+
+    /**
+     * Returns the length of the entire text in the underlying
+     * text-storage object.
+     * @return the length of the entire text in the text-storage object
+     * @stable ICU 2.0
+     */
+    inline int32_t           getLength() const;
+
+    /**
+     * Moves the current position relative to the start or end of the
+     * iteration range, or relative to the current position itself.
+     * The movement is expressed in numbers of code units forward
+     * or backward by specifying a positive or negative delta.
+     * @param delta the position relative to origin. A positive delta means forward;
+     * a negative delta means backward.
+     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+     * @return the new position
+     * @stable ICU 2.0
+     */
+    virtual int32_t      move(int32_t delta, EOrigin origin) = 0;
+
+    /**
+     * Moves the current position relative to the start or end of the
+     * iteration range, or relative to the current position itself.
+     * The movement is expressed in numbers of code points forward
+     * or backward by specifying a positive or negative delta.
+     * @param delta the position relative to origin. A positive delta means forward;
+     * a negative delta means backward.
+     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+     * @return the new position
+     * @stable ICU 2.0
+     */
+    virtual int32_t      move32(int32_t delta, EOrigin origin) = 0;
+
+    /**
+     * Copies the text under iteration into the UnicodeString
+     * referred to by "result".  
+     * @param result Receives a copy of the text under iteration.  
+     * @stable ICU 2.0
+     */
+    virtual void            getText(UnicodeString&  result) = 0;
+
+protected:
+    /**
+     * Empty constructor.
+     * @stable ICU 2.0
+     */
+    CharacterIterator();
+
+    /**
+     * Constructor, just setting the length field in this base class.
+     * @stable ICU 2.0
+     */
+    CharacterIterator(int32_t length);
+
+    /**
+     * Constructor, just setting the length and position fields in this base class.
+     * @stable ICU 2.0
+     */
+    CharacterIterator(int32_t length, int32_t position);
+
+    /**
+     * Constructor, just setting the length, start, end, and position fields in this base class.
+     * @stable ICU 2.0
+     */
+    CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
+  
+    /**
+     * Copy constructor.
+     *
+     * @param that The CharacterIterator to be copied
+     * @stable ICU 2.0
+     */
+    CharacterIterator(const CharacterIterator &that);
+
+    /**
+     * Assignment operator.  Sets this CharacterIterator to have the same behavior,
+     * as the one passed in.
+     * @param that The CharacterIterator passed in.
+     * @return the newly set CharacterIterator.
+     * @stable ICU 2.0
+     */
+    CharacterIterator &operator=(const CharacterIterator &that);
+
+    /**
+     * Base class text length field.
+     * Necessary this for correct getText() and hashCode().
+     * @stable ICU 2.0
+     */
+    int32_t textLength;
+
+    /**
+     * Base class field for the current position.
+     * @stable ICU 2.0
+     */
+    int32_t  pos;
+
+    /**
+     * Base class field for the start of the iteration range.
+     * @stable ICU 2.0
+     */
+    int32_t  begin;
+
+    /**
+     * Base class field for the end of the iteration range.
+     * @stable ICU 2.0
+     */
+    int32_t  end;
+};
+
+inline UBool
+ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
+    return !operator==(that);
+}
+
+inline int32_t
+CharacterIterator::setToStart() {
+    return move(0, kStart);
+}
+
+inline int32_t
+CharacterIterator::setToEnd() {
+    return move(0, kEnd);
+}
+
+inline int32_t
+CharacterIterator::startIndex(void) const {
+    return begin;
+}
+
+inline int32_t
+CharacterIterator::endIndex(void) const {
+    return end;
+}
+
+inline int32_t
+CharacterIterator::getIndex(void) const {
+    return pos;
+}
+
+inline int32_t
+CharacterIterator::getLength(void) const {
+    return textLength;
+}
+
+U_NAMESPACE_END
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/choicfmt.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/choicfmt.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/choicfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,746 +0,0 @@
-/*
-********************************************************************************
-*   Copyright (C) 1997-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-********************************************************************************
-*
-* File CHOICFMT.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   02/19/97    aliu        Converted from java.
-*   03/20/97    helena      Finished first cut of implementation and got rid 
-*                           of nextDouble/previousDouble and replaced with
-*                           boolean array.
-*   4/10/97     aliu        Clean up.  Modified to work on AIX.
-*   8/6/97      nos         Removed overloaded constructor, member var 'buffer'.
-*   07/22/98    stephen     Removed operator!= (implemented in Format)
-********************************************************************************
-*/
- 
-#ifndef CHOICFMT_H
-#define CHOICFMT_H
- 
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Choice Format.
- */
- 
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/unistr.h"
-#include "unicode/numfmt.h"
-#include "unicode/fieldpos.h"
-#include "unicode/format.h"
-
-U_NAMESPACE_BEGIN
-
-class MessageFormat;
-
-/**
- * ChoiceFormat converts between ranges of numeric values
- * and string names for those ranges. A <code>ChoiceFormat</code> splits
- * the real number line <code>-Inf</code> to <code>+Inf</code> into two
- * or more contiguous ranges. Each range is mapped to a
- * string. <code>ChoiceFormat</code> is generally used in a
- * <code>MessageFormat</code> for displaying grammatically correct
- * plurals such as &quot;There are 2 files.&quot;</p>
- * 
- * <p>There are two methods of defining a <code>ChoiceFormat</code>; both
- * are equivalent.  The first is by using a string pattern. This is the
- * preferred method in most cases.  The second method is through direct
- * specification of the arrays that make up the
- * <code>ChoiceFormat</code>.</p>
- * 
- * <p><strong>Patterns</strong></p>
- * 
- * <p>In most cases, the preferred way to define a
- * <code>ChoiceFormat</code> is with a pattern. Here is an example of a
- * <code>ChoiceFormat</code> pattern:</p>
- *
- * \htmlonly<pre>    0&#x2264;are no files|1&#x2264;is one file|1&lt;are many files</pre>\endhtmlonly
- * 
- * <p>or equivalently,</p>
- * 
- * \htmlonly<pre>    0#are no files|1#is one file|1&lt;are many files</pre>\endhtmlonly
- * 
- * <p>The pattern consists of a number or <em>range specifiers</em>
- * separated by vertical bars '|' (U+007C). There is no
- * vertical bar after the last range.  Each range specifier is of the
- * form:</p>
- *
- * \htmlonly<blockquote><em>Number Separator String</em></blockquote>\endhtmlonly
- * 
- * <p><em>Number</em> is a floating point number that can be parsed by a
- * default <code>NumberFormat</code> for the US locale. It gives the
- * lower limit of this range. The lower limit is either inclusive or
- * exclusive, depending on the <em>separator</em>. The upper limit is
- * given by the lower limit of the next range.  The Unicode infinity
- * sign \htmlonly&#x221E \endhtmlonly (U+221E) is recognized for positive infinity. It may be preceded by
- * '-' (U+002D) to indicate negative infinity.</p>
- * 
- * <p><em>String</em> is the format string for this range, with special
- * characters enclosed in single quotes (<code>'The #
- * sign'</code>). Single quotes themselves are indicated by two single
- * quotes in a row (<code>'o''clock'</code>).</p>
- * 
- * <p><em>Separator</em> is one of the following single characters:
- * 
- * <ul>
- *   <li>\htmlonly'&#x2264;' \endhtmlonly (U+2264) or '#' (U+0023)
- *   indicates that the lower limit given by <em>Number</em> is
- *   inclusive.  (The two characters are equivalent to ChoiceFormat.)
- *   This means that the limit value <em>Number</em> belongs to this
- *   range.  Another way of saying this is that the corresponding
- *   closure is <code>FALSE</code>.</li>
- *
- *   <li>'<' (U+003C) indicates that the lower limit given by
- *   <em>Number</em> is exclusive.  This means that the value
- *   <em>Number</em> belongs to the prior range.</li> Another way of
- *   saying this is that the corresponding closure is
- *   <code>TRUE</code>.
- * </ul>
- * 
- * <p>See below for more information about closures.</p>
- * 
- * <p><strong>Arrays</strong></p>
- * 
- * <p>A <code>ChoiceFormat</code> defining <code>n</code> intervals
- * (<code>n</code> &gt;= 2) is specified by three arrays of
- * <code>n</code> items:
- * 
- * <ul>
- *   <li><code>double limits[]</code> gives the start of each
- *     interval. This must be a non-decreasing list of values, none of
- *     which may be <code>NaN</code>.</li>
- *   <li><code>UBool closures[]</code> determines whether each limit
- *     value is contained in the interval below it or in the interval
- *     above it. If <code>closures[i]</code> is <code>FALSE</code>, then
- *     <code>limits[i]</code> is a member of interval
- *     <code>i</code>. Otherwise it is a member of interval
- *     <code>i+1</code>. If no closures array is specified, this is
- *     equivalent to having all closures be <code>FALSE</code>. Closures
- *     allow one to specify half-open, open, or closed intervals.</li>
- *   <li><code>UnicodeString formats[]</code> gives the string label
- *     associated with each interval.</li>
- * </ul>
- * 
- * <p><strong>Formatting and Parsing</strong></p>
- * 
- * <p>During formatting, a number is converted to a
- * string. <code>ChoiceFormat</code> accomplishes this by mapping the
- * number to an interval using the following rule. Given a number
- * <code>X</code> and and index value <code>j</code> in the range
- * <code>0..n-1</code>, where <code>n</code> is the number of ranges:</p>
- * 
- * \htmlonly<blockquote>\endhtmlonly<code>X</code> matches <code>j</code> if and only if
- * <code>limit[j] &lt;= X &lt; limit[j+1]</code>
- * \htmlonly</blockquote>\endhtmlonly
- * 
- * <p>(This assumes that all closures are <code>FALSE</code>.  If some
- * closures are <code>TRUE</code> then the relations must be changed to
- * <code>&lt;=</code> or <code>&lt;</code> as appropriate.) If there is
- * no match, then either the first or last index is used, depending on
- * whether the number is too low or too high. Once a number is mapped to
- * an interval <code>j</code>, the string <code>formats[j]</code> is
- * output.</p>
- * 
- * <p>During parsing, a string is converted to a
- * number. <code>ChoiceFormat</code> finds the element
- * <code>formats[j]</code> equal to the string, and returns
- * <code>limits[j]</code> as the parsed value.</p>
- * 
- * <p><strong>Notes</strong></p>
- * 
- * <p>The first limit value does not define a range boundary. For
- * example, in the pattern \htmlonly&quot;<code>1.0#a|2.0#b</code>&quot;\endhtmlonly, the
- * intervals are [-Inf, 2.0) and [2.0, +Inf].  It appears that the first
- * interval should be [1.0, 2.0).  However, since all values that are too
- * small are mapped to range zero, the first interval is effectively
- * [-Inf, 2.0).  However, the first limit value <em>is</em> used during
- * formatting. In this example, <code>parse(&quot;a&quot;)</code> returns
- * 1.0.</p>
- * 
- * <p>There are no gaps between intervals and the entire number line is
- * covered.  A <code>ChoiceFormat</code> maps <em>all</em> possible
- * double values to a finite set of intervals.</p>
- * 
- * <p>The non-number <code>NaN</code> is mapped to interval zero during
- * formatting.</p>
- * 
- * <p><strong>Examples</strong></p>
- * 
- * <p>Here is an example of two arrays that map the number
- * <code>1..7</code> to the English day of the week abbreviations
- * <code>Sun..Sat</code>. No closures array is given; this is the same as
- * specifying all closures to be <code>FALSE</code>.</p>
- * 
- * <pre>    {1,2,3,4,5,6,7},
- *     {&quot;Sun&quot;,&quot;Mon&quot;,&quot;Tue&quot;,&quot;Wed&quot;,&quot;Thur&quot;,&quot;Fri&quot;,&quot;Sat&quot;}</pre>
- * 
- * <p>Here is an example that maps the ranges [-Inf, 1), [1, 1], and (1,
- * +Inf] to three strings. That is, the number line is split into three
- * ranges: x &lt; 1.0, x = 1.0, and x &gt; 1.0.</p>
- * 
- * <pre>    {0, 1, 1},
- *     {FALSE, FALSE, TRUE},
- *     {&quot;no files&quot;, &quot;one file&quot;, &quot;many files&quot;}</pre>
- * 
- * <p>Here is a simple example that shows formatting and parsing: </p>
- * 
- * \code
- *   #include <unicode/choicfmt.h>
- *   #include <unicode/unistr.h>
- *   #include <iostream.h>
- *   
- *   int main(int argc, char *argv[]) {
- *       double limits[] = {1,2,3,4,5,6,7};
- *       UnicodeString monthNames[] = {
- *           "Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
- *       ChoiceFormat fmt(limits, monthNames, 7);
- *       UnicodeString str;
- *       char buf[256];
- *       for (double x = 1.0; x <= 8.0; x += 1.0) {
- *           fmt.format(x, str);
- *           str.extract(0, str.length(), buf, 256, "");
- *           str.truncate(0);
- *           cout << x << " -> "
- *                << buf << endl;
- *       }
- *       cout << endl;
- *       return 0;
- *   }
- * \endcode
- * 
- * <p>Here is a more complex example using a <code>ChoiceFormat</code>
- * constructed from a pattern together with a
- * <code>MessageFormat</code>.</p>
- * 
- * \code
- *   #include <unicode/choicfmt.h>
- *   #include <unicode/msgfmt.h>
- *   #include <unicode/unistr.h>
- *   #include <iostream.h>
- * 
- *   int main(int argc, char *argv[]) {
- *       UErrorCode status = U_ZERO_ERROR;
- *       double filelimits[] = {0,1,2};
- *       UnicodeString filepart[] =
- *           {"are no files","is one file","are {0} files"};
- *       ChoiceFormat* fileform = new ChoiceFormat(filelimits, filepart, 3 );
- *       Format* testFormats[] =
- *           {fileform, NULL, NumberFormat::createInstance(status)};
- *       MessageFormat pattform("There {0} on {1}", status );
- *       pattform.adoptFormats(testFormats, 3);
- *       Formattable testArgs[] = {0L, "Disk A"};
- *       FieldPosition fp(0);
- *       UnicodeString str;
- *       char buf[256];
- *       for (int32_t i = 0; i < 4; ++i) {
- *           Formattable fInt(i);
- *           testArgs[0] = fInt;
- *           pattform.format(testArgs, 2, str, fp, status );
- *           str.extract(0, str.length(), buf, "");
- *           str.truncate(0);
- *           cout << "Output for i=" << i << " : " << buf << endl;
- *       }
- *       cout << endl;
- *       return 0;
- *   }
- * \endcode
- *
- * <p><em>User subclasses are not supported.</em> While clients may write
- * subclasses, such code will not necessarily work and will not be
- * guaranteed to work stably from release to release.
- */
-class U_I18N_API ChoiceFormat: public NumberFormat {
-public:
-    /**
-     * Construct a new ChoiceFormat with the limits and the corresponding formats
-     * based on the pattern.
-     *
-     * @param pattern   Pattern used to construct object.
-     * @param status    Output param to receive success code.  If the
-     *                  pattern cannot be parsed, set to failure code.
-     * @stable ICU 2.0
-     */
-    ChoiceFormat(const UnicodeString& pattern,
-                 UErrorCode& status);
-
-
-    /**
-     * Construct a new ChoiceFormat with the given limits and formats.  Copy
-     * the limits and formats instead of adopting them.
-     *
-     * @param limits    Array of limit values.
-     * @param formats   Array of formats.
-     * @param count     Size of 'limits' and 'formats' arrays.
-     * @stable ICU 2.0
-     */
-    
-    ChoiceFormat(const double* limits,
-                 const UnicodeString* formats,
-                 int32_t count );
-
-    /**
-     * Construct a new ChoiceFormat with the given limits and formats.
-     * Copy the limits and formats (instead of adopting them).  By
-     * default, each limit in the array specifies the inclusive lower
-     * bound of its range, and the exclusive upper bound of the previous
-     * range.  However, if the isLimitOpen element corresponding to a
-     * limit is TRUE, then the limit is the exclusive lower bound of its
-     * range, and the inclusive upper bound of the previous range.
-     * @param limits Array of limit values
-     * @param closures Array of booleans specifying whether each
-     * element of 'limits' is open or closed.  If FALSE, then the
-     * corresponding limit is a member of the range above it.  If TRUE,
-     * then the limit belongs to the range below it.
-     * @param formats Array of formats
-     * @param count Size of 'limits', 'closures', and 'formats' arrays
-     * @stable ICU 2.4
-     */
-    ChoiceFormat(const double* limits,
-                 const UBool* closures,
-                 const UnicodeString* formats,
-                 int32_t count);
-
-    /**
-     * Copy constructor.
-     *
-     * @param that   ChoiceFormat object to be copied from
-     * @stable ICU 2.0
-     */
-    ChoiceFormat(const ChoiceFormat& that);
-
-    /**
-     * Assignment operator.
-     *
-     * @param that   ChoiceFormat object to be copied
-     * @stable ICU 2.0
-     */
-    const ChoiceFormat& operator=(const ChoiceFormat& that);
-
-    /**
-     * Destructor.
-     * @stable ICU 2.0
-     */
-    virtual ~ChoiceFormat();
-
-    /**
-     * Clone this Format object polymorphically. The caller owns the
-     * result and should delete it when done.
-     *
-     * @return a copy of this object
-     * @stable ICU 2.0
-     */
-    virtual Format* clone(void) const;
-
-    /**
-     * Return true if the given Format objects are semantically equal.
-     * Objects of different subclasses are considered unequal.
-     *
-     * @param other    ChoiceFormat object to be compared 
-     * @return         true if other is the same as this. 
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const Format& other) const;
-
-    /**
-     * Sets the pattern.
-     * @param pattern   The pattern to be applied.
-     * @param status    Output param set to success/failure code on
-     *                  exit. If the pattern is invalid, this will be
-     *                  set to a failure result.
-     * @stable ICU 2.0
-     */
-    virtual void applyPattern(const UnicodeString& pattern,
-                              UErrorCode& status);
-
-    /**
-     * Sets the pattern.
-     * @param pattern    The pattern to be applied.
-     * @param parseError Struct to recieve information on position 
-     *                   of error if an error is encountered
-     * @param status     Output param set to success/failure code on
-     *                   exit. If the pattern is invalid, this will be
-     *                   set to a failure result.
-     * @stable ICU 2.0
-     */
-    virtual void applyPattern(const UnicodeString& pattern,
-                             UParseError& parseError,
-                             UErrorCode& status);
-    /**
-     * Gets the pattern.
-     * 
-     * @param pattern    Output param which will recieve the pattern
-     *                   Previous contents are deleted.
-     * @return    A reference to 'pattern'
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& toPattern(UnicodeString &pattern) const;
-
-    /**
-     * Set the choices to be used in formatting.
-     *
-     * @param limitsToCopy      Contains the top value that you want
-     *                          parsed with that format,and should be in
-     *                          ascending sorted order. When formatting X,
-     *                          the choice will be the i, where limit[i]
-     *                          &lt;= X &lt; limit[i+1].
-     * @param formatsToCopy     The format strings you want to use for each limit.
-     * @param count             The size of the above arrays.
-     * @stable ICU 2.0
-     */
-    virtual void setChoices(const double* limitsToCopy,
-                            const UnicodeString* formatsToCopy,
-                            int32_t count );    
-
-    /**
-     * Set the choices to be used in formatting.  See class description
-     * for documenatation of the limits, closures, and formats arrays.
-     * @param limits Array of limits
-     * @param closures Array of limit booleans
-     * @param formats Array of format string
-     * @param count The size of the above arrays
-     * @stable ICU 2.4
-     */
-    virtual void setChoices(const double* limits,
-                            const UBool* closures,
-                            const UnicodeString* formats,
-                            int32_t count);
-
-    /**
-     * Get the limits passed in the constructor.
-     *
-     * @param count    The size of the limits arrays
-     * @return the limits.
-     * @stable ICU 2.0
-     */
-    virtual const double* getLimits(int32_t& count) const;
-    
-    /**
-     * Get the limit booleans passed in the constructor.  The caller
-     * must not delete the result.
-     *
-     * @param count   The size of the arrays
-     * @return the closures
-     * @stable ICU 2.4
-     */
-    virtual const UBool* getClosures(int32_t& count) const;
-
-    /**
-     * Get the formats passed in the constructor.
-     *
-     * @param count   The size of the arrays
-     * @return the formats.
-     * @stable ICU 2.0
-     */
-    virtual const UnicodeString* getFormats(int32_t& count) const;
-
-    /**
-     * Format a double or long number using this object's choices.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& format(double number,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos) const;
-    /**
-     * Format a int_32t number using this object's choices.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& format(int32_t number,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos) const;
-
-    /**
-     * Format an int64_t number using this object's choices.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.8
-     */
-    virtual UnicodeString& format(int64_t number,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos) const;
-
-    /**
-     * Format an array of objects using this object's choices.
-     *
-     * @param objs      The array of objects to be formatted.
-     * @param cnt       The size of objs.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @param success   Output param set to success/failure code on
-     *                  exit. 
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& format(const Formattable* objs,
-                                  int32_t cnt,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos,
-                                  UErrorCode& success) const;
-    /**
-     * Format an object using this object's choices.
-     *
-     *
-     * @param obj       The object to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @param status    Output param set to success/failure code on
-     *                  exit. 
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& format(const Formattable& obj,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos, 
-                                  UErrorCode& status) const;
-
-    /**
-     * Redeclared NumberFormat method.
-     *
-     * @param obj       The object to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param status    Output param set to success/failure code on
-     *                  exit. 
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(const Formattable& obj,
-                          UnicodeString& appendTo,
-                          UErrorCode& status) const;
-
-    /**
-     * Redeclared NumberFormat method.
-     * Format a double number. These methods call the NumberFormat
-     * pure virtual format() methods with the default FieldPosition.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(  double number,
-                            UnicodeString& appendTo) const;
-
-    /**
-     * Redeclared NumberFormat method.
-     * Format a long number. These methods call the NumberFormat
-     * pure virtual format() methods with the default FieldPosition.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(  int32_t number,
-                            UnicodeString& appendTo) const;
-
-   /**
-    * Return a long if possible (e.g. within range LONG_MAX,
-    * LONG_MAX], and with no decimals), otherwise a double.  If
-    * IntegerOnly is set, will stop at a decimal point (or equivalent;
-    * e.g. for rational numbers "1 2/3", will stop after the 1).
-    * <P>
-    * If no object can be parsed, parsePosition is unchanged, and NULL is
-    * returned.
-    *
-    * @param text           The text to be parsed.
-    * @param result         Formattable to be set to the parse result.
-    *                       If parse fails, return contents are undefined.
-    * @param parsePosition  The position to start parsing at on input.
-    *                       On output, moved to after the last successfully
-    *                       parse character. On parse failure, does not change.
-    * @see                  NumberFormat::isParseIntegerOnly
-    * @stable ICU 2.0
-    */
-    virtual void parse(const UnicodeString& text,
-                       Formattable& result,
-                       ParsePosition& parsePosition) const;
-    
-    /**
-    * Return a long if possible (e.g. within range LONG_MAX,
-    * LONG_MAX], and with no decimals), otherwise a double.  If
-    * IntegerOnly is set, will stop at a decimal point (or equivalent;
-    * e.g. for rational numbers "1 2/3", will stop after the 1).
-    * <P>
-    * If no object can be parsed, parsePosition is unchanged, and NULL is
-    * returned.
-    *
-    * @param text           The text to be parsed.
-    * @param result         Formattable to be set to the parse result.
-    *                       If parse fails, return contents are undefined.
-    * @param status         Output param with the formatted string.
-    * @see                  NumberFormat::isParseIntegerOnly
-    * @stable ICU 2.0
-    */
-    virtual void parse(const UnicodeString& text,
-                       Formattable& result,
-                       UErrorCode& status) const;
-    
-    
-public:
-    /**
-     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
-     * This method is to implement a simple version of RTTI, since not all
-     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
-     * clone() methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-
-    /**
-     * Return the class ID for this class.  This is useful only for
-     * comparing to a return value from getDynamicClassID().  For example:
-     * <pre>
-     * .       Base* polymorphic_pointer = createPolymorphicObject();
-     * .       if (polymorphic_pointer->getDynamicClassID() ==
-     * .           Derived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-private:
-    // static cache management (thread-safe)
-  //  static NumberFormat* getNumberFormat(UErrorCode &status); // call this function to 'check out' a numberformat from the cache.
-  //  static void          releaseNumberFormat(NumberFormat *adopt); // call this function to 'return' the number format to the cache.
-    
-    /**
-     * Converts a string to a double value using a default NumberFormat object
-     * which is static (shared by all ChoiceFormat instances).
-     * @param string the string to be converted with.
-     * @return the converted double number.
-     */
-    static double stod(const UnicodeString& string);
-
-    /**
-     * Converts a double value to a string using a default NumberFormat object
-     * which is static (shared by all ChoiceFormat instances).
-     * @param value the double number to be converted with.
-     * @param string the result string.
-     * @return the converted string.
-     */
-    static UnicodeString& dtos(double value, UnicodeString& string);
-
-    ChoiceFormat(); // default constructor not implemented
-
-    /**
-     * Construct a new ChoiceFormat with the limits and the corresponding formats
-     * based on the pattern.
-     *
-     * @param newPattern   Pattern used to construct object.
-     * @param parseError   Struct to recieve information on position 
-     *                     of error if an error is encountered.
-     * @param status       Output param to receive success code.  If the
-     *                     pattern cannot be parsed, set to failure code.
-     * @stable ICU 2.0
-     */
-    ChoiceFormat(const UnicodeString& newPattern,
-                 UParseError& parseError,
-                 UErrorCode& status);
-
-    friend class MessageFormat;
-    /**
-     * Each ChoiceFormat divides the range -Inf..+Inf into fCount
-     * intervals.  The intervals are:
-     *
-     *         0: fChoiceLimits[0]..fChoiceLimits[1]
-     *         1: fChoiceLimits[1]..fChoiceLimits[2]
-     *        ...
-     *  fCount-2: fChoiceLimits[fCount-2]..fChoiceLimits[fCount-1]
-     *  fCount-1: fChoiceLimits[fCount-1]..+Inf
-     *
-     * Interval 0 is special; during formatting (mapping numbers to
-     * strings), it also contains all numbers less than
-     * fChoiceLimits[0], as well as NaN values.
-     *
-     * Interval i maps to and from string fChoiceFormats[i].  When
-     * parsing (mapping strings to numbers), then intervals map to
-     * their lower limit, that is, interval i maps to fChoiceLimit[i].
-     *
-     * The intervals may be closed, half open, or open.  This affects
-     * formatting but does not affect parsing.  Interval i is affected
-     * by fClosures[i] and fClosures[i+1].  If fClosures[i]
-     * is FALSE, then the value fChoiceLimits[i] is in interval i.
-     * That is, intervals i and i are:
-     *
-     *  i-1:                 ... x < fChoiceLimits[i]
-     *    i: fChoiceLimits[i] <= x ...
-     *
-     * If fClosures[i] is TRUE, then the value fChoiceLimits[i] is
-     * in interval i-1.  That is, intervals i-1 and i are:
-     *
-     *  i-1:                ... x <= fChoiceLimits[i]
-     *    i: fChoiceLimits[i] < x ...
-     *
-     * Because of the nature of interval 0, fClosures[0] has no
-     * effect.
-
-     */
-    double*         fChoiceLimits;
-    UBool*          fClosures;
-    UnicodeString*  fChoiceFormats;
-    int32_t         fCount;
-};
- 
-inline UnicodeString&
-ChoiceFormat::format(const Formattable& obj,
-                     UnicodeString& appendTo,
-                     UErrorCode& status) const {
-    // Don't use Format:: - use immediate base class only,
-    // in case immediate base modifies behavior later.
-    return NumberFormat::format(obj, appendTo, status);
-}
-
-inline UnicodeString&
-ChoiceFormat::format(double number,
-                     UnicodeString& appendTo) const {
-    return NumberFormat::format(number, appendTo);
-}
-
-inline UnicodeString&
-ChoiceFormat::format(int32_t number,
-                     UnicodeString& appendTo) const {
-    return NumberFormat::format(number, appendTo);
-}
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _CHOICFMT
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/choicfmt.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/choicfmt.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/choicfmt.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/choicfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,746 @@
+/*
+********************************************************************************
+*   Copyright (C) 1997-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+********************************************************************************
+*
+* File CHOICFMT.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   02/19/97    aliu        Converted from java.
+*   03/20/97    helena      Finished first cut of implementation and got rid 
+*                           of nextDouble/previousDouble and replaced with
+*                           boolean array.
+*   4/10/97     aliu        Clean up.  Modified to work on AIX.
+*   8/6/97      nos         Removed overloaded constructor, member var 'buffer'.
+*   07/22/98    stephen     Removed operator!= (implemented in Format)
+********************************************************************************
+*/
+ 
+#ifndef CHOICFMT_H
+#define CHOICFMT_H
+ 
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Choice Format.
+ */
+ 
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/unistr.h"
+#include "unicode/numfmt.h"
+#include "unicode/fieldpos.h"
+#include "unicode/format.h"
+
+U_NAMESPACE_BEGIN
+
+class MessageFormat;
+
+/**
+ * ChoiceFormat converts between ranges of numeric values
+ * and string names for those ranges. A <code>ChoiceFormat</code> splits
+ * the real number line <code>-Inf</code> to <code>+Inf</code> into two
+ * or more contiguous ranges. Each range is mapped to a
+ * string. <code>ChoiceFormat</code> is generally used in a
+ * <code>MessageFormat</code> for displaying grammatically correct
+ * plurals such as &quot;There are 2 files.&quot;</p>
+ * 
+ * <p>There are two methods of defining a <code>ChoiceFormat</code>; both
+ * are equivalent.  The first is by using a string pattern. This is the
+ * preferred method in most cases.  The second method is through direct
+ * specification of the arrays that make up the
+ * <code>ChoiceFormat</code>.</p>
+ * 
+ * <p><strong>Patterns</strong></p>
+ * 
+ * <p>In most cases, the preferred way to define a
+ * <code>ChoiceFormat</code> is with a pattern. Here is an example of a
+ * <code>ChoiceFormat</code> pattern:</p>
+ *
+ * \htmlonly<pre>    0&#x2264;are no files|1&#x2264;is one file|1&lt;are many files</pre>\endhtmlonly
+ * 
+ * <p>or equivalently,</p>
+ * 
+ * \htmlonly<pre>    0#are no files|1#is one file|1&lt;are many files</pre>\endhtmlonly
+ * 
+ * <p>The pattern consists of a number or <em>range specifiers</em>
+ * separated by vertical bars '|' (U+007C). There is no
+ * vertical bar after the last range.  Each range specifier is of the
+ * form:</p>
+ *
+ * \htmlonly<blockquote><em>Number Separator String</em></blockquote>\endhtmlonly
+ * 
+ * <p><em>Number</em> is a floating point number that can be parsed by a
+ * default <code>NumberFormat</code> for the US locale. It gives the
+ * lower limit of this range. The lower limit is either inclusive or
+ * exclusive, depending on the <em>separator</em>. The upper limit is
+ * given by the lower limit of the next range.  The Unicode infinity
+ * sign \htmlonly&#x221E \endhtmlonly (U+221E) is recognized for positive infinity. It may be preceded by
+ * '-' (U+002D) to indicate negative infinity.</p>
+ * 
+ * <p><em>String</em> is the format string for this range, with special
+ * characters enclosed in single quotes (<code>'The #
+ * sign'</code>). Single quotes themselves are indicated by two single
+ * quotes in a row (<code>'o''clock'</code>).</p>
+ * 
+ * <p><em>Separator</em> is one of the following single characters:
+ * 
+ * <ul>
+ *   <li>\htmlonly'&#x2264;' \endhtmlonly (U+2264) or '#' (U+0023)
+ *   indicates that the lower limit given by <em>Number</em> is
+ *   inclusive.  (The two characters are equivalent to ChoiceFormat.)
+ *   This means that the limit value <em>Number</em> belongs to this
+ *   range.  Another way of saying this is that the corresponding
+ *   closure is <code>FALSE</code>.</li>
+ *
+ *   <li>'<' (U+003C) indicates that the lower limit given by
+ *   <em>Number</em> is exclusive.  This means that the value
+ *   <em>Number</em> belongs to the prior range.</li> Another way of
+ *   saying this is that the corresponding closure is
+ *   <code>TRUE</code>.
+ * </ul>
+ * 
+ * <p>See below for more information about closures.</p>
+ * 
+ * <p><strong>Arrays</strong></p>
+ * 
+ * <p>A <code>ChoiceFormat</code> defining <code>n</code> intervals
+ * (<code>n</code> &gt;= 2) is specified by three arrays of
+ * <code>n</code> items:
+ * 
+ * <ul>
+ *   <li><code>double limits[]</code> gives the start of each
+ *     interval. This must be a non-decreasing list of values, none of
+ *     which may be <code>NaN</code>.</li>
+ *   <li><code>UBool closures[]</code> determines whether each limit
+ *     value is contained in the interval below it or in the interval
+ *     above it. If <code>closures[i]</code> is <code>FALSE</code>, then
+ *     <code>limits[i]</code> is a member of interval
+ *     <code>i</code>. Otherwise it is a member of interval
+ *     <code>i+1</code>. If no closures array is specified, this is
+ *     equivalent to having all closures be <code>FALSE</code>. Closures
+ *     allow one to specify half-open, open, or closed intervals.</li>
+ *   <li><code>UnicodeString formats[]</code> gives the string label
+ *     associated with each interval.</li>
+ * </ul>
+ * 
+ * <p><strong>Formatting and Parsing</strong></p>
+ * 
+ * <p>During formatting, a number is converted to a
+ * string. <code>ChoiceFormat</code> accomplishes this by mapping the
+ * number to an interval using the following rule. Given a number
+ * <code>X</code> and and index value <code>j</code> in the range
+ * <code>0..n-1</code>, where <code>n</code> is the number of ranges:</p>
+ * 
+ * \htmlonly<blockquote>\endhtmlonly<code>X</code> matches <code>j</code> if and only if
+ * <code>limit[j] &lt;= X &lt; limit[j+1]</code>
+ * \htmlonly</blockquote>\endhtmlonly
+ * 
+ * <p>(This assumes that all closures are <code>FALSE</code>.  If some
+ * closures are <code>TRUE</code> then the relations must be changed to
+ * <code>&lt;=</code> or <code>&lt;</code> as appropriate.) If there is
+ * no match, then either the first or last index is used, depending on
+ * whether the number is too low or too high. Once a number is mapped to
+ * an interval <code>j</code>, the string <code>formats[j]</code> is
+ * output.</p>
+ * 
+ * <p>During parsing, a string is converted to a
+ * number. <code>ChoiceFormat</code> finds the element
+ * <code>formats[j]</code> equal to the string, and returns
+ * <code>limits[j]</code> as the parsed value.</p>
+ * 
+ * <p><strong>Notes</strong></p>
+ * 
+ * <p>The first limit value does not define a range boundary. For
+ * example, in the pattern \htmlonly&quot;<code>1.0#a|2.0#b</code>&quot;\endhtmlonly, the
+ * intervals are [-Inf, 2.0) and [2.0, +Inf].  It appears that the first
+ * interval should be [1.0, 2.0).  However, since all values that are too
+ * small are mapped to range zero, the first interval is effectively
+ * [-Inf, 2.0).  However, the first limit value <em>is</em> used during
+ * formatting. In this example, <code>parse(&quot;a&quot;)</code> returns
+ * 1.0.</p>
+ * 
+ * <p>There are no gaps between intervals and the entire number line is
+ * covered.  A <code>ChoiceFormat</code> maps <em>all</em> possible
+ * double values to a finite set of intervals.</p>
+ * 
+ * <p>The non-number <code>NaN</code> is mapped to interval zero during
+ * formatting.</p>
+ * 
+ * <p><strong>Examples</strong></p>
+ * 
+ * <p>Here is an example of two arrays that map the number
+ * <code>1..7</code> to the English day of the week abbreviations
+ * <code>Sun..Sat</code>. No closures array is given; this is the same as
+ * specifying all closures to be <code>FALSE</code>.</p>
+ * 
+ * <pre>    {1,2,3,4,5,6,7},
+ *     {&quot;Sun&quot;,&quot;Mon&quot;,&quot;Tue&quot;,&quot;Wed&quot;,&quot;Thur&quot;,&quot;Fri&quot;,&quot;Sat&quot;}</pre>
+ * 
+ * <p>Here is an example that maps the ranges [-Inf, 1), [1, 1], and (1,
+ * +Inf] to three strings. That is, the number line is split into three
+ * ranges: x &lt; 1.0, x = 1.0, and x &gt; 1.0.</p>
+ * 
+ * <pre>    {0, 1, 1},
+ *     {FALSE, FALSE, TRUE},
+ *     {&quot;no files&quot;, &quot;one file&quot;, &quot;many files&quot;}</pre>
+ * 
+ * <p>Here is a simple example that shows formatting and parsing: </p>
+ * 
+ * \code
+ *   #include <unicode/choicfmt.h>
+ *   #include <unicode/unistr.h>
+ *   #include <iostream.h>
+ *   
+ *   int main(int argc, char *argv[]) {
+ *       double limits[] = {1,2,3,4,5,6,7};
+ *       UnicodeString monthNames[] = {
+ *           "Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
+ *       ChoiceFormat fmt(limits, monthNames, 7);
+ *       UnicodeString str;
+ *       char buf[256];
+ *       for (double x = 1.0; x <= 8.0; x += 1.0) {
+ *           fmt.format(x, str);
+ *           str.extract(0, str.length(), buf, 256, "");
+ *           str.truncate(0);
+ *           cout << x << " -> "
+ *                << buf << endl;
+ *       }
+ *       cout << endl;
+ *       return 0;
+ *   }
+ * \endcode
+ * 
+ * <p>Here is a more complex example using a <code>ChoiceFormat</code>
+ * constructed from a pattern together with a
+ * <code>MessageFormat</code>.</p>
+ * 
+ * \code
+ *   #include <unicode/choicfmt.h>
+ *   #include <unicode/msgfmt.h>
+ *   #include <unicode/unistr.h>
+ *   #include <iostream.h>
+ * 
+ *   int main(int argc, char *argv[]) {
+ *       UErrorCode status = U_ZERO_ERROR;
+ *       double filelimits[] = {0,1,2};
+ *       UnicodeString filepart[] =
+ *           {"are no files","is one file","are {0} files"};
+ *       ChoiceFormat* fileform = new ChoiceFormat(filelimits, filepart, 3 );
+ *       Format* testFormats[] =
+ *           {fileform, NULL, NumberFormat::createInstance(status)};
+ *       MessageFormat pattform("There {0} on {1}", status );
+ *       pattform.adoptFormats(testFormats, 3);
+ *       Formattable testArgs[] = {0L, "Disk A"};
+ *       FieldPosition fp(0);
+ *       UnicodeString str;
+ *       char buf[256];
+ *       for (int32_t i = 0; i < 4; ++i) {
+ *           Formattable fInt(i);
+ *           testArgs[0] = fInt;
+ *           pattform.format(testArgs, 2, str, fp, status );
+ *           str.extract(0, str.length(), buf, "");
+ *           str.truncate(0);
+ *           cout << "Output for i=" << i << " : " << buf << endl;
+ *       }
+ *       cout << endl;
+ *       return 0;
+ *   }
+ * \endcode
+ *
+ * <p><em>User subclasses are not supported.</em> While clients may write
+ * subclasses, such code will not necessarily work and will not be
+ * guaranteed to work stably from release to release.
+ */
+class U_I18N_API ChoiceFormat: public NumberFormat {
+public:
+    /**
+     * Construct a new ChoiceFormat with the limits and the corresponding formats
+     * based on the pattern.
+     *
+     * @param pattern   Pattern used to construct object.
+     * @param status    Output param to receive success code.  If the
+     *                  pattern cannot be parsed, set to failure code.
+     * @stable ICU 2.0
+     */
+    ChoiceFormat(const UnicodeString& pattern,
+                 UErrorCode& status);
+
+
+    /**
+     * Construct a new ChoiceFormat with the given limits and formats.  Copy
+     * the limits and formats instead of adopting them.
+     *
+     * @param limits    Array of limit values.
+     * @param formats   Array of formats.
+     * @param count     Size of 'limits' and 'formats' arrays.
+     * @stable ICU 2.0
+     */
+    
+    ChoiceFormat(const double* limits,
+                 const UnicodeString* formats,
+                 int32_t count );
+
+    /**
+     * Construct a new ChoiceFormat with the given limits and formats.
+     * Copy the limits and formats (instead of adopting them).  By
+     * default, each limit in the array specifies the inclusive lower
+     * bound of its range, and the exclusive upper bound of the previous
+     * range.  However, if the isLimitOpen element corresponding to a
+     * limit is TRUE, then the limit is the exclusive lower bound of its
+     * range, and the inclusive upper bound of the previous range.
+     * @param limits Array of limit values
+     * @param closures Array of booleans specifying whether each
+     * element of 'limits' is open or closed.  If FALSE, then the
+     * corresponding limit is a member of the range above it.  If TRUE,
+     * then the limit belongs to the range below it.
+     * @param formats Array of formats
+     * @param count Size of 'limits', 'closures', and 'formats' arrays
+     * @stable ICU 2.4
+     */
+    ChoiceFormat(const double* limits,
+                 const UBool* closures,
+                 const UnicodeString* formats,
+                 int32_t count);
+
+    /**
+     * Copy constructor.
+     *
+     * @param that   ChoiceFormat object to be copied from
+     * @stable ICU 2.0
+     */
+    ChoiceFormat(const ChoiceFormat& that);
+
+    /**
+     * Assignment operator.
+     *
+     * @param that   ChoiceFormat object to be copied
+     * @stable ICU 2.0
+     */
+    const ChoiceFormat& operator=(const ChoiceFormat& that);
+
+    /**
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~ChoiceFormat();
+
+    /**
+     * Clone this Format object polymorphically. The caller owns the
+     * result and should delete it when done.
+     *
+     * @return a copy of this object
+     * @stable ICU 2.0
+     */
+    virtual Format* clone(void) const;
+
+    /**
+     * Return true if the given Format objects are semantically equal.
+     * Objects of different subclasses are considered unequal.
+     *
+     * @param other    ChoiceFormat object to be compared 
+     * @return         true if other is the same as this. 
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const Format& other) const;
+
+    /**
+     * Sets the pattern.
+     * @param pattern   The pattern to be applied.
+     * @param status    Output param set to success/failure code on
+     *                  exit. If the pattern is invalid, this will be
+     *                  set to a failure result.
+     * @stable ICU 2.0
+     */
+    virtual void applyPattern(const UnicodeString& pattern,
+                              UErrorCode& status);
+
+    /**
+     * Sets the pattern.
+     * @param pattern    The pattern to be applied.
+     * @param parseError Struct to recieve information on position 
+     *                   of error if an error is encountered
+     * @param status     Output param set to success/failure code on
+     *                   exit. If the pattern is invalid, this will be
+     *                   set to a failure result.
+     * @stable ICU 2.0
+     */
+    virtual void applyPattern(const UnicodeString& pattern,
+                             UParseError& parseError,
+                             UErrorCode& status);
+    /**
+     * Gets the pattern.
+     * 
+     * @param pattern    Output param which will recieve the pattern
+     *                   Previous contents are deleted.
+     * @return    A reference to 'pattern'
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& toPattern(UnicodeString &pattern) const;
+
+    /**
+     * Set the choices to be used in formatting.
+     *
+     * @param limitsToCopy      Contains the top value that you want
+     *                          parsed with that format,and should be in
+     *                          ascending sorted order. When formatting X,
+     *                          the choice will be the i, where limit[i]
+     *                          &lt;= X &lt; limit[i+1].
+     * @param formatsToCopy     The format strings you want to use for each limit.
+     * @param count             The size of the above arrays.
+     * @stable ICU 2.0
+     */
+    virtual void setChoices(const double* limitsToCopy,
+                            const UnicodeString* formatsToCopy,
+                            int32_t count );    
+
+    /**
+     * Set the choices to be used in formatting.  See class description
+     * for documenatation of the limits, closures, and formats arrays.
+     * @param limits Array of limits
+     * @param closures Array of limit booleans
+     * @param formats Array of format string
+     * @param count The size of the above arrays
+     * @stable ICU 2.4
+     */
+    virtual void setChoices(const double* limits,
+                            const UBool* closures,
+                            const UnicodeString* formats,
+                            int32_t count);
+
+    /**
+     * Get the limits passed in the constructor.
+     *
+     * @param count    The size of the limits arrays
+     * @return the limits.
+     * @stable ICU 2.0
+     */
+    virtual const double* getLimits(int32_t& count) const;
+    
+    /**
+     * Get the limit booleans passed in the constructor.  The caller
+     * must not delete the result.
+     *
+     * @param count   The size of the arrays
+     * @return the closures
+     * @stable ICU 2.4
+     */
+    virtual const UBool* getClosures(int32_t& count) const;
+
+    /**
+     * Get the formats passed in the constructor.
+     *
+     * @param count   The size of the arrays
+     * @return the formats.
+     * @stable ICU 2.0
+     */
+    virtual const UnicodeString* getFormats(int32_t& count) const;
+
+    /**
+     * Format a double or long number using this object's choices.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& format(double number,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos) const;
+    /**
+     * Format a int_32t number using this object's choices.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& format(int32_t number,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos) const;
+
+    /**
+     * Format an int64_t number using this object's choices.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.8
+     */
+    virtual UnicodeString& format(int64_t number,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos) const;
+
+    /**
+     * Format an array of objects using this object's choices.
+     *
+     * @param objs      The array of objects to be formatted.
+     * @param cnt       The size of objs.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @param success   Output param set to success/failure code on
+     *                  exit. 
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& format(const Formattable* objs,
+                                  int32_t cnt,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos,
+                                  UErrorCode& success) const;
+    /**
+     * Format an object using this object's choices.
+     *
+     *
+     * @param obj       The object to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @param status    Output param set to success/failure code on
+     *                  exit. 
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& format(const Formattable& obj,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos, 
+                                  UErrorCode& status) const;
+
+    /**
+     * Redeclared NumberFormat method.
+     *
+     * @param obj       The object to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param status    Output param set to success/failure code on
+     *                  exit. 
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(const Formattable& obj,
+                          UnicodeString& appendTo,
+                          UErrorCode& status) const;
+
+    /**
+     * Redeclared NumberFormat method.
+     * Format a double number. These methods call the NumberFormat
+     * pure virtual format() methods with the default FieldPosition.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(  double number,
+                            UnicodeString& appendTo) const;
+
+    /**
+     * Redeclared NumberFormat method.
+     * Format a long number. These methods call the NumberFormat
+     * pure virtual format() methods with the default FieldPosition.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(  int32_t number,
+                            UnicodeString& appendTo) const;
+
+   /**
+    * Return a long if possible (e.g. within range LONG_MAX,
+    * LONG_MAX], and with no decimals), otherwise a double.  If
+    * IntegerOnly is set, will stop at a decimal point (or equivalent;
+    * e.g. for rational numbers "1 2/3", will stop after the 1).
+    * <P>
+    * If no object can be parsed, parsePosition is unchanged, and NULL is
+    * returned.
+    *
+    * @param text           The text to be parsed.
+    * @param result         Formattable to be set to the parse result.
+    *                       If parse fails, return contents are undefined.
+    * @param parsePosition  The position to start parsing at on input.
+    *                       On output, moved to after the last successfully
+    *                       parse character. On parse failure, does not change.
+    * @see                  NumberFormat::isParseIntegerOnly
+    * @stable ICU 2.0
+    */
+    virtual void parse(const UnicodeString& text,
+                       Formattable& result,
+                       ParsePosition& parsePosition) const;
+    
+    /**
+    * Return a long if possible (e.g. within range LONG_MAX,
+    * LONG_MAX], and with no decimals), otherwise a double.  If
+    * IntegerOnly is set, will stop at a decimal point (or equivalent;
+    * e.g. for rational numbers "1 2/3", will stop after the 1).
+    * <P>
+    * If no object can be parsed, parsePosition is unchanged, and NULL is
+    * returned.
+    *
+    * @param text           The text to be parsed.
+    * @param result         Formattable to be set to the parse result.
+    *                       If parse fails, return contents are undefined.
+    * @param status         Output param with the formatted string.
+    * @see                  NumberFormat::isParseIntegerOnly
+    * @stable ICU 2.0
+    */
+    virtual void parse(const UnicodeString& text,
+                       Formattable& result,
+                       UErrorCode& status) const;
+    
+    
+public:
+    /**
+     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
+     * This method is to implement a simple version of RTTI, since not all
+     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
+     * clone() methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+    /**
+     * Return the class ID for this class.  This is useful only for
+     * comparing to a return value from getDynamicClassID().  For example:
+     * <pre>
+     * .       Base* polymorphic_pointer = createPolymorphicObject();
+     * .       if (polymorphic_pointer->getDynamicClassID() ==
+     * .           Derived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+private:
+    // static cache management (thread-safe)
+  //  static NumberFormat* getNumberFormat(UErrorCode &status); // call this function to 'check out' a numberformat from the cache.
+  //  static void          releaseNumberFormat(NumberFormat *adopt); // call this function to 'return' the number format to the cache.
+    
+    /**
+     * Converts a string to a double value using a default NumberFormat object
+     * which is static (shared by all ChoiceFormat instances).
+     * @param string the string to be converted with.
+     * @return the converted double number.
+     */
+    static double stod(const UnicodeString& string);
+
+    /**
+     * Converts a double value to a string using a default NumberFormat object
+     * which is static (shared by all ChoiceFormat instances).
+     * @param value the double number to be converted with.
+     * @param string the result string.
+     * @return the converted string.
+     */
+    static UnicodeString& dtos(double value, UnicodeString& string);
+
+    ChoiceFormat(); // default constructor not implemented
+
+    /**
+     * Construct a new ChoiceFormat with the limits and the corresponding formats
+     * based on the pattern.
+     *
+     * @param newPattern   Pattern used to construct object.
+     * @param parseError   Struct to recieve information on position 
+     *                     of error if an error is encountered.
+     * @param status       Output param to receive success code.  If the
+     *                     pattern cannot be parsed, set to failure code.
+     * @stable ICU 2.0
+     */
+    ChoiceFormat(const UnicodeString& newPattern,
+                 UParseError& parseError,
+                 UErrorCode& status);
+
+    friend class MessageFormat;
+    /**
+     * Each ChoiceFormat divides the range -Inf..+Inf into fCount
+     * intervals.  The intervals are:
+     *
+     *         0: fChoiceLimits[0]..fChoiceLimits[1]
+     *         1: fChoiceLimits[1]..fChoiceLimits[2]
+     *        ...
+     *  fCount-2: fChoiceLimits[fCount-2]..fChoiceLimits[fCount-1]
+     *  fCount-1: fChoiceLimits[fCount-1]..+Inf
+     *
+     * Interval 0 is special; during formatting (mapping numbers to
+     * strings), it also contains all numbers less than
+     * fChoiceLimits[0], as well as NaN values.
+     *
+     * Interval i maps to and from string fChoiceFormats[i].  When
+     * parsing (mapping strings to numbers), then intervals map to
+     * their lower limit, that is, interval i maps to fChoiceLimit[i].
+     *
+     * The intervals may be closed, half open, or open.  This affects
+     * formatting but does not affect parsing.  Interval i is affected
+     * by fClosures[i] and fClosures[i+1].  If fClosures[i]
+     * is FALSE, then the value fChoiceLimits[i] is in interval i.
+     * That is, intervals i and i are:
+     *
+     *  i-1:                 ... x < fChoiceLimits[i]
+     *    i: fChoiceLimits[i] <= x ...
+     *
+     * If fClosures[i] is TRUE, then the value fChoiceLimits[i] is
+     * in interval i-1.  That is, intervals i-1 and i are:
+     *
+     *  i-1:                ... x <= fChoiceLimits[i]
+     *    i: fChoiceLimits[i] < x ...
+     *
+     * Because of the nature of interval 0, fClosures[0] has no
+     * effect.
+
+     */
+    double*         fChoiceLimits;
+    UBool*          fClosures;
+    UnicodeString*  fChoiceFormats;
+    int32_t         fCount;
+};
+ 
+inline UnicodeString&
+ChoiceFormat::format(const Formattable& obj,
+                     UnicodeString& appendTo,
+                     UErrorCode& status) const {
+    // Don't use Format:: - use immediate base class only,
+    // in case immediate base modifies behavior later.
+    return NumberFormat::format(obj, appendTo, status);
+}
+
+inline UnicodeString&
+ChoiceFormat::format(double number,
+                     UnicodeString& appendTo) const {
+    return NumberFormat::format(number, appendTo);
+}
+
+inline UnicodeString&
+ChoiceFormat::format(int32_t number,
+                     UnicodeString& appendTo) const {
+    return NumberFormat::format(number, appendTo);
+}
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _CHOICFMT
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/coleitr.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/coleitr.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/coleitr.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,400 +0,0 @@
-/*
- ******************************************************************************
- *   Copyright (C) 1997-2008, International Business Machines
- *   Corporation and others.  All Rights Reserved.
- ******************************************************************************
- */
-
-/**
- * \file 
- * \brief C++ API: Collation Element Iterator.
- */
-
-/**
-* File coleitr.h
-*
-* 
-*
-* Created by: Helena Shih
-*
-* Modification History:
-*
-*  Date       Name        Description
-*
-*  8/18/97    helena      Added internal API documentation.
-* 08/03/98    erm         Synched with 1.2 version CollationElementIterator.java
-* 12/10/99    aliu        Ported Thai collation support from Java.
-* 01/25/01    swquek      Modified into a C++ wrapper calling C APIs (ucoliter.h)
-* 02/19/01    swquek      Removed CollationElementsIterator() since it is 
-*                         private constructor and no calls are made to it
-*/
-
-#ifndef COLEITR_H
-#define COLEITR_H
-
-#include "unicode/utypes.h"
-
- 
-#if !UCONFIG_NO_COLLATION
-
-#include "unicode/uobject.h"
-#include "unicode/tblcoll.h"
-#include "unicode/ucoleitr.h"
-
-/** 
- * The UCollationElements struct.
- * For usage in C programs.
- * @stable ICU 2.0
- */
-typedef struct UCollationElements UCollationElements;
-
-U_NAMESPACE_BEGIN
-
-/**
-* The CollationElementIterator class is used as an iterator to walk through     
-* each character of an international string. Use the iterator to return the
-* ordering priority of the positioned character. The ordering priority of a 
-* character, which we refer to as a key, defines how a character is collated in 
-* the given collation object.
-* For example, consider the following in Spanish:
-* <pre>
-*        "ca" -> the first key is key('c') and second key is key('a').
-*        "cha" -> the first key is key('ch') and second key is key('a').</pre>
-* And in German,
-* <pre> \htmlonly       "&#x00E6;b"-> the first key is key('a'), the second key is key('e'), and
-*        the third key is key('b'). \endhtmlonly </pre>
-* The key of a character, is an integer composed of primary order(short),
-* secondary order(char), and tertiary order(char). Java strictly defines the 
-* size and signedness of its primitive data types. Therefore, the static
-* functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return 
-* int32_t to ensure the correctness of the key value.
-* <p>Example of the iterator usage: (without error checking)
-* <pre>
-* \code
-*   void CollationElementIterator_Example()
-*   {
-*       UnicodeString str = "This is a test";
-*       UErrorCode success = U_ZERO_ERROR;
-*       RuleBasedCollator* rbc =
-*           (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
-*       CollationElementIterator* c =
-*           rbc->createCollationElementIterator( str );
-*       int32_t order = c->next(success);
-*       c->reset();
-*       order = c->previous(success);
-*       delete c;
-*       delete rbc;
-*   }
-* \endcode
-* </pre>
-* <p>
-* CollationElementIterator::next returns the collation order of the next
-* character based on the comparison level of the collator. 
-* CollationElementIterator::previous returns the collation order of the 
-* previous character based on the comparison level of the collator. 
-* The Collation Element Iterator moves only in one direction between calls to
-* CollationElementIterator::reset. That is, CollationElementIterator::next() 
-* and CollationElementIterator::previous can not be inter-used. Whenever 
-* CollationElementIterator::previous is to be called after 
-* CollationElementIterator::next() or vice versa, 
-* CollationElementIterator::reset has to be called first to reset the status, 
-* shifting pointers to either the end or the start of the string. Hence at the 
-* next call of CollationElementIterator::previous or 
-* CollationElementIterator::next(), the first or last collation order will be 
-* returned. 
-* If a change of direction is done without a CollationElementIterator::reset(), 
-* the result is undefined.
-* The result of a forward iterate (CollationElementIterator::next) and 
-* reversed result of the backward iterate (CollationElementIterator::previous) 
-* on the same string are equivalent, if collation orders with the value 
-* UCOL_IGNORABLE are ignored.
-* Character based on the comparison level of the collator.  A collation order 
-* consists of primary order, secondary order and tertiary order.  The data 
-* type of the collation order is <strong>t_int32</strong>. 
-*
-* Note, CollationElementIterator should not be subclassed.
-* @see     Collator
-* @see     RuleBasedCollator
-* @version 1.8 Jan 16 2001
-*/
-class U_I18N_API CollationElementIterator : public UObject {
-public: 
-
-    // CollationElementIterator public data member ------------------------------
-
-    enum {
-        /**
-         * NULLORDER indicates that an error has occured while processing
-         * @stable ICU 2.0
-         */
-        NULLORDER = (int32_t)0xffffffff
-    };
-
-    // CollationElementIterator public constructor/destructor -------------------
-
-    /**
-    * Copy constructor.
-    *
-    * @param other    the object to be copied from
-    * @stable ICU 2.0
-    */
-    CollationElementIterator(const CollationElementIterator& other);
-
-    /** 
-    * Destructor
-    * @stable ICU 2.0
-    */
-    virtual ~CollationElementIterator();
-
-    // CollationElementIterator public methods ----------------------------------
-
-    /**
-    * Returns true if "other" is the same as "this"
-    *
-    * @param other    the object to be compared
-    * @return         true if "other" is the same as "this"
-    * @stable ICU 2.0
-    */
-    UBool operator==(const CollationElementIterator& other) const;
-
-    /**
-    * Returns true if "other" is not the same as "this".
-    *
-    * @param other    the object to be compared
-    * @return         true if "other" is not the same as "this"
-    * @stable ICU 2.0
-    */
-    UBool operator!=(const CollationElementIterator& other) const;
-
-    /**
-    * Resets the cursor to the beginning of the string.
-    * @stable ICU 2.0
-    */
-    void reset(void);
-
-    /**
-    * Gets the ordering priority of the next character in the string.
-    * @param status the error code status.
-    * @return the next character's ordering. otherwise returns NULLORDER if an 
-    *         error has occured or if the end of string has been reached
-    * @stable ICU 2.0
-    */
-    int32_t next(UErrorCode& status);
-
-    /**
-    * Get the ordering priority of the previous collation element in the string.
-    * @param status the error code status.
-    * @return the previous element's ordering. otherwise returns NULLORDER if an 
-    *         error has occured or if the start of string has been reached
-    * @stable ICU 2.0
-    */
-    int32_t previous(UErrorCode& status);
-
-    /**
-    * Gets the primary order of a collation order.
-    * @param order the collation order
-    * @return the primary order of a collation order.
-    * @stable ICU 2.0
-    */
-    static inline int32_t primaryOrder(int32_t order);
-
-    /**
-    * Gets the secondary order of a collation order.
-    * @param order the collation order
-    * @return the secondary order of a collation order.
-    * @stable ICU 2.0
-    */
-    static inline int32_t secondaryOrder(int32_t order);
-
-    /**
-    * Gets the tertiary order of a collation order.
-    * @param order the collation order
-    * @return the tertiary order of a collation order.
-    * @stable ICU 2.0
-    */
-    static inline int32_t tertiaryOrder(int32_t order);
-
-    /**
-    * Return the maximum length of any expansion sequences that end with the 
-    * specified comparison order.
-    * @param order a collation order returned by previous or next.
-    * @return maximum size of the expansion sequences ending with the collation 
-    *         element or 1 if collation element does not occur at the end of any 
-    *         expansion sequence
-    * @stable ICU 2.0
-    */
-    int32_t getMaxExpansion(int32_t order) const;
-
-    /**
-    * Gets the comparison order in the desired strength. Ignore the other
-    * differences.
-    * @param order The order value
-    * @stable ICU 2.0
-    */
-    int32_t strengthOrder(int32_t order) const;
-
-    /**
-    * Sets the source string.
-    * @param str the source string.
-    * @param status the error code status.
-    * @stable ICU 2.0
-    */
-    void setText(const UnicodeString& str, UErrorCode& status);
-
-    /**
-    * Sets the source string.
-    * @param str the source character iterator.
-    * @param status the error code status.
-    * @stable ICU 2.0
-    */
-    void setText(CharacterIterator& str, UErrorCode& status);
-
-    /**
-    * Checks if a comparison order is ignorable.
-    * @param order the collation order.
-    * @return TRUE if a character is ignorable, FALSE otherwise.
-    * @stable ICU 2.0
-    */
-    static inline UBool isIgnorable(int32_t order);
-
-    /**
-    * Gets the offset of the currently processed character in the source string.
-    * @return the offset of the character.
-    * @stable ICU 2.0
-    */
-    int32_t getOffset(void) const;
-
-    /**
-    * Sets the offset of the currently processed character in the source string.
-    * @param newOffset the new offset.
-    * @param status the error code status.
-    * @return the offset of the character.
-    * @stable ICU 2.0
-    */
-    void setOffset(int32_t newOffset, UErrorCode& status);
-
-    /**
-    * ICU "poor man's RTTI", returns a UClassID for the actual class.
-    *
-    * @stable ICU 2.2
-    */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-    * ICU "poor man's RTTI", returns a UClassID for this class.
-    *
-    * @stable ICU 2.2
-    */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-protected:
-  
-    // CollationElementIterator protected constructors --------------------------
-    /**
-    * @stable ICU 2.0
-    */
-    friend class RuleBasedCollator;
-
-    /**
-    * CollationElementIterator constructor. This takes the source string and the 
-    * collation object. The cursor will walk thru the source string based on the 
-    * predefined collation rules. If the source string is empty, NULLORDER will 
-    * be returned on the calls to next().
-    * @param sourceText    the source string.
-    * @param order         the collation object.
-    * @param status        the error code status.
-    * @stable ICU 2.0
-    */
-    CollationElementIterator(const UnicodeString& sourceText,
-        const RuleBasedCollator* order, UErrorCode& status);
-
-    /**
-    * CollationElementIterator constructor. This takes the source string and the 
-    * collation object.  The cursor will walk thru the source string based on the 
-    * predefined collation rules.  If the source string is empty, NULLORDER will 
-    * be returned on the calls to next().
-    * @param sourceText    the source string.
-    * @param order         the collation object.
-    * @param status        the error code status.
-    * @stable ICU 2.0
-    */
-    CollationElementIterator(const CharacterIterator& sourceText,
-        const RuleBasedCollator* order, UErrorCode& status);
-
-    // CollationElementIterator protected methods -------------------------------
-
-    /**
-    * Assignment operator
-    *
-    * @param other    the object to be copied
-    * @stable ICU 2.0
-    */
-    const CollationElementIterator&
-        operator=(const CollationElementIterator& other);
-
-private:
-    CollationElementIterator(); // default constructor not implemented
-
-    // CollationElementIterator private data members ----------------------------
-
-    /**
-    * Data wrapper for collation elements
-    */
-    UCollationElements *m_data_;
-
-    /**
-    * Indicates if m_data_ belongs to this object.
-    */
-    UBool isDataOwned_;
-
-};
-
-// CollationElementIterator inline method defination --------------------------
-
-/**
-* Get the primary order of a collation order.
-* @param order the collation order
-* @return the primary order of a collation order.
-*/
-inline int32_t CollationElementIterator::primaryOrder(int32_t order)
-{
-    order &= RuleBasedCollator::PRIMARYORDERMASK;
-    return (order >> RuleBasedCollator::PRIMARYORDERSHIFT);
-}
-
-/**
-* Get the secondary order of a collation order.
-* @param order the collation order
-* @return the secondary order of a collation order.
-*/
-inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
-{
-    order = order & RuleBasedCollator::SECONDARYORDERMASK;
-    return (order >> RuleBasedCollator::SECONDARYORDERSHIFT);
-}
-
-/**
-* Get the tertiary order of a collation order.
-* @param order the collation order
-* @return the tertiary order of a collation order.
-*/
-inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
-{
-    return (order &= RuleBasedCollator::TERTIARYORDERMASK);
-}
-
-inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const
-{
-    return ucol_getMaxExpansion(m_data_, (uint32_t)order);
-}
-
-inline UBool CollationElementIterator::isIgnorable(int32_t order)
-{
-    return (primaryOrder(order) == RuleBasedCollator::PRIMIGNORABLE);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_COLLATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/coleitr.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/coleitr.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/coleitr.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/coleitr.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,400 @@
+/*
+ ******************************************************************************
+ *   Copyright (C) 1997-2008, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ ******************************************************************************
+ */
+
+/**
+ * \file 
+ * \brief C++ API: Collation Element Iterator.
+ */
+
+/**
+* File coleitr.h
+*
+* 
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+*  Date       Name        Description
+*
+*  8/18/97    helena      Added internal API documentation.
+* 08/03/98    erm         Synched with 1.2 version CollationElementIterator.java
+* 12/10/99    aliu        Ported Thai collation support from Java.
+* 01/25/01    swquek      Modified into a C++ wrapper calling C APIs (ucoliter.h)
+* 02/19/01    swquek      Removed CollationElementsIterator() since it is 
+*                         private constructor and no calls are made to it
+*/
+
+#ifndef COLEITR_H
+#define COLEITR_H
+
+#include "unicode/utypes.h"
+
+ 
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/uobject.h"
+#include "unicode/tblcoll.h"
+#include "unicode/ucoleitr.h"
+
+/** 
+ * The UCollationElements struct.
+ * For usage in C programs.
+ * @stable ICU 2.0
+ */
+typedef struct UCollationElements UCollationElements;
+
+U_NAMESPACE_BEGIN
+
+/**
+* The CollationElementIterator class is used as an iterator to walk through     
+* each character of an international string. Use the iterator to return the
+* ordering priority of the positioned character. The ordering priority of a 
+* character, which we refer to as a key, defines how a character is collated in 
+* the given collation object.
+* For example, consider the following in Spanish:
+* <pre>
+*        "ca" -> the first key is key('c') and second key is key('a').
+*        "cha" -> the first key is key('ch') and second key is key('a').</pre>
+* And in German,
+* <pre> \htmlonly       "&#x00E6;b"-> the first key is key('a'), the second key is key('e'), and
+*        the third key is key('b'). \endhtmlonly </pre>
+* The key of a character, is an integer composed of primary order(short),
+* secondary order(char), and tertiary order(char). Java strictly defines the 
+* size and signedness of its primitive data types. Therefore, the static
+* functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return 
+* int32_t to ensure the correctness of the key value.
+* <p>Example of the iterator usage: (without error checking)
+* <pre>
+* \code
+*   void CollationElementIterator_Example()
+*   {
+*       UnicodeString str = "This is a test";
+*       UErrorCode success = U_ZERO_ERROR;
+*       RuleBasedCollator* rbc =
+*           (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
+*       CollationElementIterator* c =
+*           rbc->createCollationElementIterator( str );
+*       int32_t order = c->next(success);
+*       c->reset();
+*       order = c->previous(success);
+*       delete c;
+*       delete rbc;
+*   }
+* \endcode
+* </pre>
+* <p>
+* CollationElementIterator::next returns the collation order of the next
+* character based on the comparison level of the collator. 
+* CollationElementIterator::previous returns the collation order of the 
+* previous character based on the comparison level of the collator. 
+* The Collation Element Iterator moves only in one direction between calls to
+* CollationElementIterator::reset. That is, CollationElementIterator::next() 
+* and CollationElementIterator::previous can not be inter-used. Whenever 
+* CollationElementIterator::previous is to be called after 
+* CollationElementIterator::next() or vice versa, 
+* CollationElementIterator::reset has to be called first to reset the status, 
+* shifting pointers to either the end or the start of the string. Hence at the 
+* next call of CollationElementIterator::previous or 
+* CollationElementIterator::next(), the first or last collation order will be 
+* returned. 
+* If a change of direction is done without a CollationElementIterator::reset(), 
+* the result is undefined.
+* The result of a forward iterate (CollationElementIterator::next) and 
+* reversed result of the backward iterate (CollationElementIterator::previous) 
+* on the same string are equivalent, if collation orders with the value 
+* UCOL_IGNORABLE are ignored.
+* Character based on the comparison level of the collator.  A collation order 
+* consists of primary order, secondary order and tertiary order.  The data 
+* type of the collation order is <strong>t_int32</strong>. 
+*
+* Note, CollationElementIterator should not be subclassed.
+* @see     Collator
+* @see     RuleBasedCollator
+* @version 1.8 Jan 16 2001
+*/
+class U_I18N_API CollationElementIterator : public UObject {
+public: 
+
+    // CollationElementIterator public data member ------------------------------
+
+    enum {
+        /**
+         * NULLORDER indicates that an error has occured while processing
+         * @stable ICU 2.0
+         */
+        NULLORDER = (int32_t)0xffffffff
+    };
+
+    // CollationElementIterator public constructor/destructor -------------------
+
+    /**
+    * Copy constructor.
+    *
+    * @param other    the object to be copied from
+    * @stable ICU 2.0
+    */
+    CollationElementIterator(const CollationElementIterator& other);
+
+    /** 
+    * Destructor
+    * @stable ICU 2.0
+    */
+    virtual ~CollationElementIterator();
+
+    // CollationElementIterator public methods ----------------------------------
+
+    /**
+    * Returns true if "other" is the same as "this"
+    *
+    * @param other    the object to be compared
+    * @return         true if "other" is the same as "this"
+    * @stable ICU 2.0
+    */
+    UBool operator==(const CollationElementIterator& other) const;
+
+    /**
+    * Returns true if "other" is not the same as "this".
+    *
+    * @param other    the object to be compared
+    * @return         true if "other" is not the same as "this"
+    * @stable ICU 2.0
+    */
+    UBool operator!=(const CollationElementIterator& other) const;
+
+    /**
+    * Resets the cursor to the beginning of the string.
+    * @stable ICU 2.0
+    */
+    void reset(void);
+
+    /**
+    * Gets the ordering priority of the next character in the string.
+    * @param status the error code status.
+    * @return the next character's ordering. otherwise returns NULLORDER if an 
+    *         error has occured or if the end of string has been reached
+    * @stable ICU 2.0
+    */
+    int32_t next(UErrorCode& status);
+
+    /**
+    * Get the ordering priority of the previous collation element in the string.
+    * @param status the error code status.
+    * @return the previous element's ordering. otherwise returns NULLORDER if an 
+    *         error has occured or if the start of string has been reached
+    * @stable ICU 2.0
+    */
+    int32_t previous(UErrorCode& status);
+
+    /**
+    * Gets the primary order of a collation order.
+    * @param order the collation order
+    * @return the primary order of a collation order.
+    * @stable ICU 2.0
+    */
+    static inline int32_t primaryOrder(int32_t order);
+
+    /**
+    * Gets the secondary order of a collation order.
+    * @param order the collation order
+    * @return the secondary order of a collation order.
+    * @stable ICU 2.0
+    */
+    static inline int32_t secondaryOrder(int32_t order);
+
+    /**
+    * Gets the tertiary order of a collation order.
+    * @param order the collation order
+    * @return the tertiary order of a collation order.
+    * @stable ICU 2.0
+    */
+    static inline int32_t tertiaryOrder(int32_t order);
+
+    /**
+    * Return the maximum length of any expansion sequences that end with the 
+    * specified comparison order.
+    * @param order a collation order returned by previous or next.
+    * @return maximum size of the expansion sequences ending with the collation 
+    *         element or 1 if collation element does not occur at the end of any 
+    *         expansion sequence
+    * @stable ICU 2.0
+    */
+    int32_t getMaxExpansion(int32_t order) const;
+
+    /**
+    * Gets the comparison order in the desired strength. Ignore the other
+    * differences.
+    * @param order The order value
+    * @stable ICU 2.0
+    */
+    int32_t strengthOrder(int32_t order) const;
+
+    /**
+    * Sets the source string.
+    * @param str the source string.
+    * @param status the error code status.
+    * @stable ICU 2.0
+    */
+    void setText(const UnicodeString& str, UErrorCode& status);
+
+    /**
+    * Sets the source string.
+    * @param str the source character iterator.
+    * @param status the error code status.
+    * @stable ICU 2.0
+    */
+    void setText(CharacterIterator& str, UErrorCode& status);
+
+    /**
+    * Checks if a comparison order is ignorable.
+    * @param order the collation order.
+    * @return TRUE if a character is ignorable, FALSE otherwise.
+    * @stable ICU 2.0
+    */
+    static inline UBool isIgnorable(int32_t order);
+
+    /**
+    * Gets the offset of the currently processed character in the source string.
+    * @return the offset of the character.
+    * @stable ICU 2.0
+    */
+    int32_t getOffset(void) const;
+
+    /**
+    * Sets the offset of the currently processed character in the source string.
+    * @param newOffset the new offset.
+    * @param status the error code status.
+    * @return the offset of the character.
+    * @stable ICU 2.0
+    */
+    void setOffset(int32_t newOffset, UErrorCode& status);
+
+    /**
+    * ICU "poor man's RTTI", returns a UClassID for the actual class.
+    *
+    * @stable ICU 2.2
+    */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+    * ICU "poor man's RTTI", returns a UClassID for this class.
+    *
+    * @stable ICU 2.2
+    */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+protected:
+  
+    // CollationElementIterator protected constructors --------------------------
+    /**
+    * @stable ICU 2.0
+    */
+    friend class RuleBasedCollator;
+
+    /**
+    * CollationElementIterator constructor. This takes the source string and the 
+    * collation object. The cursor will walk thru the source string based on the 
+    * predefined collation rules. If the source string is empty, NULLORDER will 
+    * be returned on the calls to next().
+    * @param sourceText    the source string.
+    * @param order         the collation object.
+    * @param status        the error code status.
+    * @stable ICU 2.0
+    */
+    CollationElementIterator(const UnicodeString& sourceText,
+        const RuleBasedCollator* order, UErrorCode& status);
+
+    /**
+    * CollationElementIterator constructor. This takes the source string and the 
+    * collation object.  The cursor will walk thru the source string based on the 
+    * predefined collation rules.  If the source string is empty, NULLORDER will 
+    * be returned on the calls to next().
+    * @param sourceText    the source string.
+    * @param order         the collation object.
+    * @param status        the error code status.
+    * @stable ICU 2.0
+    */
+    CollationElementIterator(const CharacterIterator& sourceText,
+        const RuleBasedCollator* order, UErrorCode& status);
+
+    // CollationElementIterator protected methods -------------------------------
+
+    /**
+    * Assignment operator
+    *
+    * @param other    the object to be copied
+    * @stable ICU 2.0
+    */
+    const CollationElementIterator&
+        operator=(const CollationElementIterator& other);
+
+private:
+    CollationElementIterator(); // default constructor not implemented
+
+    // CollationElementIterator private data members ----------------------------
+
+    /**
+    * Data wrapper for collation elements
+    */
+    UCollationElements *m_data_;
+
+    /**
+    * Indicates if m_data_ belongs to this object.
+    */
+    UBool isDataOwned_;
+
+};
+
+// CollationElementIterator inline method defination --------------------------
+
+/**
+* Get the primary order of a collation order.
+* @param order the collation order
+* @return the primary order of a collation order.
+*/
+inline int32_t CollationElementIterator::primaryOrder(int32_t order)
+{
+    order &= RuleBasedCollator::PRIMARYORDERMASK;
+    return (order >> RuleBasedCollator::PRIMARYORDERSHIFT);
+}
+
+/**
+* Get the secondary order of a collation order.
+* @param order the collation order
+* @return the secondary order of a collation order.
+*/
+inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
+{
+    order = order & RuleBasedCollator::SECONDARYORDERMASK;
+    return (order >> RuleBasedCollator::SECONDARYORDERSHIFT);
+}
+
+/**
+* Get the tertiary order of a collation order.
+* @param order the collation order
+* @return the tertiary order of a collation order.
+*/
+inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
+{
+    return (order &= RuleBasedCollator::TERTIARYORDERMASK);
+}
+
+inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const
+{
+    return ucol_getMaxExpansion(m_data_, (uint32_t)order);
+}
+
+inline UBool CollationElementIterator::isIgnorable(int32_t order)
+{
+    return (primaryOrder(order) == RuleBasedCollator::PRIMIGNORABLE);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/coll.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/coll.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/coll.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1035 +0,0 @@
-/*
-******************************************************************************
-*   Copyright (C) 1996-2008, International Business Machines                 *
-*   Corporation and others.  All Rights Reserved.                            *
-******************************************************************************
-*/
-
-/**
- * \file 
- * \brief C++ API: Collation Service.
- */
- 
-/**
-* File coll.h
-*
-* Created by: Helena Shih
-*
-* Modification History:
-*
-*  Date        Name        Description
-* 02/5/97      aliu        Modified createDefault to load collation data from
-*                          binary files when possible.  Added related methods
-*                          createCollationFromFile, chopLocale, createPathName.
-* 02/11/97     aliu        Added members addToCache, findInCache, and fgCache.
-* 02/12/97     aliu        Modified to create objects from RuleBasedCollator cache.
-*                          Moved cache out of Collation class.
-* 02/13/97     aliu        Moved several methods out of this class and into
-*                          RuleBasedCollator, with modifications.  Modified
-*                          createDefault() to call new RuleBasedCollator(Locale&)
-*                          constructor.  General clean up and documentation.
-* 02/20/97     helena      Added clone, operator==, operator!=, operator=, copy
-*                          constructor and getDynamicClassID.
-* 03/25/97     helena      Updated with platform independent data types.
-* 05/06/97     helena      Added memory allocation error detection.
-* 06/20/97     helena      Java class name change.
-* 09/03/97     helena      Added createCollationKeyValues().
-* 02/10/98     damiba      Added compare() with length as parameter.
-* 04/23/99     stephen     Removed EDecompositionMode, merged with
-*                          Normalizer::EMode.
-* 11/02/99     helena      Collator performance enhancements.  Eliminates the
-*                          UnicodeString construction and special case for NO_OP.
-* 11/23/99     srl         More performance enhancements. Inlining of
-*                          critical accessors.
-* 05/15/00     helena      Added version information API.
-* 01/29/01     synwee      Modified into a C++ wrapper which calls C apis
-*                          (ucoll.h).
-*/
-
-#ifndef COLL_H
-#define COLL_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION
-
-#include "unicode/uobject.h"
-#include "unicode/ucol.h"
-#include "unicode/normlzr.h"
-#include "unicode/locid.h"
-#include "unicode/uniset.h"
-#include "unicode/umisc.h"
-
-U_NAMESPACE_BEGIN
-
-class StringEnumeration;
-
-#if !UCONFIG_NO_SERVICE
-/**
- * @stable ICU 2.6
- */
-class CollatorFactory;
-#endif
-
-/**
-* @stable ICU 2.0
-*/
-class CollationKey;
-
-/**
-* The <code>Collator</code> class performs locale-sensitive string
-* comparison.<br>
-* You use this class to build searching and sorting routines for natural
-* language text.<br>
-* <em>Important: </em>The ICU collation service has been reimplemented
-* in order to achieve better performance and UCA compliance.
-* For details, see the
-* <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
-* collation design document</a>.
-* <p>
-* <code>Collator</code> is an abstract base class. Subclasses implement
-* specific collation strategies. One subclass,
-* <code>RuleBasedCollator</code>, is currently provided and is applicable
-* to a wide set of languages. Other subclasses may be created to handle more
-* specialized needs.
-* <p>
-* Like other locale-sensitive classes, you can use the static factory method,
-* <code>createInstance</code>, to obtain the appropriate
-* <code>Collator</code> object for a given locale. You will only need to
-* look at the subclasses of <code>Collator</code> if you need to
-* understand the details of a particular collation strategy or if you need to
-* modify that strategy.
-* <p>
-* The following example shows how to compare two strings using the
-* <code>Collator</code> for the default locale.
-* \htmlonly<blockquote>\endhtmlonly
-* <pre>
-* \code
-* // Compare two strings in the default locale
-* UErrorCode success = U_ZERO_ERROR;
-* Collator* myCollator = Collator::createInstance(success);
-* if (myCollator->compare("abc", "ABC") < 0)
-*   cout << "abc is less than ABC" << endl;
-* else
-*   cout << "abc is greater than or equal to ABC" << endl;
-* \endcode
-* </pre>
-* \htmlonly</blockquote>\endhtmlonly
-* <p>
-* You can set a <code>Collator</code>'s <em>strength</em> property to
-* determine the level of difference considered significant in comparisons.
-* Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>,
-* <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>.
-* The exact assignment of strengths to language features is locale dependant.
-* For example, in Czech, "e" and "f" are considered primary differences,
-* while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary
-* differences and "e" and "e" are identical. The following shows how both case
-* and accents could be ignored for US English.
-* \htmlonly<blockquote>\endhtmlonly
-* <pre>
-* \code
-* //Get the Collator for US English and set its strength to PRIMARY
-* UErrorCode success = U_ZERO_ERROR;
-* Collator* usCollator = Collator::createInstance(Locale::US, success);
-* usCollator->setStrength(Collator::PRIMARY);
-* if (usCollator->compare("abc", "ABC") == 0)
-*     cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl;
-* \endcode
-* </pre>
-* \htmlonly</blockquote>\endhtmlonly
-* <p>
-* For comparing strings exactly once, the <code>compare</code> method
-* provides the best performance. When sorting a list of strings however, it
-* is generally necessary to compare each string multiple times. In this case,
-* sort keys provide better performance. The <code>getSortKey</code> methods
-* convert a string to a series of bytes that can be compared bitwise against
-* other sort keys using <code>strcmp()</code>. Sort keys are written as
-* zero-terminated byte strings. They consist of several substrings, one for
-* each collation strength level, that are delimited by 0x01 bytes.
-* If the string code points are appended for UCOL_IDENTICAL, then they are
-* processed for correct code point order comparison and may contain 0x01
-* bytes but not zero bytes.
-* </p>
-* <p>
-* An older set of APIs returns a <code>CollationKey</code> object that wraps
-* the sort key bytes instead of returning the bytes themselves.
-* Its use is deprecated, but it is still available for compatibility with
-* Java.
-* </p>
-* <p>
-* <strong>Note:</strong> <code>Collator</code>s with different Locale,
-* and CollationStrength settings will return different sort
-* orders for the same set of strings. Locales have specific collation rules,
-* and the way in which secondary and tertiary differences are taken into
-* account, for example, will result in a different sorting order for same
-* strings.
-* </p>
-* @see         RuleBasedCollator
-* @see         CollationKey
-* @see         CollationElementIterator
-* @see         Locale
-* @see         Normalizer
-* @version     2.0 11/15/01
-*/
-
-class U_I18N_API Collator : public UObject {
-public:
-
-    // Collator public enums -----------------------------------------------
-
-    /**
-     * Base letter represents a primary difference. Set comparison level to
-     * PRIMARY to ignore secondary and tertiary differences.<br>
-     * Use this to set the strength of a Collator object.<br>
-     * Example of primary difference, "abc" &lt; "abd"
-     *
-     * Diacritical differences on the same base letter represent a secondary
-     * difference. Set comparison level to SECONDARY to ignore tertiary
-     * differences. Use this to set the strength of a Collator object.<br>
-     * Example of secondary difference, "&auml;" >> "a".
-     *
-     * Uppercase and lowercase versions of the same character represents a
-     * tertiary difference.  Set comparison level to TERTIARY to include all
-     * comparison differences. Use this to set the strength of a Collator
-     * object.<br>
-     * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
-     *
-     * Two characters are considered "identical" when they have the same unicode
-     * spellings.<br>
-     * For example, "&auml;" == "&auml;".
-     *
-     * UCollationStrength is also used to determine the strength of sort keys
-     * generated from Collator objects.
-     * @stable ICU 2.0
-     */
-    enum ECollationStrength
-    {
-        PRIMARY    = 0,
-        SECONDARY  = 1,
-        TERTIARY   = 2,
-        QUATERNARY = 3,
-        IDENTICAL  = 15
-    };
-
-    /**
-     * LESS is returned if source string is compared to be less than target
-     * string in the compare() method.
-     * EQUAL is returned if source string is compared to be equal to target
-     * string in the compare() method.
-     * GREATER is returned if source string is compared to be greater than
-     * target string in the compare() method.
-     * @see Collator#compare
-     * @deprecated ICU 2.6. Use C enum UCollationResult defined in ucol.h
-     */
-    enum EComparisonResult
-    {
-        LESS = -1,
-        EQUAL = 0,
-        GREATER = 1
-    };
-
-    // Collator public destructor -----------------------------------------
-
-    /**
-     * Destructor
-     * @stable ICU 2.0
-     */
-    virtual ~Collator();
-
-    // Collator public methods --------------------------------------------
-
-    /**
-     * Returns true if "other" is the same as "this"
-     * @param other Collator object to be compared
-     * @return true if other is the same as this.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const Collator& other) const;
-
-    /**
-     * Returns true if "other" is not the same as "this".
-     * @param other Collator object to be compared
-     * @return true if other is not the same as this.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator!=(const Collator& other) const;
-
-    /**
-     * Makes a shallow copy of the current object.
-     * @return a copy of this object
-     * @stable ICU 2.0
-     */
-    virtual Collator* clone(void) const = 0;
-
-    /**
-     * Creates the Collator object for the current default locale.
-     * The default locale is determined by Locale::getDefault.
-     * The UErrorCode& err parameter is used to return status information to the user.
-     * To check whether the construction succeeded or not, you should check the
-     * value of U_SUCCESS(err).  If you wish more detailed information, you can
-     * check for informational error results which still indicate success.
-     * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
-     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
-     * used. U_USING_DEFAULT_ERROR indicates that the default locale data was
-     * used; neither the requested locale nor any of its fall back locales
-     * could be found.
-     * The caller owns the returned object and is responsible for deleting it.
-     *
-     * @param err    the error code status.
-     * @return       the collation object of the default locale.(for example, en_US)
-     * @see Locale#getDefault
-     * @stable ICU 2.0
-     */
-    static Collator* U_EXPORT2 createInstance(UErrorCode&  err);
-
-    /**
-     * Gets the table-based collation object for the desired locale. The
-     * resource of the desired locale will be loaded by ResourceLoader.
-     * Locale::ENGLISH is the base collation table and all other languages are
-     * built on top of it with additional language-specific modifications.
-     * The UErrorCode& err parameter is used to return status information to the user.
-     * To check whether the construction succeeded or not, you should check
-     * the value of U_SUCCESS(err).  If you wish more detailed information, you
-     * can check for informational error results which still indicate success.
-     * U_USING_FALLBACK_ERROR indicates that a fall back locale was used.  For
-     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
-     * used.  U_USING_DEFAULT_ERROR indicates that the default locale data was
-     * used; neither the requested locale nor any of its fall back locales
-     * could be found.
-     * The caller owns the returned object and is responsible for deleting it.
-     * @param loc    The locale ID for which to open a collator.
-     * @param err    the error code status.
-     * @return       the created table-based collation object based on the desired
-     *               locale.
-     * @see Locale
-     * @see ResourceLoader
-     * @stable ICU 2.2
-     */
-    static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err);
-
-#ifdef U_USE_COLLATION_OBSOLETE_2_6
-    /**
-     * Create a Collator with a specific version.
-     * This is the same as createInstance(loc, err) except that getVersion() of
-     * the returned object is guaranteed to be the same as the version
-     * parameter.
-     * This is designed to be used to open the same collator for a given
-     * locale even when ICU is updated.
-     * The same locale and version guarantees the same sort keys and
-     * comparison results.
-     * <p>
-     * Note: this API will be removed in a future release.  Use
-     * <tt>createInstance(const Locale&, UErrorCode&) instead.</tt></p>
-     *
-     * @param loc The locale ID for which to open a collator.
-     * @param version The requested collator version.
-     * @param err A reference to a UErrorCode,
-     *            must not indicate a failure before calling this function.
-     * @return A pointer to a Collator, or 0 if an error occurred
-     *         or a collator with the requested version is not available.
-     *
-     * @see getVersion
-     * @obsolete ICU 2.6
-     */
-    static Collator *createInstance(const Locale &loc, UVersionInfo version, UErrorCode &err);
-#endif
-
-    /**
-     * The comparison function compares the character data stored in two
-     * different strings. Returns information about whether a string is less
-     * than, greater than or equal to another string.
-     * @param source the source string to be compared with.
-     * @param target the string that is to be compared with the source string.
-     * @return Returns a byte value. GREATER if source is greater
-     * than target; EQUAL if source is equal to target; LESS if source is less
-     * than target
-     * @deprecated ICU 2.6 use the overload with UErrorCode &
-     */
-    virtual EComparisonResult compare(const UnicodeString& source,
-                                      const UnicodeString& target) const;
-
-    /**
-     * The comparison function compares the character data stored in two
-     * different strings. Returns information about whether a string is less
-     * than, greater than or equal to another string.
-     * @param source the source string to be compared with.
-     * @param target the string that is to be compared with the source string.
-     * @param status possible error code
-     * @return Returns an enum value. UCOL_GREATER if source is greater
-     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
-     * than target
-     * @stable ICU 2.6
-     */
-    virtual UCollationResult compare(const UnicodeString& source,
-                                      const UnicodeString& target,
-                                      UErrorCode &status) const = 0;
-
-    /**
-     * Does the same thing as compare but limits the comparison to a specified
-     * length
-     * @param source the source string to be compared with.
-     * @param target the string that is to be compared with the source string.
-     * @param length the length the comparison is limited to
-     * @return Returns a byte value. GREATER if source (up to the specified
-     *         length) is greater than target; EQUAL if source (up to specified
-     *         length) is equal to target; LESS if source (up to the specified
-     *         length) is less  than target.
-     * @deprecated ICU 2.6 use the overload with UErrorCode &
-     */
-    virtual EComparisonResult compare(const UnicodeString& source,
-                                      const UnicodeString& target,
-                                      int32_t length) const;
-
-    /**
-     * Does the same thing as compare but limits the comparison to a specified
-     * length
-     * @param source the source string to be compared with.
-     * @param target the string that is to be compared with the source string.
-     * @param length the length the comparison is limited to
-     * @param status possible error code
-     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
-     *         length) is greater than target; UCOL_EQUAL if source (up to specified
-     *         length) is equal to target; UCOL_LESS if source (up to the specified
-     *         length) is less  than target.
-     * @stable ICU 2.6
-     */
-    virtual UCollationResult compare(const UnicodeString& source,
-                                      const UnicodeString& target,
-                                      int32_t length,
-                                      UErrorCode &status) const = 0;
-
-    /**
-     * The comparison function compares the character data stored in two
-     * different string arrays. Returns information about whether a string array
-     * is less than, greater than or equal to another string array.
-     * @param source the source string array to be compared with.
-     * @param sourceLength the length of the source string array.  If this value
-     *        is equal to -1, the string array is null-terminated.
-     * @param target the string that is to be compared with the source string.
-     * @param targetLength the length of the target string array.  If this value
-     *        is equal to -1, the string array is null-terminated.
-     * @return Returns a byte value. GREATER if source is greater than target;
-     *         EQUAL if source is equal to target; LESS if source is less than
-     *         target
-     * @deprecated ICU 2.6 use the overload with UErrorCode &
-     */
-    virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
-                                      const UChar* target, int32_t targetLength)
-                                      const;
-
-    /**
-     * The comparison function compares the character data stored in two
-     * different string arrays. Returns information about whether a string array
-     * is less than, greater than or equal to another string array.
-     * @param source the source string array to be compared with.
-     * @param sourceLength the length of the source string array.  If this value
-     *        is equal to -1, the string array is null-terminated.
-     * @param target the string that is to be compared with the source string.
-     * @param targetLength the length of the target string array.  If this value
-     *        is equal to -1, the string array is null-terminated.
-     * @param status possible error code
-     * @return Returns an enum value. UCOL_GREATER if source is greater
-     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
-     * than target
-     * @stable ICU 2.6
-     */
-    virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
-                                      const UChar* target, int32_t targetLength,
-                                      UErrorCode &status) const = 0;
-
-    /**
-     * Transforms the string into a series of characters that can be compared
-     * with CollationKey::compareTo. It is not possible to restore the original
-     * string from the chars in the sort key.  The generated sort key handles
-     * only a limited number of ignorable characters.
-     * <p>Use CollationKey::equals or CollationKey::compare to compare the
-     * generated sort keys.
-     * If the source string is null, a null collation key will be returned.
-     * @param source the source string to be transformed into a sort key.
-     * @param key the collation key to be filled in
-     * @param status the error code status.
-     * @return the collation key of the string based on the collation rules.
-     * @see CollationKey#compare
-     * @deprecated ICU 2.8 Use getSortKey(...) instead
-     */
-    virtual CollationKey& getCollationKey(const UnicodeString&  source,
-                                          CollationKey& key,
-                                          UErrorCode& status) const = 0;
-
-    /**
-     * Transforms the string into a series of characters that can be compared
-     * with CollationKey::compareTo. It is not possible to restore the original
-     * string from the chars in the sort key.  The generated sort key handles
-     * only a limited number of ignorable characters.
-     * <p>Use CollationKey::equals or CollationKey::compare to compare the
-     * generated sort keys.
-     * <p>If the source string is null, a null collation key will be returned.
-     * @param source the source string to be transformed into a sort key.
-     * @param sourceLength length of the collation key
-     * @param key the collation key to be filled in
-     * @param status the error code status.
-     * @return the collation key of the string based on the collation rules.
-     * @see CollationKey#compare
-     * @deprecated ICU 2.8 Use getSortKey(...) instead
-     */
-    virtual CollationKey& getCollationKey(const UChar*source,
-                                          int32_t sourceLength,
-                                          CollationKey& key,
-                                          UErrorCode& status) const = 0;
-    /**
-     * Generates the hash code for the collation object
-     * @stable ICU 2.0
-     */
-    virtual int32_t hashCode(void) const = 0;
-
-    /**
-     * Gets the locale of the Collator
-     *
-     * @param type can be either requested, valid or actual locale. For more
-     *             information see the definition of ULocDataLocaleType in
-     *             uloc.h
-     * @param status the error code status.
-     * @return locale where the collation data lives. If the collator
-     *         was instantiated from rules, locale is empty.
-     * @deprecated ICU 2.8 This API is under consideration for revision
-     * in ICU 3.0.
-     */
-    virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0;
-
-    /**
-     * Convenience method for comparing two strings based on the collation rules.
-     * @param source the source string to be compared with.
-     * @param target the target string to be compared with.
-     * @return true if the first string is greater than the second one,
-     *         according to the collation rules. false, otherwise.
-     * @see Collator#compare
-     * @stable ICU 2.0
-     */
-    UBool greater(const UnicodeString& source, const UnicodeString& target)
-                  const;
-
-    /**
-     * Convenience method for comparing two strings based on the collation rules.
-     * @param source the source string to be compared with.
-     * @param target the target string to be compared with.
-     * @return true if the first string is greater than or equal to the second
-     *         one, according to the collation rules. false, otherwise.
-     * @see Collator#compare
-     * @stable ICU 2.0
-     */
-    UBool greaterOrEqual(const UnicodeString& source,
-                         const UnicodeString& target) const;
-
-    /**
-     * Convenience method for comparing two strings based on the collation rules.
-     * @param source the source string to be compared with.
-     * @param target the target string to be compared with.
-     * @return true if the strings are equal according to the collation rules.
-     *         false, otherwise.
-     * @see Collator#compare
-     * @stable ICU 2.0
-     */
-    UBool equals(const UnicodeString& source, const UnicodeString& target) const;
-
-    /**
-     * Determines the minimum strength that will be use in comparison or
-     * transformation.
-     * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
-     * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
-     * are ignored.
-     * @return the current comparison level.
-     * @see Collator#setStrength
-     * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
-     */
-    virtual ECollationStrength getStrength(void) const = 0;
-
-    /**
-     * Sets the minimum strength to be used in comparison or transformation.
-     * <p>Example of use:
-     * <pre>
-     *  \code
-     *  UErrorCode status = U_ZERO_ERROR;
-     *  Collator*myCollation = Collator::createInstance(Locale::US, status);
-     *  if (U_FAILURE(status)) return;
-     *  myCollation->setStrength(Collator::PRIMARY);
-     *  // result will be "abc" == "ABC"
-     *  // tertiary differences will be ignored
-     *  Collator::ComparisonResult result = myCollation->compare("abc", "ABC");
-     * \endcode
-     * </pre>
-     * @see Collator#getStrength
-     * @param newStrength the new comparison level.
-     * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
-     */
-    virtual void setStrength(ECollationStrength newStrength) = 0;
-
-    /**
-     * Get name of the object for the desired Locale, in the desired langauge
-     * @param objectLocale must be from getAvailableLocales
-     * @param displayLocale specifies the desired locale for output
-     * @param name the fill-in parameter of the return value
-     * @return display-able name of the object for the object locale in the
-     *         desired language
-     * @stable ICU 2.0
-     */
-    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
-                                         const Locale& displayLocale,
-                                         UnicodeString& name);
-
-    /**
-    * Get name of the object for the desired Locale, in the langauge of the
-    * default locale.
-    * @param objectLocale must be from getAvailableLocales
-    * @param name the fill-in parameter of the return value
-    * @return name of the object for the desired locale in the default language
-    * @stable ICU 2.0
-    */
-    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
-                                         UnicodeString& name);
-
-    /**
-     * Get the set of Locales for which Collations are installed.
-     *
-     * <p>Note this does not include locales supported by registered collators.
-     * If collators might have been registered, use the overload of getAvailableLocales
-     * that returns a StringEnumeration.</p>
-     *
-     * @param count the output parameter of number of elements in the locale list
-     * @return the list of available locales for which collations are installed
-     * @stable ICU 2.0
-     */
-    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
-
-    /**
-     * Return a StringEnumeration over the locales available at the time of the call,
-     * including registered locales.  If a severe error occurs (such as out of memory
-     * condition) this will return null. If there is no locale data, an empty enumeration
-     * will be returned.
-     * @return a StringEnumeration over the locales available at the time of the call
-     * @stable ICU 2.6
-     */
-    static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
-
-    /**
-     * Create a string enumerator of all possible keywords that are relevant to
-     * collation. At this point, the only recognized keyword for this
-     * service is "collation".
-     * @param status input-output error code
-     * @return a string enumeration over locale strings. The caller is
-     * responsible for closing the result.
-     * @stable ICU 3.0
-     */
-    static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status);
-
-    /**
-     * Given a keyword, create a string enumeration of all values
-     * for that keyword that are currently in use.
-     * @param keyword a particular keyword as enumerated by
-     * ucol_getKeywords. If any other keyword is passed in, status is set
-     * to U_ILLEGAL_ARGUMENT_ERROR.
-     * @param status input-output error code
-     * @return a string enumeration over collation keyword values, or NULL
-     * upon error. The caller is responsible for deleting the result.
-     * @stable ICU 3.0
-     */
-    static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status);
-
-    /**
-     * Return the functionally equivalent locale for the given
-     * requested locale, with respect to given keyword, for the
-     * collation service.  If two locales return the same result, then
-     * collators instantiated for these locales will behave
-     * equivalently.  The converse is not always true; two collators
-     * may in fact be equivalent, but return different results, due to
-     * internal details.  The return result has no other meaning than
-     * that stated above, and implies nothing as to the relationship
-     * between the two locales.  This is intended for use by
-     * applications who wish to cache collators, or otherwise reuse
-     * collators when possible.  The functional equivalent may change
-     * over time.  For more information, please see the <a
-     * href="http://icu-project.org/userguide/locale.html#services">
-     * Locales and Services</a> section of the ICU User Guide.
-     * @param keyword a particular keyword as enumerated by
-     * ucol_getKeywords.
-     * @param locale the requested locale
-     * @param isAvailable reference to a fillin parameter that
-     * indicates whether the requested locale was 'available' to the
-     * collation service. A locale is defined as 'available' if it
-     * physically exists within the collation locale data.
-     * @param status reference to input-output error code
-     * @return the functionally equivalent collation locale, or the root
-     * locale upon error.
-     * @stable ICU 3.0
-     */
-    static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale,
-                                          UBool& isAvailable, UErrorCode& status);
-
-#if !UCONFIG_NO_SERVICE
-    /**
-     * Register a new Collator.  The collator will be adopted.
-     * @param toAdopt the Collator instance to be adopted
-     * @param locale the locale with which the collator will be associated
-     * @param status the in/out status code, no special meanings are assigned
-     * @return a registry key that can be used to unregister this collator
-     * @stable ICU 2.6
-     */
-    static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status);
-
-    /**
-     * Register a new CollatorFactory.  The factory will be adopted.
-     * @param toAdopt the CollatorFactory instance to be adopted
-     * @param status the in/out status code, no special meanings are assigned
-     * @return a registry key that can be used to unregister this collator
-     * @stable ICU 2.6
-     */
-    static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status);
-
-    /**
-     * Unregister a previously-registered Collator or CollatorFactory
-     * using the key returned from the register call.  Key becomes
-     * invalid after a successful call and should not be used again.
-     * The object corresponding to the key will be deleted.
-     * @param key the registry key returned by a previous call to registerInstance
-     * @param status the in/out status code, no special meanings are assigned
-     * @return TRUE if the collator for the key was successfully unregistered
-     * @stable ICU 2.6
-     */
-    static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
-#endif /* UCONFIG_NO_SERVICE */
-
-    /**
-     * Gets the version information for a Collator.
-     * @param info the version # information, the result will be filled in
-     * @stable ICU 2.0
-     */
-    virtual void getVersion(UVersionInfo info) const = 0;
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual method.
-     * This method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone()
-     * methods call this method.
-     * @return The class ID for this object. All objects of a given class have
-     *         the same class ID.  Objects of other classes have different class
-     *         IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const = 0;
-
-    /**
-     * Universal attribute setter
-     * @param attr attribute type
-     * @param value attribute value
-     * @param status to indicate whether the operation went on smoothly or
-     *        there were errors
-     * @stable ICU 2.2
-     */
-    virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
-                              UErrorCode &status) = 0;
-
-    /**
-     * Universal attribute getter
-     * @param attr attribute type
-     * @param status to indicate whether the operation went on smoothly or
-     *        there were errors
-     * @return attribute value
-     * @stable ICU 2.2
-     */
-    virtual UColAttributeValue getAttribute(UColAttribute attr,
-                                            UErrorCode &status) = 0;
-
-    /**
-     * Sets the variable top to a collation element value of a string supplied.
-     * @param varTop one or more (if contraction) UChars to which the variable top should be set
-     * @param len length of variable top string. If -1 it is considered to be zero terminated.
-     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
-     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br>
-     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
-     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
-     * @stable ICU 2.0
-     */
-    virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0;
-
-    /**
-     * Sets the variable top to a collation element value of a string supplied.
-     * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
-     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
-     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br>
-     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
-     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
-     * @stable ICU 2.0
-     */
-    virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status) = 0;
-
-    /**
-     * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
-     * Lower 16 bits are ignored.
-     * @param varTop CE value, as returned by setVariableTop or ucol)getVariableTop
-     * @param status error code (not changed by function)
-     * @stable ICU 2.0
-     */
-    virtual void setVariableTop(const uint32_t varTop, UErrorCode &status) = 0;
-
-    /**
-     * Gets the variable top value of a Collator.
-     * Lower 16 bits are undefined and should be ignored.
-     * @param status error code (not changed by function). If error code is set, the return value is undefined.
-     * @stable ICU 2.0
-     */
-    virtual uint32_t getVariableTop(UErrorCode &status) const = 0;
-
-    /**
-     * Get an UnicodeSet that contains all the characters and sequences
-     * tailored in this collator.
-     * @param status      error code of the operation
-     * @return a pointer to a UnicodeSet object containing all the
-     *         code points and sequences that may sort differently than
-     *         in the UCA. The object must be disposed of by using delete
-     * @stable ICU 2.4
-     */
-    virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
-
-
-    /**
-     * Thread safe cloning operation
-     * @return pointer to the new clone, user should remove it.
-     * @stable ICU 2.2
-     */
-    virtual Collator* safeClone(void) = 0;
-
-    /**
-     * Get the sort key as an array of bytes from an UnicodeString.
-     * Sort key byte arrays are zero-terminated and can be compared using
-     * strcmp().
-     * @param source string to be processed.
-     * @param result buffer to store result in. If NULL, number of bytes needed
-     *        will be returned.
-     * @param resultLength length of the result buffer. If if not enough the
-     *        buffer will be filled to capacity.
-     * @return Number of bytes needed for storing the sort key
-     * @stable ICU 2.2
-     */
-    virtual int32_t getSortKey(const UnicodeString& source,
-                              uint8_t* result,
-                              int32_t resultLength) const = 0;
-
-    /**
-     * Get the sort key as an array of bytes from an UChar buffer.
-     * Sort key byte arrays are zero-terminated and can be compared using
-     * strcmp().
-     * @param source string to be processed.
-     * @param sourceLength length of string to be processed.
-     *        If -1, the string is 0 terminated and length will be decided by the
-     *        function.
-     * @param result buffer to store result in. If NULL, number of bytes needed
-     *        will be returned.
-     * @param resultLength length of the result buffer. If if not enough the
-     *        buffer will be filled to capacity.
-     * @return Number of bytes needed for storing the sort key
-     * @stable ICU 2.2
-     */
-    virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
-                               uint8_t*result, int32_t resultLength) const = 0;
-
-    /**
-     * Produce a bound for a given sortkey and a number of levels.
-     * Return value is always the number of bytes needed, regardless of
-     * whether the result buffer was big enough or even valid.<br>
-     * Resulting bounds can be used to produce a range of strings that are
-     * between upper and lower bounds. For example, if bounds are produced
-     * for a sortkey of string "smith", strings between upper and lower
-     * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
-     * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
-     * is produced, strings matched would be as above. However, if bound
-     * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
-     * also match "Smithsonian" and similar.<br>
-     * For more on usage, see example in cintltst/capitst.c in procedure
-     * TestBounds.
-     * Sort keys may be compared using <TT>strcmp</TT>.
-     * @param source The source sortkey.
-     * @param sourceLength The length of source, or -1 if null-terminated.
-     *                     (If an unmodified sortkey is passed, it is always null
-     *                      terminated).
-     * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
-     *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, that
-     *                  produces upper bound that matches strings of the same length
-     *                  or UCOL_BOUND_UPPER_LONG that matches strings that have the
-     *                  same starting substring as the source string.
-     * @param noOfLevels  Number of levels required in the resulting bound (for most
-     *                    uses, the recommended value is 1). See users guide for
-     *                    explanation on number of levels a sortkey can have.
-     * @param result A pointer to a buffer to receive the resulting sortkey.
-     * @param resultLength The maximum size of result.
-     * @param status Used for returning error code if something went wrong. If the
-     *               number of levels requested is higher than the number of levels
-     *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
-     *               issued.
-     * @return The size needed to fully store the bound.
-     * @see ucol_keyHashCode
-     * @stable ICU 2.1
-     */
-    static int32_t U_EXPORT2 getBound(const uint8_t       *source,
-            int32_t             sourceLength,
-            UColBoundMode       boundType,
-            uint32_t            noOfLevels,
-            uint8_t             *result,
-            int32_t             resultLength,
-            UErrorCode          &status);
-
-
-protected:
-
-    // Collator protected constructors -------------------------------------
-
-    /**
-    * Default constructor.
-    * Constructor is different from the old default Collator constructor.
-    * The task for determing the default collation strength and normalization
-    * mode is left to the child class.
-    * @stable ICU 2.0
-    */
-    Collator();
-
-    /**
-    * Constructor.
-    * Empty constructor, does not handle the arguments.
-    * This constructor is done for backward compatibility with 1.7 and 1.8.
-    * The task for handling the argument collation strength and normalization
-    * mode is left to the child class.
-    * @param collationStrength collation strength
-    * @param decompositionMode
-    * @deprecated ICU 2.4. Subclasses should use the default constructor
-    * instead and handle the strength and normalization mode themselves.
-    */
-    Collator(UCollationStrength collationStrength,
-             UNormalizationMode decompositionMode);
-
-    /**
-    * Copy constructor.
-    * @param other Collator object to be copied from
-    * @stable ICU 2.0
-    */
-    Collator(const Collator& other);
-
-    // Collator protected methods -----------------------------------------
-
-
-   /**
-    * Used internally by registraton to define the requested and valid locales.
-    * @param requestedLocale the requsted locale
-    * @param validLocale the valid locale
-    * @internal
-    */
-    virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
-
-public:
-#if !UCONFIG_NO_SERVICE
-    /**
-     * used only by ucol_open, not for public use
-     * @internal
-     */
-    static UCollator* createUCollator(const char* loc, UErrorCode* status);
-#endif
-private:
-    /**
-     * Assignment operator. Private for now.
-     * @internal
-     */
-    Collator& operator=(const Collator& other);
-
-    friend class CFactory;
-    friend class SimpleCFactory;
-    friend class ICUCollatorFactory;
-    friend class ICUCollatorService;
-    static Collator* makeInstance(const Locale& desiredLocale,
-                                  UErrorCode& status);
-
-    // Collator private data members ---------------------------------------
-
-    /*
-    synwee : removed as attributes to be handled by child class
-    UCollationStrength  strength;
-    Normalizer::EMode  decmp;
-    */
-    /* This is useless information */
-/*  static const UVersionInfo fVersion;*/
-};
-
-#if !UCONFIG_NO_SERVICE
-/**
- * A factory, used with registerFactory, the creates multiple collators and provides
- * display names for them.  A factory supports some number of locales-- these are the
- * locales for which it can create collators.  The factory can be visible, in which
- * case the supported locales will be enumerated by getAvailableLocales, or invisible,
- * in which they are not.  Invisible locales are still supported, they are just not
- * listed by getAvailableLocales.
- * <p>
- * If standard locale display names are sufficient, Collator instances can
- * be registered using registerInstance instead.</p>
- * <p>
- * Note: if the collators are to be used from C APIs, they must be instances
- * of RuleBasedCollator.</p>
- *
- * @stable ICU 2.6
- */
-class U_I18N_API CollatorFactory : public UObject {
-public:
-
-    /**
-     * Destructor
-     * @stable ICU 3.0
-     */
-    virtual ~CollatorFactory();
-
-    /**
-     * Return true if this factory is visible.  Default is true.
-     * If not visible, the locales supported by this factory will not
-     * be listed by getAvailableLocales.
-     * @return true if the factory is visible.
-     * @stable ICU 2.6
-     */
-    virtual UBool visible(void) const;
-
-    /**
-     * Return a collator for the provided locale.  If the locale
-     * is not supported, return NULL.
-     * @param loc the locale identifying the collator to be created.
-     * @return a new collator if the locale is supported, otherwise NULL.
-     * @stable ICU 2.6
-     */
-    virtual Collator* createCollator(const Locale& loc) = 0;
-
-    /**
-     * Return the name of the collator for the objectLocale, localized for the displayLocale.
-     * If objectLocale is not supported, or the factory is not visible, set the result string
-     * to bogus.
-     * @param objectLocale the locale identifying the collator
-     * @param displayLocale the locale for which the display name of the collator should be localized
-     * @param result an output parameter for the display name, set to bogus if not supported.
-     * @return the display name
-     * @stable ICU 2.6
-     */
-    virtual  UnicodeString& getDisplayName(const Locale& objectLocale,
-                                           const Locale& displayLocale,
-                                           UnicodeString& result);
-
-    /**
-     * Return an array of all the locale names directly supported by this factory.
-     * The number of names is returned in count.  This array is owned by the factory.
-     * Its contents must never change.
-     * @param count output parameter for the number of locales supported by the factory
-     * @param status the in/out error code
-     * @return a pointer to an array of count UnicodeStrings.
-     * @stable ICU 2.6
-     */
-    virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0;
-};
-#endif /* UCONFIG_NO_SERVICE */
-
-// Collator inline methods -----------------------------------------------
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_COLLATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/coll.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/coll.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/coll.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/coll.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1035 @@
+/*
+******************************************************************************
+*   Copyright (C) 1996-2008, International Business Machines                 *
+*   Corporation and others.  All Rights Reserved.                            *
+******************************************************************************
+*/
+
+/**
+ * \file 
+ * \brief C++ API: Collation Service.
+ */
+ 
+/**
+* File coll.h
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+*  Date        Name        Description
+* 02/5/97      aliu        Modified createDefault to load collation data from
+*                          binary files when possible.  Added related methods
+*                          createCollationFromFile, chopLocale, createPathName.
+* 02/11/97     aliu        Added members addToCache, findInCache, and fgCache.
+* 02/12/97     aliu        Modified to create objects from RuleBasedCollator cache.
+*                          Moved cache out of Collation class.
+* 02/13/97     aliu        Moved several methods out of this class and into
+*                          RuleBasedCollator, with modifications.  Modified
+*                          createDefault() to call new RuleBasedCollator(Locale&)
+*                          constructor.  General clean up and documentation.
+* 02/20/97     helena      Added clone, operator==, operator!=, operator=, copy
+*                          constructor and getDynamicClassID.
+* 03/25/97     helena      Updated with platform independent data types.
+* 05/06/97     helena      Added memory allocation error detection.
+* 06/20/97     helena      Java class name change.
+* 09/03/97     helena      Added createCollationKeyValues().
+* 02/10/98     damiba      Added compare() with length as parameter.
+* 04/23/99     stephen     Removed EDecompositionMode, merged with
+*                          Normalizer::EMode.
+* 11/02/99     helena      Collator performance enhancements.  Eliminates the
+*                          UnicodeString construction and special case for NO_OP.
+* 11/23/99     srl         More performance enhancements. Inlining of
+*                          critical accessors.
+* 05/15/00     helena      Added version information API.
+* 01/29/01     synwee      Modified into a C++ wrapper which calls C apis
+*                          (ucoll.h).
+*/
+
+#ifndef COLL_H
+#define COLL_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/uobject.h"
+#include "unicode/ucol.h"
+#include "unicode/normlzr.h"
+#include "unicode/locid.h"
+#include "unicode/uniset.h"
+#include "unicode/umisc.h"
+
+U_NAMESPACE_BEGIN
+
+class StringEnumeration;
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * @stable ICU 2.6
+ */
+class CollatorFactory;
+#endif
+
+/**
+* @stable ICU 2.0
+*/
+class CollationKey;
+
+/**
+* The <code>Collator</code> class performs locale-sensitive string
+* comparison.<br>
+* You use this class to build searching and sorting routines for natural
+* language text.<br>
+* <em>Important: </em>The ICU collation service has been reimplemented
+* in order to achieve better performance and UCA compliance.
+* For details, see the
+* <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
+* collation design document</a>.
+* <p>
+* <code>Collator</code> is an abstract base class. Subclasses implement
+* specific collation strategies. One subclass,
+* <code>RuleBasedCollator</code>, is currently provided and is applicable
+* to a wide set of languages. Other subclasses may be created to handle more
+* specialized needs.
+* <p>
+* Like other locale-sensitive classes, you can use the static factory method,
+* <code>createInstance</code>, to obtain the appropriate
+* <code>Collator</code> object for a given locale. You will only need to
+* look at the subclasses of <code>Collator</code> if you need to
+* understand the details of a particular collation strategy or if you need to
+* modify that strategy.
+* <p>
+* The following example shows how to compare two strings using the
+* <code>Collator</code> for the default locale.
+* \htmlonly<blockquote>\endhtmlonly
+* <pre>
+* \code
+* // Compare two strings in the default locale
+* UErrorCode success = U_ZERO_ERROR;
+* Collator* myCollator = Collator::createInstance(success);
+* if (myCollator->compare("abc", "ABC") < 0)
+*   cout << "abc is less than ABC" << endl;
+* else
+*   cout << "abc is greater than or equal to ABC" << endl;
+* \endcode
+* </pre>
+* \htmlonly</blockquote>\endhtmlonly
+* <p>
+* You can set a <code>Collator</code>'s <em>strength</em> property to
+* determine the level of difference considered significant in comparisons.
+* Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>,
+* <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>.
+* The exact assignment of strengths to language features is locale dependant.
+* For example, in Czech, "e" and "f" are considered primary differences,
+* while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary
+* differences and "e" and "e" are identical. The following shows how both case
+* and accents could be ignored for US English.
+* \htmlonly<blockquote>\endhtmlonly
+* <pre>
+* \code
+* //Get the Collator for US English and set its strength to PRIMARY
+* UErrorCode success = U_ZERO_ERROR;
+* Collator* usCollator = Collator::createInstance(Locale::US, success);
+* usCollator->setStrength(Collator::PRIMARY);
+* if (usCollator->compare("abc", "ABC") == 0)
+*     cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl;
+* \endcode
+* </pre>
+* \htmlonly</blockquote>\endhtmlonly
+* <p>
+* For comparing strings exactly once, the <code>compare</code> method
+* provides the best performance. When sorting a list of strings however, it
+* is generally necessary to compare each string multiple times. In this case,
+* sort keys provide better performance. The <code>getSortKey</code> methods
+* convert a string to a series of bytes that can be compared bitwise against
+* other sort keys using <code>strcmp()</code>. Sort keys are written as
+* zero-terminated byte strings. They consist of several substrings, one for
+* each collation strength level, that are delimited by 0x01 bytes.
+* If the string code points are appended for UCOL_IDENTICAL, then they are
+* processed for correct code point order comparison and may contain 0x01
+* bytes but not zero bytes.
+* </p>
+* <p>
+* An older set of APIs returns a <code>CollationKey</code> object that wraps
+* the sort key bytes instead of returning the bytes themselves.
+* Its use is deprecated, but it is still available for compatibility with
+* Java.
+* </p>
+* <p>
+* <strong>Note:</strong> <code>Collator</code>s with different Locale,
+* and CollationStrength settings will return different sort
+* orders for the same set of strings. Locales have specific collation rules,
+* and the way in which secondary and tertiary differences are taken into
+* account, for example, will result in a different sorting order for same
+* strings.
+* </p>
+* @see         RuleBasedCollator
+* @see         CollationKey
+* @see         CollationElementIterator
+* @see         Locale
+* @see         Normalizer
+* @version     2.0 11/15/01
+*/
+
+class U_I18N_API Collator : public UObject {
+public:
+
+    // Collator public enums -----------------------------------------------
+
+    /**
+     * Base letter represents a primary difference. Set comparison level to
+     * PRIMARY to ignore secondary and tertiary differences.<br>
+     * Use this to set the strength of a Collator object.<br>
+     * Example of primary difference, "abc" &lt; "abd"
+     *
+     * Diacritical differences on the same base letter represent a secondary
+     * difference. Set comparison level to SECONDARY to ignore tertiary
+     * differences. Use this to set the strength of a Collator object.<br>
+     * Example of secondary difference, "&auml;" >> "a".
+     *
+     * Uppercase and lowercase versions of the same character represents a
+     * tertiary difference.  Set comparison level to TERTIARY to include all
+     * comparison differences. Use this to set the strength of a Collator
+     * object.<br>
+     * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
+     *
+     * Two characters are considered "identical" when they have the same unicode
+     * spellings.<br>
+     * For example, "&auml;" == "&auml;".
+     *
+     * UCollationStrength is also used to determine the strength of sort keys
+     * generated from Collator objects.
+     * @stable ICU 2.0
+     */
+    enum ECollationStrength
+    {
+        PRIMARY    = 0,
+        SECONDARY  = 1,
+        TERTIARY   = 2,
+        QUATERNARY = 3,
+        IDENTICAL  = 15
+    };
+
+    /**
+     * LESS is returned if source string is compared to be less than target
+     * string in the compare() method.
+     * EQUAL is returned if source string is compared to be equal to target
+     * string in the compare() method.
+     * GREATER is returned if source string is compared to be greater than
+     * target string in the compare() method.
+     * @see Collator#compare
+     * @deprecated ICU 2.6. Use C enum UCollationResult defined in ucol.h
+     */
+    enum EComparisonResult
+    {
+        LESS = -1,
+        EQUAL = 0,
+        GREATER = 1
+    };
+
+    // Collator public destructor -----------------------------------------
+
+    /**
+     * Destructor
+     * @stable ICU 2.0
+     */
+    virtual ~Collator();
+
+    // Collator public methods --------------------------------------------
+
+    /**
+     * Returns true if "other" is the same as "this"
+     * @param other Collator object to be compared
+     * @return true if other is the same as this.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const Collator& other) const;
+
+    /**
+     * Returns true if "other" is not the same as "this".
+     * @param other Collator object to be compared
+     * @return true if other is not the same as this.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator!=(const Collator& other) const;
+
+    /**
+     * Makes a shallow copy of the current object.
+     * @return a copy of this object
+     * @stable ICU 2.0
+     */
+    virtual Collator* clone(void) const = 0;
+
+    /**
+     * Creates the Collator object for the current default locale.
+     * The default locale is determined by Locale::getDefault.
+     * The UErrorCode& err parameter is used to return status information to the user.
+     * To check whether the construction succeeded or not, you should check the
+     * value of U_SUCCESS(err).  If you wish more detailed information, you can
+     * check for informational error results which still indicate success.
+     * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
+     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+     * used. U_USING_DEFAULT_ERROR indicates that the default locale data was
+     * used; neither the requested locale nor any of its fall back locales
+     * could be found.
+     * The caller owns the returned object and is responsible for deleting it.
+     *
+     * @param err    the error code status.
+     * @return       the collation object of the default locale.(for example, en_US)
+     * @see Locale#getDefault
+     * @stable ICU 2.0
+     */
+    static Collator* U_EXPORT2 createInstance(UErrorCode&  err);
+
+    /**
+     * Gets the table-based collation object for the desired locale. The
+     * resource of the desired locale will be loaded by ResourceLoader.
+     * Locale::ENGLISH is the base collation table and all other languages are
+     * built on top of it with additional language-specific modifications.
+     * The UErrorCode& err parameter is used to return status information to the user.
+     * To check whether the construction succeeded or not, you should check
+     * the value of U_SUCCESS(err).  If you wish more detailed information, you
+     * can check for informational error results which still indicate success.
+     * U_USING_FALLBACK_ERROR indicates that a fall back locale was used.  For
+     * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+     * used.  U_USING_DEFAULT_ERROR indicates that the default locale data was
+     * used; neither the requested locale nor any of its fall back locales
+     * could be found.
+     * The caller owns the returned object and is responsible for deleting it.
+     * @param loc    The locale ID for which to open a collator.
+     * @param err    the error code status.
+     * @return       the created table-based collation object based on the desired
+     *               locale.
+     * @see Locale
+     * @see ResourceLoader
+     * @stable ICU 2.2
+     */
+    static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err);
+
+#ifdef U_USE_COLLATION_OBSOLETE_2_6
+    /**
+     * Create a Collator with a specific version.
+     * This is the same as createInstance(loc, err) except that getVersion() of
+     * the returned object is guaranteed to be the same as the version
+     * parameter.
+     * This is designed to be used to open the same collator for a given
+     * locale even when ICU is updated.
+     * The same locale and version guarantees the same sort keys and
+     * comparison results.
+     * <p>
+     * Note: this API will be removed in a future release.  Use
+     * <tt>createInstance(const Locale&, UErrorCode&) instead.</tt></p>
+     *
+     * @param loc The locale ID for which to open a collator.
+     * @param version The requested collator version.
+     * @param err A reference to a UErrorCode,
+     *            must not indicate a failure before calling this function.
+     * @return A pointer to a Collator, or 0 if an error occurred
+     *         or a collator with the requested version is not available.
+     *
+     * @see getVersion
+     * @obsolete ICU 2.6
+     */
+    static Collator *createInstance(const Locale &loc, UVersionInfo version, UErrorCode &err);
+#endif
+
+    /**
+     * The comparison function compares the character data stored in two
+     * different strings. Returns information about whether a string is less
+     * than, greater than or equal to another string.
+     * @param source the source string to be compared with.
+     * @param target the string that is to be compared with the source string.
+     * @return Returns a byte value. GREATER if source is greater
+     * than target; EQUAL if source is equal to target; LESS if source is less
+     * than target
+     * @deprecated ICU 2.6 use the overload with UErrorCode &
+     */
+    virtual EComparisonResult compare(const UnicodeString& source,
+                                      const UnicodeString& target) const;
+
+    /**
+     * The comparison function compares the character data stored in two
+     * different strings. Returns information about whether a string is less
+     * than, greater than or equal to another string.
+     * @param source the source string to be compared with.
+     * @param target the string that is to be compared with the source string.
+     * @param status possible error code
+     * @return Returns an enum value. UCOL_GREATER if source is greater
+     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
+     * than target
+     * @stable ICU 2.6
+     */
+    virtual UCollationResult compare(const UnicodeString& source,
+                                      const UnicodeString& target,
+                                      UErrorCode &status) const = 0;
+
+    /**
+     * Does the same thing as compare but limits the comparison to a specified
+     * length
+     * @param source the source string to be compared with.
+     * @param target the string that is to be compared with the source string.
+     * @param length the length the comparison is limited to
+     * @return Returns a byte value. GREATER if source (up to the specified
+     *         length) is greater than target; EQUAL if source (up to specified
+     *         length) is equal to target; LESS if source (up to the specified
+     *         length) is less  than target.
+     * @deprecated ICU 2.6 use the overload with UErrorCode &
+     */
+    virtual EComparisonResult compare(const UnicodeString& source,
+                                      const UnicodeString& target,
+                                      int32_t length) const;
+
+    /**
+     * Does the same thing as compare but limits the comparison to a specified
+     * length
+     * @param source the source string to be compared with.
+     * @param target the string that is to be compared with the source string.
+     * @param length the length the comparison is limited to
+     * @param status possible error code
+     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
+     *         length) is greater than target; UCOL_EQUAL if source (up to specified
+     *         length) is equal to target; UCOL_LESS if source (up to the specified
+     *         length) is less  than target.
+     * @stable ICU 2.6
+     */
+    virtual UCollationResult compare(const UnicodeString& source,
+                                      const UnicodeString& target,
+                                      int32_t length,
+                                      UErrorCode &status) const = 0;
+
+    /**
+     * The comparison function compares the character data stored in two
+     * different string arrays. Returns information about whether a string array
+     * is less than, greater than or equal to another string array.
+     * @param source the source string array to be compared with.
+     * @param sourceLength the length of the source string array.  If this value
+     *        is equal to -1, the string array is null-terminated.
+     * @param target the string that is to be compared with the source string.
+     * @param targetLength the length of the target string array.  If this value
+     *        is equal to -1, the string array is null-terminated.
+     * @return Returns a byte value. GREATER if source is greater than target;
+     *         EQUAL if source is equal to target; LESS if source is less than
+     *         target
+     * @deprecated ICU 2.6 use the overload with UErrorCode &
+     */
+    virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
+                                      const UChar* target, int32_t targetLength)
+                                      const;
+
+    /**
+     * The comparison function compares the character data stored in two
+     * different string arrays. Returns information about whether a string array
+     * is less than, greater than or equal to another string array.
+     * @param source the source string array to be compared with.
+     * @param sourceLength the length of the source string array.  If this value
+     *        is equal to -1, the string array is null-terminated.
+     * @param target the string that is to be compared with the source string.
+     * @param targetLength the length of the target string array.  If this value
+     *        is equal to -1, the string array is null-terminated.
+     * @param status possible error code
+     * @return Returns an enum value. UCOL_GREATER if source is greater
+     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
+     * than target
+     * @stable ICU 2.6
+     */
+    virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
+                                      const UChar* target, int32_t targetLength,
+                                      UErrorCode &status) const = 0;
+
+    /**
+     * Transforms the string into a series of characters that can be compared
+     * with CollationKey::compareTo. It is not possible to restore the original
+     * string from the chars in the sort key.  The generated sort key handles
+     * only a limited number of ignorable characters.
+     * <p>Use CollationKey::equals or CollationKey::compare to compare the
+     * generated sort keys.
+     * If the source string is null, a null collation key will be returned.
+     * @param source the source string to be transformed into a sort key.
+     * @param key the collation key to be filled in
+     * @param status the error code status.
+     * @return the collation key of the string based on the collation rules.
+     * @see CollationKey#compare
+     * @deprecated ICU 2.8 Use getSortKey(...) instead
+     */
+    virtual CollationKey& getCollationKey(const UnicodeString&  source,
+                                          CollationKey& key,
+                                          UErrorCode& status) const = 0;
+
+    /**
+     * Transforms the string into a series of characters that can be compared
+     * with CollationKey::compareTo. It is not possible to restore the original
+     * string from the chars in the sort key.  The generated sort key handles
+     * only a limited number of ignorable characters.
+     * <p>Use CollationKey::equals or CollationKey::compare to compare the
+     * generated sort keys.
+     * <p>If the source string is null, a null collation key will be returned.
+     * @param source the source string to be transformed into a sort key.
+     * @param sourceLength length of the collation key
+     * @param key the collation key to be filled in
+     * @param status the error code status.
+     * @return the collation key of the string based on the collation rules.
+     * @see CollationKey#compare
+     * @deprecated ICU 2.8 Use getSortKey(...) instead
+     */
+    virtual CollationKey& getCollationKey(const UChar*source,
+                                          int32_t sourceLength,
+                                          CollationKey& key,
+                                          UErrorCode& status) const = 0;
+    /**
+     * Generates the hash code for the collation object
+     * @stable ICU 2.0
+     */
+    virtual int32_t hashCode(void) const = 0;
+
+    /**
+     * Gets the locale of the Collator
+     *
+     * @param type can be either requested, valid or actual locale. For more
+     *             information see the definition of ULocDataLocaleType in
+     *             uloc.h
+     * @param status the error code status.
+     * @return locale where the collation data lives. If the collator
+     *         was instantiated from rules, locale is empty.
+     * @deprecated ICU 2.8 This API is under consideration for revision
+     * in ICU 3.0.
+     */
+    virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0;
+
+    /**
+     * Convenience method for comparing two strings based on the collation rules.
+     * @param source the source string to be compared with.
+     * @param target the target string to be compared with.
+     * @return true if the first string is greater than the second one,
+     *         according to the collation rules. false, otherwise.
+     * @see Collator#compare
+     * @stable ICU 2.0
+     */
+    UBool greater(const UnicodeString& source, const UnicodeString& target)
+                  const;
+
+    /**
+     * Convenience method for comparing two strings based on the collation rules.
+     * @param source the source string to be compared with.
+     * @param target the target string to be compared with.
+     * @return true if the first string is greater than or equal to the second
+     *         one, according to the collation rules. false, otherwise.
+     * @see Collator#compare
+     * @stable ICU 2.0
+     */
+    UBool greaterOrEqual(const UnicodeString& source,
+                         const UnicodeString& target) const;
+
+    /**
+     * Convenience method for comparing two strings based on the collation rules.
+     * @param source the source string to be compared with.
+     * @param target the target string to be compared with.
+     * @return true if the strings are equal according to the collation rules.
+     *         false, otherwise.
+     * @see Collator#compare
+     * @stable ICU 2.0
+     */
+    UBool equals(const UnicodeString& source, const UnicodeString& target) const;
+
+    /**
+     * Determines the minimum strength that will be use in comparison or
+     * transformation.
+     * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
+     * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
+     * are ignored.
+     * @return the current comparison level.
+     * @see Collator#setStrength
+     * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
+     */
+    virtual ECollationStrength getStrength(void) const = 0;
+
+    /**
+     * Sets the minimum strength to be used in comparison or transformation.
+     * <p>Example of use:
+     * <pre>
+     *  \code
+     *  UErrorCode status = U_ZERO_ERROR;
+     *  Collator*myCollation = Collator::createInstance(Locale::US, status);
+     *  if (U_FAILURE(status)) return;
+     *  myCollation->setStrength(Collator::PRIMARY);
+     *  // result will be "abc" == "ABC"
+     *  // tertiary differences will be ignored
+     *  Collator::ComparisonResult result = myCollation->compare("abc", "ABC");
+     * \endcode
+     * </pre>
+     * @see Collator#getStrength
+     * @param newStrength the new comparison level.
+     * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
+     */
+    virtual void setStrength(ECollationStrength newStrength) = 0;
+
+    /**
+     * Get name of the object for the desired Locale, in the desired langauge
+     * @param objectLocale must be from getAvailableLocales
+     * @param displayLocale specifies the desired locale for output
+     * @param name the fill-in parameter of the return value
+     * @return display-able name of the object for the object locale in the
+     *         desired language
+     * @stable ICU 2.0
+     */
+    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
+                                         const Locale& displayLocale,
+                                         UnicodeString& name);
+
+    /**
+    * Get name of the object for the desired Locale, in the langauge of the
+    * default locale.
+    * @param objectLocale must be from getAvailableLocales
+    * @param name the fill-in parameter of the return value
+    * @return name of the object for the desired locale in the default language
+    * @stable ICU 2.0
+    */
+    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
+                                         UnicodeString& name);
+
+    /**
+     * Get the set of Locales for which Collations are installed.
+     *
+     * <p>Note this does not include locales supported by registered collators.
+     * If collators might have been registered, use the overload of getAvailableLocales
+     * that returns a StringEnumeration.</p>
+     *
+     * @param count the output parameter of number of elements in the locale list
+     * @return the list of available locales for which collations are installed
+     * @stable ICU 2.0
+     */
+    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+
+    /**
+     * Return a StringEnumeration over the locales available at the time of the call,
+     * including registered locales.  If a severe error occurs (such as out of memory
+     * condition) this will return null. If there is no locale data, an empty enumeration
+     * will be returned.
+     * @return a StringEnumeration over the locales available at the time of the call
+     * @stable ICU 2.6
+     */
+    static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
+
+    /**
+     * Create a string enumerator of all possible keywords that are relevant to
+     * collation. At this point, the only recognized keyword for this
+     * service is "collation".
+     * @param status input-output error code
+     * @return a string enumeration over locale strings. The caller is
+     * responsible for closing the result.
+     * @stable ICU 3.0
+     */
+    static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status);
+
+    /**
+     * Given a keyword, create a string enumeration of all values
+     * for that keyword that are currently in use.
+     * @param keyword a particular keyword as enumerated by
+     * ucol_getKeywords. If any other keyword is passed in, status is set
+     * to U_ILLEGAL_ARGUMENT_ERROR.
+     * @param status input-output error code
+     * @return a string enumeration over collation keyword values, or NULL
+     * upon error. The caller is responsible for deleting the result.
+     * @stable ICU 3.0
+     */
+    static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status);
+
+    /**
+     * Return the functionally equivalent locale for the given
+     * requested locale, with respect to given keyword, for the
+     * collation service.  If two locales return the same result, then
+     * collators instantiated for these locales will behave
+     * equivalently.  The converse is not always true; two collators
+     * may in fact be equivalent, but return different results, due to
+     * internal details.  The return result has no other meaning than
+     * that stated above, and implies nothing as to the relationship
+     * between the two locales.  This is intended for use by
+     * applications who wish to cache collators, or otherwise reuse
+     * collators when possible.  The functional equivalent may change
+     * over time.  For more information, please see the <a
+     * href="http://icu-project.org/userguide/locale.html#services">
+     * Locales and Services</a> section of the ICU User Guide.
+     * @param keyword a particular keyword as enumerated by
+     * ucol_getKeywords.
+     * @param locale the requested locale
+     * @param isAvailable reference to a fillin parameter that
+     * indicates whether the requested locale was 'available' to the
+     * collation service. A locale is defined as 'available' if it
+     * physically exists within the collation locale data.
+     * @param status reference to input-output error code
+     * @return the functionally equivalent collation locale, or the root
+     * locale upon error.
+     * @stable ICU 3.0
+     */
+    static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale,
+                                          UBool& isAvailable, UErrorCode& status);
+
+#if !UCONFIG_NO_SERVICE
+    /**
+     * Register a new Collator.  The collator will be adopted.
+     * @param toAdopt the Collator instance to be adopted
+     * @param locale the locale with which the collator will be associated
+     * @param status the in/out status code, no special meanings are assigned
+     * @return a registry key that can be used to unregister this collator
+     * @stable ICU 2.6
+     */
+    static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status);
+
+    /**
+     * Register a new CollatorFactory.  The factory will be adopted.
+     * @param toAdopt the CollatorFactory instance to be adopted
+     * @param status the in/out status code, no special meanings are assigned
+     * @return a registry key that can be used to unregister this collator
+     * @stable ICU 2.6
+     */
+    static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status);
+
+    /**
+     * Unregister a previously-registered Collator or CollatorFactory
+     * using the key returned from the register call.  Key becomes
+     * invalid after a successful call and should not be used again.
+     * The object corresponding to the key will be deleted.
+     * @param key the registry key returned by a previous call to registerInstance
+     * @param status the in/out status code, no special meanings are assigned
+     * @return TRUE if the collator for the key was successfully unregistered
+     * @stable ICU 2.6
+     */
+    static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
+#endif /* UCONFIG_NO_SERVICE */
+
+    /**
+     * Gets the version information for a Collator.
+     * @param info the version # information, the result will be filled in
+     * @stable ICU 2.0
+     */
+    virtual void getVersion(UVersionInfo info) const = 0;
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual method.
+     * This method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     * @return The class ID for this object. All objects of a given class have
+     *         the same class ID.  Objects of other classes have different class
+     *         IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const = 0;
+
+    /**
+     * Universal attribute setter
+     * @param attr attribute type
+     * @param value attribute value
+     * @param status to indicate whether the operation went on smoothly or
+     *        there were errors
+     * @stable ICU 2.2
+     */
+    virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
+                              UErrorCode &status) = 0;
+
+    /**
+     * Universal attribute getter
+     * @param attr attribute type
+     * @param status to indicate whether the operation went on smoothly or
+     *        there were errors
+     * @return attribute value
+     * @stable ICU 2.2
+     */
+    virtual UColAttributeValue getAttribute(UColAttribute attr,
+                                            UErrorCode &status) = 0;
+
+    /**
+     * Sets the variable top to a collation element value of a string supplied.
+     * @param varTop one or more (if contraction) UChars to which the variable top should be set
+     * @param len length of variable top string. If -1 it is considered to be zero terminated.
+     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
+     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br>
+     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
+     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
+     * @stable ICU 2.0
+     */
+    virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0;
+
+    /**
+     * Sets the variable top to a collation element value of a string supplied.
+     * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
+     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
+     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br>
+     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
+     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
+     * @stable ICU 2.0
+     */
+    virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status) = 0;
+
+    /**
+     * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
+     * Lower 16 bits are ignored.
+     * @param varTop CE value, as returned by setVariableTop or ucol)getVariableTop
+     * @param status error code (not changed by function)
+     * @stable ICU 2.0
+     */
+    virtual void setVariableTop(const uint32_t varTop, UErrorCode &status) = 0;
+
+    /**
+     * Gets the variable top value of a Collator.
+     * Lower 16 bits are undefined and should be ignored.
+     * @param status error code (not changed by function). If error code is set, the return value is undefined.
+     * @stable ICU 2.0
+     */
+    virtual uint32_t getVariableTop(UErrorCode &status) const = 0;
+
+    /**
+     * Get an UnicodeSet that contains all the characters and sequences
+     * tailored in this collator.
+     * @param status      error code of the operation
+     * @return a pointer to a UnicodeSet object containing all the
+     *         code points and sequences that may sort differently than
+     *         in the UCA. The object must be disposed of by using delete
+     * @stable ICU 2.4
+     */
+    virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
+
+
+    /**
+     * Thread safe cloning operation
+     * @return pointer to the new clone, user should remove it.
+     * @stable ICU 2.2
+     */
+    virtual Collator* safeClone(void) = 0;
+
+    /**
+     * Get the sort key as an array of bytes from an UnicodeString.
+     * Sort key byte arrays are zero-terminated and can be compared using
+     * strcmp().
+     * @param source string to be processed.
+     * @param result buffer to store result in. If NULL, number of bytes needed
+     *        will be returned.
+     * @param resultLength length of the result buffer. If if not enough the
+     *        buffer will be filled to capacity.
+     * @return Number of bytes needed for storing the sort key
+     * @stable ICU 2.2
+     */
+    virtual int32_t getSortKey(const UnicodeString& source,
+                              uint8_t* result,
+                              int32_t resultLength) const = 0;
+
+    /**
+     * Get the sort key as an array of bytes from an UChar buffer.
+     * Sort key byte arrays are zero-terminated and can be compared using
+     * strcmp().
+     * @param source string to be processed.
+     * @param sourceLength length of string to be processed.
+     *        If -1, the string is 0 terminated and length will be decided by the
+     *        function.
+     * @param result buffer to store result in. If NULL, number of bytes needed
+     *        will be returned.
+     * @param resultLength length of the result buffer. If if not enough the
+     *        buffer will be filled to capacity.
+     * @return Number of bytes needed for storing the sort key
+     * @stable ICU 2.2
+     */
+    virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
+                               uint8_t*result, int32_t resultLength) const = 0;
+
+    /**
+     * Produce a bound for a given sortkey and a number of levels.
+     * Return value is always the number of bytes needed, regardless of
+     * whether the result buffer was big enough or even valid.<br>
+     * Resulting bounds can be used to produce a range of strings that are
+     * between upper and lower bounds. For example, if bounds are produced
+     * for a sortkey of string "smith", strings between upper and lower
+     * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
+     * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
+     * is produced, strings matched would be as above. However, if bound
+     * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
+     * also match "Smithsonian" and similar.<br>
+     * For more on usage, see example in cintltst/capitst.c in procedure
+     * TestBounds.
+     * Sort keys may be compared using <TT>strcmp</TT>.
+     * @param source The source sortkey.
+     * @param sourceLength The length of source, or -1 if null-terminated.
+     *                     (If an unmodified sortkey is passed, it is always null
+     *                      terminated).
+     * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
+     *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, that
+     *                  produces upper bound that matches strings of the same length
+     *                  or UCOL_BOUND_UPPER_LONG that matches strings that have the
+     *                  same starting substring as the source string.
+     * @param noOfLevels  Number of levels required in the resulting bound (for most
+     *                    uses, the recommended value is 1). See users guide for
+     *                    explanation on number of levels a sortkey can have.
+     * @param result A pointer to a buffer to receive the resulting sortkey.
+     * @param resultLength The maximum size of result.
+     * @param status Used for returning error code if something went wrong. If the
+     *               number of levels requested is higher than the number of levels
+     *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
+     *               issued.
+     * @return The size needed to fully store the bound.
+     * @see ucol_keyHashCode
+     * @stable ICU 2.1
+     */
+    static int32_t U_EXPORT2 getBound(const uint8_t       *source,
+            int32_t             sourceLength,
+            UColBoundMode       boundType,
+            uint32_t            noOfLevels,
+            uint8_t             *result,
+            int32_t             resultLength,
+            UErrorCode          &status);
+
+
+protected:
+
+    // Collator protected constructors -------------------------------------
+
+    /**
+    * Default constructor.
+    * Constructor is different from the old default Collator constructor.
+    * The task for determing the default collation strength and normalization
+    * mode is left to the child class.
+    * @stable ICU 2.0
+    */
+    Collator();
+
+    /**
+    * Constructor.
+    * Empty constructor, does not handle the arguments.
+    * This constructor is done for backward compatibility with 1.7 and 1.8.
+    * The task for handling the argument collation strength and normalization
+    * mode is left to the child class.
+    * @param collationStrength collation strength
+    * @param decompositionMode
+    * @deprecated ICU 2.4. Subclasses should use the default constructor
+    * instead and handle the strength and normalization mode themselves.
+    */
+    Collator(UCollationStrength collationStrength,
+             UNormalizationMode decompositionMode);
+
+    /**
+    * Copy constructor.
+    * @param other Collator object to be copied from
+    * @stable ICU 2.0
+    */
+    Collator(const Collator& other);
+
+    // Collator protected methods -----------------------------------------
+
+
+   /**
+    * Used internally by registraton to define the requested and valid locales.
+    * @param requestedLocale the requsted locale
+    * @param validLocale the valid locale
+    * @internal
+    */
+    virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
+
+public:
+#if !UCONFIG_NO_SERVICE
+    /**
+     * used only by ucol_open, not for public use
+     * @internal
+     */
+    static UCollator* createUCollator(const char* loc, UErrorCode* status);
+#endif
+private:
+    /**
+     * Assignment operator. Private for now.
+     * @internal
+     */
+    Collator& operator=(const Collator& other);
+
+    friend class CFactory;
+    friend class SimpleCFactory;
+    friend class ICUCollatorFactory;
+    friend class ICUCollatorService;
+    static Collator* makeInstance(const Locale& desiredLocale,
+                                  UErrorCode& status);
+
+    // Collator private data members ---------------------------------------
+
+    /*
+    synwee : removed as attributes to be handled by child class
+    UCollationStrength  strength;
+    Normalizer::EMode  decmp;
+    */
+    /* This is useless information */
+/*  static const UVersionInfo fVersion;*/
+};
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * A factory, used with registerFactory, the creates multiple collators and provides
+ * display names for them.  A factory supports some number of locales-- these are the
+ * locales for which it can create collators.  The factory can be visible, in which
+ * case the supported locales will be enumerated by getAvailableLocales, or invisible,
+ * in which they are not.  Invisible locales are still supported, they are just not
+ * listed by getAvailableLocales.
+ * <p>
+ * If standard locale display names are sufficient, Collator instances can
+ * be registered using registerInstance instead.</p>
+ * <p>
+ * Note: if the collators are to be used from C APIs, they must be instances
+ * of RuleBasedCollator.</p>
+ *
+ * @stable ICU 2.6
+ */
+class U_I18N_API CollatorFactory : public UObject {
+public:
+
+    /**
+     * Destructor
+     * @stable ICU 3.0
+     */
+    virtual ~CollatorFactory();
+
+    /**
+     * Return true if this factory is visible.  Default is true.
+     * If not visible, the locales supported by this factory will not
+     * be listed by getAvailableLocales.
+     * @return true if the factory is visible.
+     * @stable ICU 2.6
+     */
+    virtual UBool visible(void) const;
+
+    /**
+     * Return a collator for the provided locale.  If the locale
+     * is not supported, return NULL.
+     * @param loc the locale identifying the collator to be created.
+     * @return a new collator if the locale is supported, otherwise NULL.
+     * @stable ICU 2.6
+     */
+    virtual Collator* createCollator(const Locale& loc) = 0;
+
+    /**
+     * Return the name of the collator for the objectLocale, localized for the displayLocale.
+     * If objectLocale is not supported, or the factory is not visible, set the result string
+     * to bogus.
+     * @param objectLocale the locale identifying the collator
+     * @param displayLocale the locale for which the display name of the collator should be localized
+     * @param result an output parameter for the display name, set to bogus if not supported.
+     * @return the display name
+     * @stable ICU 2.6
+     */
+    virtual  UnicodeString& getDisplayName(const Locale& objectLocale,
+                                           const Locale& displayLocale,
+                                           UnicodeString& result);
+
+    /**
+     * Return an array of all the locale names directly supported by this factory.
+     * The number of names is returned in count.  This array is owned by the factory.
+     * Its contents must never change.
+     * @param count output parameter for the number of locales supported by the factory
+     * @param status the in/out error code
+     * @return a pointer to an array of count UnicodeStrings.
+     * @stable ICU 2.6
+     */
+    virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0;
+};
+#endif /* UCONFIG_NO_SERVICE */
+
+// Collator inline methods -----------------------------------------------
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/curramt.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/curramt.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/curramt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,130 +0,0 @@
-/*
-**********************************************************************
-* Copyright (c) 2004-2006, International Business Machines
-* Corporation and others.  All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: April 26, 2004
-* Since: ICU 3.0
-**********************************************************************
-*/
-#ifndef __CURRENCYAMOUNT_H__
-#define __CURRENCYAMOUNT_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/measure.h"
-#include "unicode/currunit.h"
-
-/**
- * \file 
- * \brief C++ API: Currency Amount Object.
- */
- 
-U_NAMESPACE_BEGIN
-
-/**
- *
- * A currency together with a numeric amount, such as 200 USD.
- *
- * @author Alan Liu
- * @stable ICU 3.0
- */
-class U_I18N_API CurrencyAmount: public Measure {
- public:
-    /**
-     * Construct an object with the given numeric amount and the given
-     * ISO currency code.
-     * @param amount a numeric object; amount.isNumeric() must be TRUE
-     * @param isoCode the 3-letter ISO 4217 currency code; must not be
-     * NULL and must have length 3
-     * @param ec input-output error code. If the amount or the isoCode
-     * is invalid, then this will be set to a failing value.
-     * @stable ICU 3.0
-     */
-    CurrencyAmount(const Formattable& amount, const UChar* isoCode,
-                   UErrorCode &ec);
-
-    /**
-     * Construct an object with the given numeric amount and the given
-     * ISO currency code.
-     * @param amount the amount of the given currency
-     * @param isoCode the 3-letter ISO 4217 currency code; must not be
-     * NULL and must have length 3
-     * @param ec input-output error code. If the isoCode is invalid,
-     * then this will be set to a failing value.
-     * @stable ICU 3.0
-     */
-    CurrencyAmount(double amount, const UChar* isoCode,
-                   UErrorCode &ec);
-
-    /**
-     * Copy constructor
-     * @stable ICU 3.0
-     */
-    CurrencyAmount(const CurrencyAmount& other);
- 
-    /**
-     * Assignment operator
-     * @stable ICU 3.0
-     */
-    CurrencyAmount& operator=(const CurrencyAmount& other);
-
-    /**
-     * Return a polymorphic clone of this object.  The result will
-     * have the same class as returned by getDynamicClassID().
-     * @stable ICU 3.0
-     */
-    virtual UObject* clone() const;
-
-    /**
-     * Destructor
-     * @stable ICU 3.0
-     */
-    virtual ~CurrencyAmount();
-    
-    /**
-     * Returns a unique class ID for this object POLYMORPHICALLY.
-     * This method implements a simple form of RTTI used by ICU.
-     * @return The class ID for this object. All objects of a given
-     * class have the same class ID.  Objects of other classes have
-     * different class IDs.
-     * @stable ICU 3.0
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-     * Returns the class ID for this class. This is used to compare to
-     * the return value of getDynamicClassID().
-     * @return The class ID for all objects of this class.
-     * @stable ICU 3.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-    /**
-     * Return the currency unit object of this object.
-     * @stable ICU 3.0
-     */
-    inline const CurrencyUnit& getCurrency() const;
-
-    /**
-     * Return the ISO currency code of this object.
-     * @stable ICU 3.0
-     */
-    inline const UChar* getISOCurrency() const;
-};
-
-inline const CurrencyUnit& CurrencyAmount::getCurrency() const {
-    return (const CurrencyUnit&) getUnit();
-}
-
-inline const UChar* CurrencyAmount::getISOCurrency() const {
-    return getCurrency().getISOCurrency();
-}
-
-U_NAMESPACE_END
-
-#endif // !UCONFIG_NO_FORMATTING
-#endif // __CURRENCYAMOUNT_H__

Copied: MacRuby/trunk/icu-1060/unicode/curramt.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/curramt.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/curramt.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/curramt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,130 @@
+/*
+**********************************************************************
+* Copyright (c) 2004-2006, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: April 26, 2004
+* Since: ICU 3.0
+**********************************************************************
+*/
+#ifndef __CURRENCYAMOUNT_H__
+#define __CURRENCYAMOUNT_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/measure.h"
+#include "unicode/currunit.h"
+
+/**
+ * \file 
+ * \brief C++ API: Currency Amount Object.
+ */
+ 
+U_NAMESPACE_BEGIN
+
+/**
+ *
+ * A currency together with a numeric amount, such as 200 USD.
+ *
+ * @author Alan Liu
+ * @stable ICU 3.0
+ */
+class U_I18N_API CurrencyAmount: public Measure {
+ public:
+    /**
+     * Construct an object with the given numeric amount and the given
+     * ISO currency code.
+     * @param amount a numeric object; amount.isNumeric() must be TRUE
+     * @param isoCode the 3-letter ISO 4217 currency code; must not be
+     * NULL and must have length 3
+     * @param ec input-output error code. If the amount or the isoCode
+     * is invalid, then this will be set to a failing value.
+     * @stable ICU 3.0
+     */
+    CurrencyAmount(const Formattable& amount, const UChar* isoCode,
+                   UErrorCode &ec);
+
+    /**
+     * Construct an object with the given numeric amount and the given
+     * ISO currency code.
+     * @param amount the amount of the given currency
+     * @param isoCode the 3-letter ISO 4217 currency code; must not be
+     * NULL and must have length 3
+     * @param ec input-output error code. If the isoCode is invalid,
+     * then this will be set to a failing value.
+     * @stable ICU 3.0
+     */
+    CurrencyAmount(double amount, const UChar* isoCode,
+                   UErrorCode &ec);
+
+    /**
+     * Copy constructor
+     * @stable ICU 3.0
+     */
+    CurrencyAmount(const CurrencyAmount& other);
+ 
+    /**
+     * Assignment operator
+     * @stable ICU 3.0
+     */
+    CurrencyAmount& operator=(const CurrencyAmount& other);
+
+    /**
+     * Return a polymorphic clone of this object.  The result will
+     * have the same class as returned by getDynamicClassID().
+     * @stable ICU 3.0
+     */
+    virtual UObject* clone() const;
+
+    /**
+     * Destructor
+     * @stable ICU 3.0
+     */
+    virtual ~CurrencyAmount();
+    
+    /**
+     * Returns a unique class ID for this object POLYMORPHICALLY.
+     * This method implements a simple form of RTTI used by ICU.
+     * @return The class ID for this object. All objects of a given
+     * class have the same class ID.  Objects of other classes have
+     * different class IDs.
+     * @stable ICU 3.0
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * Returns the class ID for this class. This is used to compare to
+     * the return value of getDynamicClassID().
+     * @return The class ID for all objects of this class.
+     * @stable ICU 3.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+    /**
+     * Return the currency unit object of this object.
+     * @stable ICU 3.0
+     */
+    inline const CurrencyUnit& getCurrency() const;
+
+    /**
+     * Return the ISO currency code of this object.
+     * @stable ICU 3.0
+     */
+    inline const UChar* getISOCurrency() const;
+};
+
+inline const CurrencyUnit& CurrencyAmount::getCurrency() const {
+    return (const CurrencyUnit&) getUnit();
+}
+
+inline const UChar* CurrencyAmount::getISOCurrency() const {
+    return getCurrency().getISOCurrency();
+}
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_FORMATTING
+#endif // __CURRENCYAMOUNT_H__

Deleted: MacRuby/trunk/icu-1060/unicode/currunit.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/currunit.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/currunit.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,117 +0,0 @@
-/*
-**********************************************************************
-* Copyright (c) 2004-2006, International Business Machines
-* Corporation and others.  All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: April 26, 2004
-* Since: ICU 3.0
-**********************************************************************
-*/
-#ifndef __CURRENCYUNIT_H__
-#define __CURRENCYUNIT_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/measunit.h"
-
-/**
- * \file 
- * \brief C++ API: Currency Unit Information.
- */
- 
-U_NAMESPACE_BEGIN
-
-/**
- * A unit of currency, such as USD (U.S. dollars) or JPY (Japanese
- * yen).  This class is a thin wrapper over a UChar string that
- * subclasses MeasureUnit, for use with Measure and MeasureFormat.
- *
- * @author Alan Liu
- * @stable ICU 3.0
- */
-class U_I18N_API CurrencyUnit: public MeasureUnit {
- public:
-    /**
-     * Construct an object with the given ISO currency code.
-     * @param isoCode the 3-letter ISO 4217 currency code; must not be
-     * NULL and must have length 3
-     * @param ec input-output error code. If the isoCode is invalid,
-     * then this will be set to a failing value.
-     * @stable ICU 3.0
-     */
-    CurrencyUnit(const UChar* isoCode, UErrorCode &ec);
-
-    /**
-     * Copy constructor
-     * @stable ICU 3.0
-     */
-    CurrencyUnit(const CurrencyUnit& other);
-
-    /**
-     * Assignment operator
-     * @stable ICU 3.0
-     */
-    CurrencyUnit& operator=(const CurrencyUnit& other);
-
-    /**
-     * Return a polymorphic clone of this object.  The result will
-     * have the same class as returned by getDynamicClassID().
-     * @stable ICU 3.0
-     */
-    virtual UObject* clone() const;
-
-    /**
-     * Destructor
-     * @stable ICU 3.0
-     */
-    virtual ~CurrencyUnit();
-
-    /**
-     * Equality operator.  Return true if this object is equal
-     * to the given object.
-     * @stable ICU 3.0
-     */
-    UBool operator==(const UObject& other) const;
-
-    /**
-     * Returns a unique class ID for this object POLYMORPHICALLY.
-     * This method implements a simple form of RTTI used by ICU.
-     * @return The class ID for this object. All objects of a given
-     * class have the same class ID.  Objects of other classes have
-     * different class IDs.
-     * @stable ICU 3.0
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-     * Returns the class ID for this class. This is used to compare to
-     * the return value of getDynamicClassID().
-     * @return The class ID for all objects of this class.
-     * @stable ICU 3.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-    /**
-     * Return the ISO currency code of this object.
-     * @stable ICU 3.0
-     */
-    inline const UChar* getISOCurrency() const;
-
- private:
-    /**
-     * The ISO 4217 code of this object.
-     */
-    UChar isoCode[4];
-};
-
-inline const UChar* CurrencyUnit::getISOCurrency() const {
-    return isoCode;
-}
-
-U_NAMESPACE_END
-
-#endif // !UCONFIG_NO_FORMATTING
-#endif // __CURRENCYUNIT_H__

Copied: MacRuby/trunk/icu-1060/unicode/currunit.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/currunit.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/currunit.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/currunit.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,117 @@
+/*
+**********************************************************************
+* Copyright (c) 2004-2006, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: April 26, 2004
+* Since: ICU 3.0
+**********************************************************************
+*/
+#ifndef __CURRENCYUNIT_H__
+#define __CURRENCYUNIT_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/measunit.h"
+
+/**
+ * \file 
+ * \brief C++ API: Currency Unit Information.
+ */
+ 
+U_NAMESPACE_BEGIN
+
+/**
+ * A unit of currency, such as USD (U.S. dollars) or JPY (Japanese
+ * yen).  This class is a thin wrapper over a UChar string that
+ * subclasses MeasureUnit, for use with Measure and MeasureFormat.
+ *
+ * @author Alan Liu
+ * @stable ICU 3.0
+ */
+class U_I18N_API CurrencyUnit: public MeasureUnit {
+ public:
+    /**
+     * Construct an object with the given ISO currency code.
+     * @param isoCode the 3-letter ISO 4217 currency code; must not be
+     * NULL and must have length 3
+     * @param ec input-output error code. If the isoCode is invalid,
+     * then this will be set to a failing value.
+     * @stable ICU 3.0
+     */
+    CurrencyUnit(const UChar* isoCode, UErrorCode &ec);
+
+    /**
+     * Copy constructor
+     * @stable ICU 3.0
+     */
+    CurrencyUnit(const CurrencyUnit& other);
+
+    /**
+     * Assignment operator
+     * @stable ICU 3.0
+     */
+    CurrencyUnit& operator=(const CurrencyUnit& other);
+
+    /**
+     * Return a polymorphic clone of this object.  The result will
+     * have the same class as returned by getDynamicClassID().
+     * @stable ICU 3.0
+     */
+    virtual UObject* clone() const;
+
+    /**
+     * Destructor
+     * @stable ICU 3.0
+     */
+    virtual ~CurrencyUnit();
+
+    /**
+     * Equality operator.  Return true if this object is equal
+     * to the given object.
+     * @stable ICU 3.0
+     */
+    UBool operator==(const UObject& other) const;
+
+    /**
+     * Returns a unique class ID for this object POLYMORPHICALLY.
+     * This method implements a simple form of RTTI used by ICU.
+     * @return The class ID for this object. All objects of a given
+     * class have the same class ID.  Objects of other classes have
+     * different class IDs.
+     * @stable ICU 3.0
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * Returns the class ID for this class. This is used to compare to
+     * the return value of getDynamicClassID().
+     * @return The class ID for all objects of this class.
+     * @stable ICU 3.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+    /**
+     * Return the ISO currency code of this object.
+     * @stable ICU 3.0
+     */
+    inline const UChar* getISOCurrency() const;
+
+ private:
+    /**
+     * The ISO 4217 code of this object.
+     */
+    UChar isoCode[4];
+};
+
+inline const UChar* CurrencyUnit::getISOCurrency() const {
+    return isoCode;
+}
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_FORMATTING
+#endif // __CURRENCYUNIT_H__

Deleted: MacRuby/trunk/icu-1060/unicode/datefmt.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/datefmt.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/datefmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,700 +0,0 @@
-/*
- ********************************************************************************
- *   Copyright (C) 1997-2008, International Business Machines
- *   Corporation and others.  All Rights Reserved.
- ********************************************************************************
- *
- * File DATEFMT.H
- *
- * Modification History:
- *
- *   Date        Name        Description
- *   02/19/97    aliu        Converted from java.
- *   04/01/97    aliu        Added support for centuries.
- *   07/23/98    stephen     JDK 1.2 sync
- *   11/15/99    weiv        Added support for week of year/day of week formatting
- ********************************************************************************
- */
-
-#ifndef DATEFMT_H
-#define DATEFMT_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/udat.h"
-#include "unicode/calendar.h"
-#include "unicode/numfmt.h"
-#include "unicode/format.h"
-#include "unicode/locid.h"
-
-/**
- * \file 
- * \brief C++ API: Abstract class for converting dates.
- */
-
-U_NAMESPACE_BEGIN
-
-class TimeZone;
-
-/**
- * DateFormat is an abstract class for a family of classes that convert dates and
- * times from their internal representations to textual form and back again in a
- * language-independent manner. Converting from the internal representation (milliseconds
- * since midnight, January 1, 1970) to text is known as "formatting," and converting
- * from text to millis is known as "parsing."  We currently define only one concrete
- * subclass of DateFormat: SimpleDateFormat, which can handle pretty much all normal
- * date formatting and parsing actions.
- * <P>
- * DateFormat helps you to format and parse dates for any locale. Your code can
- * be completely independent of the locale conventions for months, days of the
- * week, or even the calendar format: lunar vs. solar.
- * <P>
- * To format a date for the current Locale, use one of the static factory
- * methods:
- * <pre>
- * \code
- *      DateFormat* dfmt = DateFormat::createDateInstance();
- *      UDate myDate = Calendar::getNow();
- *      UnicodeString myString;
- *      myString = dfmt->format( myDate, myString );
- * \endcode
- * </pre>
- * If you are formatting multiple numbers, it is more efficient to get the
- * format and use it multiple times so that the system doesn't have to fetch the
- * information about the local language and country conventions multiple times.
- * <pre>
- * \code
- *      DateFormat* df = DateFormat::createDateInstance();
- *      UnicodeString myString;
- *      UDate myDateArr[] = { 0.0, 100000000.0, 2000000000.0 }; // test values
- *      for (int32_t i = 0; i < 3; ++i) {
- *          myString.remove();
- *          cout << df->format( myDateArr[i], myString ) << endl;
- *      }
- * \endcode
- * </pre>
- * To get specific fields of a date, you can use UFieldPosition to
- * get specific fields.
- * <pre>
- * \code
- *      DateFormat* dfmt = DateFormat::createDateInstance();
- *      FieldPosition pos(DateFormat::YEAR_FIELD);
- *      UnicodeString myString;
- *      myString = dfmt->format( myDate, myString );
- *      cout << myString << endl;
- *      cout << pos.getBeginIndex() << "," << pos. getEndIndex() << endl;
- * \endcode
- * </pre>
- * To format a date for a different Locale, specify it in the call to
- * createDateInstance().
- * <pre>
- * \code
- *       DateFormat* df =
- *           DateFormat::createDateInstance( DateFormat::SHORT, Locale::getFrance());
- * \endcode
- * </pre>
- * You can use a DateFormat to parse also.
- * <pre>
- * \code
- *       UErrorCode status = U_ZERO_ERROR;
- *       UDate myDate = df->parse(myString, status);
- * \endcode
- * </pre>
- * Use createDateInstance() to produce the normal date format for that country.
- * There are other static factory methods available. Use createTimeInstance()
- * to produce the normal time format for that country. Use createDateTimeInstance()
- * to produce a DateFormat that formats both date and time. You can pass in
- * different options to these factory methods to control the length of the
- * result; from SHORT to MEDIUM to LONG to FULL. The exact result depends on the
- * locale, but generally:
- * <ul type=round>
- *   <li>   SHORT is completely numeric, such as 12/13/52 or 3:30pm
- *   <li>   MEDIUM is longer, such as Jan 12, 1952
- *   <li>   LONG is longer, such as January 12, 1952 or 3:30:32pm
- *   <li>   FULL is pretty completely specified, such as
- *          Tuesday, April 12, 1952 AD or 3:30:42pm PST.
- * </ul>
- * You can also set the time zone on the format if you wish. If you want even
- * more control over the format or parsing, (or want to give your users more
- * control), you can try casting the DateFormat you get from the factory methods
- * to a SimpleDateFormat. This will work for the majority of countries; just
- * remember to chck getDynamicClassID() before carrying out the cast.
- * <P>
- * You can also use forms of the parse and format methods with ParsePosition and
- * FieldPosition to allow you to
- * <ul type=round>
- *   <li>   Progressively parse through pieces of a string.
- *   <li>   Align any particular field, or find out where it is for selection
- *          on the screen.
- * </ul>
- *
- * <p><em>User subclasses are not supported.</em> While clients may write
- * subclasses, such code will not necessarily work and will not be
- * guaranteed to work stably from release to release.
- */
-class U_I18N_API DateFormat : public Format {
-public:
-
-    /**
-     * Constants for various style patterns. These reflect the order of items in
-     * the DateTimePatterns resource. There are 4 time patterns, 4 date patterns,
-     * and then the date-time pattern. Each block of 4 values in the resource occurs
-     * in the order full, long, medium, short.
-     * @stable ICU 2.4
-     */
-    enum EStyle
-    {
-        kNone   = -1,
-
-        kFull   = 0,
-        kLong   = 1,
-        kMedium = 2,
-        kShort  = 3,
-
-        kDateOffset   = kShort + 1,
-     // kFull   + kDateOffset = 4
-     // kLong   + kDateOffset = 5
-     // kMedium + kDateOffset = 6
-     // kShort  + kDateOffset = 7
-
-        kDateTime             = 8,
-        
-
-        // relative dates
-        kRelative = (1 << 7),
-        
-        kFullRelative = (kFull | kRelative),
-            
-        kLongRelative = kLong | kRelative,
-        
-        kMediumRelative = kMedium | kRelative,
-        
-        kShortRelative = kShort | kRelative,
-        
-
-        kDefault      = kMedium,
-
-
-
-    /**
-     * These constants are provided for backwards compatibility only.
-     * Please use the C++ style constants defined above.
-     */
-        FULL        = kFull,
-        LONG        = kLong,
-        MEDIUM        = kMedium,
-        SHORT        = kShort,
-        DEFAULT        = kDefault,
-        DATE_OFFSET    = kDateOffset,
-        NONE        = kNone,
-        DATE_TIME    = kDateTime
-    };
-
-    /**
-     * Destructor.
-     * @stable ICU 2.0
-     */
-    virtual ~DateFormat();
-
-    /**
-     * Equality operator.  Returns true if the two formats have the same behavior.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const Format&) const;
-
-    /**
-     * Format an object to produce a string. This method handles Formattable
-     * objects with a UDate type. If a the Formattable object type is not a Date,
-     * then it returns a failing UErrorCode.
-     *
-     * @param obj       The object to format. Must be a Date.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @param status    Output param filled with success/failure status.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& format(const Formattable& obj,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos,
-                                  UErrorCode& status) const;
-
-    /**
-     * Formats a date into a date/time string. This is an abstract method which
-     * concrete subclasses must implement.
-     * <P>
-     * On input, the FieldPosition parameter may have its "field" member filled with
-     * an enum value specifying a field.  On output, the FieldPosition will be filled
-     * in with the text offsets for that field.
-     * <P> For example, given a time text
-     * "1996.07.10 AD at 15:08:56 PDT", if the given fieldPosition.field is
-     * UDAT_YEAR_FIELD, the offsets fieldPosition.beginIndex and
-     * statfieldPositionus.getEndIndex will be set to 0 and 4, respectively.
-     * <P> Notice
-     * that if the same time field appears more than once in a pattern, the status will
-     * be set for the first occurence of that time field. For instance,
-     * formatting a UDate to the time string "1 PM PDT (Pacific Daylight Time)"
-     * using the pattern "h a z (zzzz)" and the alignment field
-     * DateFormat::TIMEZONE_FIELD, the offsets fieldPosition.beginIndex and
-     * fieldPosition.getEndIndex will be set to 5 and 8, respectively, for the first
-     * occurence of the timezone pattern character 'z'.
-     *
-     * @param cal           Calendar set to the date and time to be formatted
-     *                      into a date/time string.
-     * @param appendTo      Output parameter to receive result.
-     *                      Result is appended to existing contents.
-     * @param fieldPosition On input: an alignment field, if desired (see examples above)
-     *                      On output: the offsets of the alignment field (see examples above)
-     * @return              Reference to 'appendTo' parameter.
-     * @stable ICU 2.1
-     */
-    virtual UnicodeString& format(  Calendar& cal,
-                                    UnicodeString& appendTo,
-                                    FieldPosition& fieldPosition) const = 0;
-
-    /**
-     * Formats a UDate into a date/time string.
-     * <P>
-     * On input, the FieldPosition parameter may have its "field" member filled with
-     * an enum value specifying a field.  On output, the FieldPosition will be filled
-     * in with the text offsets for that field.
-     * <P> For example, given a time text
-     * "1996.07.10 AD at 15:08:56 PDT", if the given fieldPosition.field is
-     * UDAT_YEAR_FIELD, the offsets fieldPosition.beginIndex and
-     * statfieldPositionus.getEndIndex will be set to 0 and 4, respectively.
-     * <P> Notice
-     * that if the same time field appears more than once in a pattern, the status will
-     * be set for the first occurence of that time field. For instance,
-     * formatting a UDate to the time string "1 PM PDT (Pacific Daylight Time)"
-     * using the pattern "h a z (zzzz)" and the alignment field
-     * DateFormat::TIMEZONE_FIELD, the offsets fieldPosition.beginIndex and
-     * fieldPosition.getEndIndex will be set to 5 and 8, respectively, for the first
-     * occurence of the timezone pattern character 'z'.
-     *
-     * @param date          UDate to be formatted into a date/time string.
-     * @param appendTo      Output parameter to receive result.
-     *                      Result is appended to existing contents.
-     * @param fieldPosition On input: an alignment field, if desired (see examples above)
-     *                      On output: the offsets of the alignment field (see examples above)
-     * @return              Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(  UDate date,
-                            UnicodeString& appendTo,
-                            FieldPosition& fieldPosition) const;
-
-    /**
-     * Formats a UDate into a date/time string. If there is a problem, you won't
-     * know, using this method. Use the overloaded format() method which takes a
-     * FieldPosition& to detect formatting problems.
-     *
-     * @param date      The UDate value to be formatted into a string.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(UDate date, UnicodeString& appendTo) const;
-
-    /**
-     * Redeclared Format method.
-     *
-     * @param obj       The object to be formatted into a string.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param status    Output param filled with success/failure status.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(const Formattable& obj,
-                          UnicodeString& appendTo,
-                          UErrorCode& status) const;
-
-    /**
-     * Parse a date/time string.
-     *
-     * @param text      The string to be parsed into a UDate value.
-     * @param status    Output param to be set to success/failure code. If
-     *                  'text' cannot be parsed, it will be set to a failure
-     *                  code.
-     * @result          The parsed UDate value, if successful.
-     * @stable ICU 2.0
-     */
-    virtual UDate parse( const UnicodeString& text,
-                        UErrorCode& status) const;
-
-    /**
-     * Parse a date/time string beginning at the given parse position. For
-     * example, a time text "07/10/96 4:5 PM, PDT" will be parsed into a Date
-     * that is equivalent to Date(837039928046).
-     * <P>
-     * By default, parsing is lenient: If the input is not in the form used by
-     * this object's format method but can still be parsed as a date, then the
-     * parse succeeds. Clients may insist on strict adherence to the format by
-     * calling setLenient(false).
-     *
-     * @see DateFormat::setLenient(boolean)
-     *
-     * @param text  The date/time string to be parsed
-     * @param cal   a Calendar set to the date and time to be formatted
-     *              into a date/time string.
-     * @param pos   On input, the position at which to start parsing; on
-     *              output, the position at which parsing terminated, or the
-     *              start position if the parse failed.
-     * @return      A valid UDate if the input could be parsed.
-     * @stable ICU 2.1
-     */
-    virtual void parse( const UnicodeString& text,
-                        Calendar& cal,
-                        ParsePosition& pos) const = 0;
-
-    /**
-     * Parse a date/time string beginning at the given parse position. For
-     * example, a time text "07/10/96 4:5 PM, PDT" will be parsed into a Date
-     * that is equivalent to Date(837039928046).
-     * <P>
-     * By default, parsing is lenient: If the input is not in the form used by
-     * this object's format method but can still be parsed as a date, then the
-     * parse succeeds. Clients may insist on strict adherence to the format by
-     * calling setLenient(false).
-     *
-     * @see DateFormat::setLenient(boolean)
-     *
-     * @param text  The date/time string to be parsed
-     * @param pos   On input, the position at which to start parsing; on
-     *              output, the position at which parsing terminated, or the
-     *              start position if the parse failed.
-     * @return      A valid UDate if the input could be parsed.
-     * @stable ICU 2.0
-     */
-    UDate parse( const UnicodeString& text,
-                 ParsePosition& pos) const;
-
-    /**
-     * Parse a string to produce an object. This methods handles parsing of
-     * date/time strings into Formattable objects with UDate types.
-     * <P>
-     * Before calling, set parse_pos.index to the offset you want to start
-     * parsing at in the source. After calling, parse_pos.index is the end of
-     * the text you parsed. If error occurs, index is unchanged.
-     * <P>
-     * When parsing, leading whitespace is discarded (with a successful parse),
-     * while trailing whitespace is left as is.
-     * <P>
-     * See Format::parseObject() for more.
-     *
-     * @param source    The string to be parsed into an object.
-     * @param result    Formattable to be set to the parse result.
-     *                  If parse fails, return contents are undefined.
-     * @param parse_pos The position to start parsing at. Upon return
-     *                  this param is set to the position after the
-     *                  last character successfully parsed. If the
-     *                  source is not parsed successfully, this param
-     *                  will remain unchanged.
-     * @return          A newly created Formattable* object, or NULL
-     *                  on failure.  The caller owns this and should
-     *                  delete it when done.
-     * @stable ICU 2.0
-     */
-    virtual void parseObject(const UnicodeString& source,
-                             Formattable& result,
-                             ParsePosition& parse_pos) const;
-
-    /**
-     * Create a default date/time formatter that uses the SHORT style for both
-     * the date and the time.
-     *
-     * @return A date/time formatter which the caller owns.
-     * @stable ICU 2.0
-     */
-    static DateFormat* U_EXPORT2 createInstance(void);
-
-    /**
-     * This is for ICU internal use only. Please do not use.
-     * Create a date/time formatter from skeleton and a given locale.
-     *
-     * Users are encouraged to use the skeleton macros defined in udat.h.
-     * For example, MONTH_WEEKDAY_DAY, which is "MMMMEEEEd",
-     * and which means the pattern should have day, month, and day-of-week 
-     * fields, and follow the long date format defined in date time pattern.
-     * For example, for English, the full pattern should be 
-     * "EEEE, MMMM d".
-     * 
-     * Temporarily, this is an internal API, used by DateIntevalFormat only.
-     * There will be a new set of APIs for the same purpose coming soon.
-     * After which, this API will be replaced.
-     *
-     * @param skeleton  the skeleton on which date format based.
-     * @param locale    the given locale.
-     * @param status    Output param to be set to success/failure code.
-     *                  If it is failure, the returned date formatter will
-     *                  be NULL.
-     * @return          a simple date formatter which the caller owns.
-     * @internal ICU 4.0
-     */
-    static DateFormat* U_EXPORT2 createPatternInstance(
-                                                const UnicodeString& skeleton,
-                                                const Locale& locale,
-                                                UErrorCode& status);
-
-    /**
-     * Creates a time formatter with the given formatting style for the given
-     * locale.
-     *
-     * @param style     The given formatting style. For example,
-     *                  SHORT for "h:mm a" in the US locale.
-     * @param aLocale   The given locale.
-     * @return          A time formatter which the caller owns.
-     * @stable ICU 2.0
-     */
-    static DateFormat* U_EXPORT2 createTimeInstance(EStyle style = kDefault,
-                                          const Locale& aLocale = Locale::getDefault());
-
-    /**
-     * Creates a date formatter with the given formatting style for the given
-     * const locale.
-     *
-     * @param style     The given formatting style. For example,
-     *                  SHORT for "M/d/yy" in the US locale.
-     * @param aLocale   The given locale.
-     * @return          A date formatter which the caller owns.
-     * @stable ICU 2.0
-     */
-    static DateFormat* U_EXPORT2 createDateInstance(EStyle style = kDefault,
-                                          const Locale& aLocale = Locale::getDefault());
-
-    /**
-     * Creates a date/time formatter with the given formatting styles for the
-     * given locale.
-     *
-     * @param dateStyle The given formatting style for the date portion of the result.
-     *                  For example, SHORT for "M/d/yy" in the US locale.
-     * @param timeStyle The given formatting style for the time portion of the result.
-     *                  For example, SHORT for "h:mm a" in the US locale.
-     * @param aLocale   The given locale.
-     * @return          A date/time formatter which the caller owns.
-     * @stable ICU 2.0
-     */
-    static DateFormat* U_EXPORT2 createDateTimeInstance(EStyle dateStyle = kDefault,
-                                              EStyle timeStyle = kDefault,
-                                              const Locale& aLocale = Locale::getDefault());
-
-    /**
-     * Gets the set of locales for which DateFormats are installed.
-     * @param count Filled in with the number of locales in the list that is returned.
-     * @return the set of locales for which DateFormats are installed.  The caller
-     *  does NOT own this list and must not delete it.
-     * @stable ICU 2.0
-     */
-    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
-
-    /**
-     * Returns true if the formatter is set for lenient parsing.
-     * @stable ICU 2.0
-     */
-    virtual UBool isLenient(void) const;
-
-    /**
-     * Specify whether or not date/time parsing is to be lenient. With lenient
-     * parsing, the parser may use heuristics to interpret inputs that do not
-     * precisely match this object's format. With strict parsing, inputs must
-     * match this object's format.
-     *
-     * @param lenient  True specifies date/time interpretation to be lenient.
-     * @see Calendar::setLenient
-     * @stable ICU 2.0
-     */
-    virtual void setLenient(UBool lenient);
-
-    /**
-     * Gets the calendar associated with this date/time formatter.
-     * @return the calendar associated with this date/time formatter.
-     * @stable ICU 2.0
-     */
-    virtual const Calendar* getCalendar(void) const;
-
-    /**
-     * Set the calendar to be used by this date format. Initially, the default
-     * calendar for the specified or default locale is used.  The caller should
-     * not delete the Calendar object after it is adopted by this call.
-     * Adopting a new calendar will change to the default symbols.
-     *
-     * @param calendarToAdopt    Calendar object to be adopted.
-     * @stable ICU 2.0
-     */
-    virtual void adoptCalendar(Calendar* calendarToAdopt);
-
-    /**
-     * Set the calendar to be used by this date format. Initially, the default
-     * calendar for the specified or default locale is used.
-     *
-     * @param newCalendar Calendar object to be set.
-     * @stable ICU 2.0
-     */
-    virtual void setCalendar(const Calendar& newCalendar);
-
-
-    /**
-     * Gets the number formatter which this date/time formatter uses to format
-     * and parse the numeric portions of the pattern.
-     * @return the number formatter which this date/time formatter uses.
-     * @stable ICU 2.0
-     */
-    virtual const NumberFormat* getNumberFormat(void) const;
-
-    /**
-     * Allows you to set the number formatter.  The caller should
-     * not delete the NumberFormat object after it is adopted by this call.
-     * @param formatToAdopt     NumberFormat object to be adopted.
-     * @stable ICU 2.0
-     */
-    virtual void adoptNumberFormat(NumberFormat* formatToAdopt);
-
-    /**
-     * Allows you to set the number formatter.
-     * @param newNumberFormat  NumberFormat object to be set.
-     * @stable ICU 2.0
-     */
-    virtual void setNumberFormat(const NumberFormat& newNumberFormat);
-
-    /**
-     * Returns a reference to the TimeZone used by this DateFormat's calendar.
-     * @return the time zone associated with the calendar of DateFormat.
-     * @stable ICU 2.0
-     */
-    virtual const TimeZone& getTimeZone(void) const;
-
-    /**
-     * Sets the time zone for the calendar of this DateFormat object. The caller
-     * no longer owns the TimeZone object and should not delete it after this call.
-     * @param zoneToAdopt the TimeZone to be adopted.
-     * @stable ICU 2.0
-     */
-    virtual void adoptTimeZone(TimeZone* zoneToAdopt);
-
-    /**
-     * Sets the time zone for the calendar of this DateFormat object.
-     * @param zone the new time zone.
-     * @stable ICU 2.0
-     */
-    virtual void setTimeZone(const TimeZone& zone);
-
-protected:
-    /**
-     * Default constructor.  Creates a DateFormat with no Calendar or NumberFormat
-     * associated with it.  This constructor depends on the subclasses to fill in
-     * the calendar and numberFormat fields.
-     * @stable ICU 2.0
-     */
-    DateFormat();
-
-    /**
-     * Copy constructor.
-     * @stable ICU 2.0
-     */
-    DateFormat(const DateFormat&);
-
-    /**
-     * Default assignment operator.
-     * @stable ICU 2.0
-     */
-    DateFormat& operator=(const DateFormat&);
-
-    /**
-     * The calendar that DateFormat uses to produce the time field values needed
-     * to implement date/time formatting. Subclasses should generally initialize
-     * this to the default calendar for the locale associated with this DateFormat.
-     * @stable ICU 2.4
-     */
-    Calendar* fCalendar;
-
-    /**
-     * The number formatter that DateFormat uses to format numbers in dates and
-     * times. Subclasses should generally initialize this to the default number
-     * format for the locale associated with this DateFormat.
-     * @stable ICU 2.4
-     */
-    NumberFormat* fNumberFormat;
-
-private:
-    /**
-     * Gets the date/time formatter with the given formatting styles for the
-     * given locale.
-     * @param dateStyle the given date formatting style.
-     * @param timeStyle the given time formatting style.
-     * @param inLocale the given locale.
-     * @return a date/time formatter, or 0 on failure.
-     */
-    static DateFormat* U_EXPORT2 create(EStyle timeStyle, EStyle dateStyle, const Locale&);
-
-public:
-    /**
-     * Field selector for FieldPosition for DateFormat fields.
-     * @obsolete ICU 3.4 use UDateFormatField instead, since this API will be
-     * removed in that release
-     */
-    enum EField
-    {
-        // Obsolete; use UDateFormatField instead
-        kEraField = UDAT_ERA_FIELD,
-        kYearField = UDAT_YEAR_FIELD,
-        kMonthField = UDAT_MONTH_FIELD,
-        kDateField = UDAT_DATE_FIELD,
-        kHourOfDay1Field = UDAT_HOUR_OF_DAY1_FIELD,
-        kHourOfDay0Field = UDAT_HOUR_OF_DAY0_FIELD,
-        kMinuteField = UDAT_MINUTE_FIELD,
-        kSecondField = UDAT_SECOND_FIELD,
-        kMillisecondField = UDAT_FRACTIONAL_SECOND_FIELD,
-        kDayOfWeekField = UDAT_DAY_OF_WEEK_FIELD,
-        kDayOfYearField = UDAT_DAY_OF_YEAR_FIELD,
-        kDayOfWeekInMonthField = UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
-        kWeekOfYearField = UDAT_WEEK_OF_YEAR_FIELD,
-        kWeekOfMonthField = UDAT_WEEK_OF_MONTH_FIELD,
-        kAmPmField = UDAT_AM_PM_FIELD,
-        kHour1Field = UDAT_HOUR1_FIELD,
-        kHour0Field = UDAT_HOUR0_FIELD,
-        kTimezoneField = UDAT_TIMEZONE_FIELD,
-        kYearWOYField = UDAT_YEAR_WOY_FIELD,
-        kDOWLocalField = UDAT_DOW_LOCAL_FIELD,
-        kExtendedYearField = UDAT_EXTENDED_YEAR_FIELD,
-        kJulianDayField = UDAT_JULIAN_DAY_FIELD,
-        kMillisecondsInDayField = UDAT_MILLISECONDS_IN_DAY_FIELD,
-
-        // Obsolete; use UDateFormatField instead
-        ERA_FIELD = UDAT_ERA_FIELD,
-        YEAR_FIELD = UDAT_YEAR_FIELD,
-        MONTH_FIELD = UDAT_MONTH_FIELD,
-        DATE_FIELD = UDAT_DATE_FIELD,
-        HOUR_OF_DAY1_FIELD = UDAT_HOUR_OF_DAY1_FIELD,
-        HOUR_OF_DAY0_FIELD = UDAT_HOUR_OF_DAY0_FIELD,
-        MINUTE_FIELD = UDAT_MINUTE_FIELD,
-        SECOND_FIELD = UDAT_SECOND_FIELD,
-        MILLISECOND_FIELD = UDAT_FRACTIONAL_SECOND_FIELD,
-        DAY_OF_WEEK_FIELD = UDAT_DAY_OF_WEEK_FIELD,
-        DAY_OF_YEAR_FIELD = UDAT_DAY_OF_YEAR_FIELD,
-        DAY_OF_WEEK_IN_MONTH_FIELD = UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
-        WEEK_OF_YEAR_FIELD = UDAT_WEEK_OF_YEAR_FIELD,
-        WEEK_OF_MONTH_FIELD = UDAT_WEEK_OF_MONTH_FIELD,
-        AM_PM_FIELD = UDAT_AM_PM_FIELD,
-        HOUR1_FIELD = UDAT_HOUR1_FIELD,
-        HOUR0_FIELD = UDAT_HOUR0_FIELD,
-        TIMEZONE_FIELD = UDAT_TIMEZONE_FIELD
-    };
-};
-
-inline UnicodeString&
-DateFormat::format(const Formattable& obj,
-                   UnicodeString& appendTo,
-                   UErrorCode& status) const {
-    return Format::format(obj, appendTo, status);
-}
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _DATEFMT
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/datefmt.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/datefmt.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/datefmt.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/datefmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,700 @@
+/*
+ ********************************************************************************
+ *   Copyright (C) 1997-2008, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ ********************************************************************************
+ *
+ * File DATEFMT.H
+ *
+ * Modification History:
+ *
+ *   Date        Name        Description
+ *   02/19/97    aliu        Converted from java.
+ *   04/01/97    aliu        Added support for centuries.
+ *   07/23/98    stephen     JDK 1.2 sync
+ *   11/15/99    weiv        Added support for week of year/day of week formatting
+ ********************************************************************************
+ */
+
+#ifndef DATEFMT_H
+#define DATEFMT_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/udat.h"
+#include "unicode/calendar.h"
+#include "unicode/numfmt.h"
+#include "unicode/format.h"
+#include "unicode/locid.h"
+
+/**
+ * \file 
+ * \brief C++ API: Abstract class for converting dates.
+ */
+
+U_NAMESPACE_BEGIN
+
+class TimeZone;
+
+/**
+ * DateFormat is an abstract class for a family of classes that convert dates and
+ * times from their internal representations to textual form and back again in a
+ * language-independent manner. Converting from the internal representation (milliseconds
+ * since midnight, January 1, 1970) to text is known as "formatting," and converting
+ * from text to millis is known as "parsing."  We currently define only one concrete
+ * subclass of DateFormat: SimpleDateFormat, which can handle pretty much all normal
+ * date formatting and parsing actions.
+ * <P>
+ * DateFormat helps you to format and parse dates for any locale. Your code can
+ * be completely independent of the locale conventions for months, days of the
+ * week, or even the calendar format: lunar vs. solar.
+ * <P>
+ * To format a date for the current Locale, use one of the static factory
+ * methods:
+ * <pre>
+ * \code
+ *      DateFormat* dfmt = DateFormat::createDateInstance();
+ *      UDate myDate = Calendar::getNow();
+ *      UnicodeString myString;
+ *      myString = dfmt->format( myDate, myString );
+ * \endcode
+ * </pre>
+ * If you are formatting multiple numbers, it is more efficient to get the
+ * format and use it multiple times so that the system doesn't have to fetch the
+ * information about the local language and country conventions multiple times.
+ * <pre>
+ * \code
+ *      DateFormat* df = DateFormat::createDateInstance();
+ *      UnicodeString myString;
+ *      UDate myDateArr[] = { 0.0, 100000000.0, 2000000000.0 }; // test values
+ *      for (int32_t i = 0; i < 3; ++i) {
+ *          myString.remove();
+ *          cout << df->format( myDateArr[i], myString ) << endl;
+ *      }
+ * \endcode
+ * </pre>
+ * To get specific fields of a date, you can use UFieldPosition to
+ * get specific fields.
+ * <pre>
+ * \code
+ *      DateFormat* dfmt = DateFormat::createDateInstance();
+ *      FieldPosition pos(DateFormat::YEAR_FIELD);
+ *      UnicodeString myString;
+ *      myString = dfmt->format( myDate, myString );
+ *      cout << myString << endl;
+ *      cout << pos.getBeginIndex() << "," << pos. getEndIndex() << endl;
+ * \endcode
+ * </pre>
+ * To format a date for a different Locale, specify it in the call to
+ * createDateInstance().
+ * <pre>
+ * \code
+ *       DateFormat* df =
+ *           DateFormat::createDateInstance( DateFormat::SHORT, Locale::getFrance());
+ * \endcode
+ * </pre>
+ * You can use a DateFormat to parse also.
+ * <pre>
+ * \code
+ *       UErrorCode status = U_ZERO_ERROR;
+ *       UDate myDate = df->parse(myString, status);
+ * \endcode
+ * </pre>
+ * Use createDateInstance() to produce the normal date format for that country.
+ * There are other static factory methods available. Use createTimeInstance()
+ * to produce the normal time format for that country. Use createDateTimeInstance()
+ * to produce a DateFormat that formats both date and time. You can pass in
+ * different options to these factory methods to control the length of the
+ * result; from SHORT to MEDIUM to LONG to FULL. The exact result depends on the
+ * locale, but generally:
+ * <ul type=round>
+ *   <li>   SHORT is completely numeric, such as 12/13/52 or 3:30pm
+ *   <li>   MEDIUM is longer, such as Jan 12, 1952
+ *   <li>   LONG is longer, such as January 12, 1952 or 3:30:32pm
+ *   <li>   FULL is pretty completely specified, such as
+ *          Tuesday, April 12, 1952 AD or 3:30:42pm PST.
+ * </ul>
+ * You can also set the time zone on the format if you wish. If you want even
+ * more control over the format or parsing, (or want to give your users more
+ * control), you can try casting the DateFormat you get from the factory methods
+ * to a SimpleDateFormat. This will work for the majority of countries; just
+ * remember to chck getDynamicClassID() before carrying out the cast.
+ * <P>
+ * You can also use forms of the parse and format methods with ParsePosition and
+ * FieldPosition to allow you to
+ * <ul type=round>
+ *   <li>   Progressively parse through pieces of a string.
+ *   <li>   Align any particular field, or find out where it is for selection
+ *          on the screen.
+ * </ul>
+ *
+ * <p><em>User subclasses are not supported.</em> While clients may write
+ * subclasses, such code will not necessarily work and will not be
+ * guaranteed to work stably from release to release.
+ */
+class U_I18N_API DateFormat : public Format {
+public:
+
+    /**
+     * Constants for various style patterns. These reflect the order of items in
+     * the DateTimePatterns resource. There are 4 time patterns, 4 date patterns,
+     * and then the date-time pattern. Each block of 4 values in the resource occurs
+     * in the order full, long, medium, short.
+     * @stable ICU 2.4
+     */
+    enum EStyle
+    {
+        kNone   = -1,
+
+        kFull   = 0,
+        kLong   = 1,
+        kMedium = 2,
+        kShort  = 3,
+
+        kDateOffset   = kShort + 1,
+     // kFull   + kDateOffset = 4
+     // kLong   + kDateOffset = 5
+     // kMedium + kDateOffset = 6
+     // kShort  + kDateOffset = 7
+
+        kDateTime             = 8,
+        
+
+        // relative dates
+        kRelative = (1 << 7),
+        
+        kFullRelative = (kFull | kRelative),
+            
+        kLongRelative = kLong | kRelative,
+        
+        kMediumRelative = kMedium | kRelative,
+        
+        kShortRelative = kShort | kRelative,
+        
+
+        kDefault      = kMedium,
+
+
+
+    /**
+     * These constants are provided for backwards compatibility only.
+     * Please use the C++ style constants defined above.
+     */
+        FULL        = kFull,
+        LONG        = kLong,
+        MEDIUM        = kMedium,
+        SHORT        = kShort,
+        DEFAULT        = kDefault,
+        DATE_OFFSET    = kDateOffset,
+        NONE        = kNone,
+        DATE_TIME    = kDateTime
+    };
+
+    /**
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~DateFormat();
+
+    /**
+     * Equality operator.  Returns true if the two formats have the same behavior.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const Format&) const;
+
+    /**
+     * Format an object to produce a string. This method handles Formattable
+     * objects with a UDate type. If a the Formattable object type is not a Date,
+     * then it returns a failing UErrorCode.
+     *
+     * @param obj       The object to format. Must be a Date.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @param status    Output param filled with success/failure status.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& format(const Formattable& obj,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos,
+                                  UErrorCode& status) const;
+
+    /**
+     * Formats a date into a date/time string. This is an abstract method which
+     * concrete subclasses must implement.
+     * <P>
+     * On input, the FieldPosition parameter may have its "field" member filled with
+     * an enum value specifying a field.  On output, the FieldPosition will be filled
+     * in with the text offsets for that field.
+     * <P> For example, given a time text
+     * "1996.07.10 AD at 15:08:56 PDT", if the given fieldPosition.field is
+     * UDAT_YEAR_FIELD, the offsets fieldPosition.beginIndex and
+     * statfieldPositionus.getEndIndex will be set to 0 and 4, respectively.
+     * <P> Notice
+     * that if the same time field appears more than once in a pattern, the status will
+     * be set for the first occurence of that time field. For instance,
+     * formatting a UDate to the time string "1 PM PDT (Pacific Daylight Time)"
+     * using the pattern "h a z (zzzz)" and the alignment field
+     * DateFormat::TIMEZONE_FIELD, the offsets fieldPosition.beginIndex and
+     * fieldPosition.getEndIndex will be set to 5 and 8, respectively, for the first
+     * occurence of the timezone pattern character 'z'.
+     *
+     * @param cal           Calendar set to the date and time to be formatted
+     *                      into a date/time string.
+     * @param appendTo      Output parameter to receive result.
+     *                      Result is appended to existing contents.
+     * @param fieldPosition On input: an alignment field, if desired (see examples above)
+     *                      On output: the offsets of the alignment field (see examples above)
+     * @return              Reference to 'appendTo' parameter.
+     * @stable ICU 2.1
+     */
+    virtual UnicodeString& format(  Calendar& cal,
+                                    UnicodeString& appendTo,
+                                    FieldPosition& fieldPosition) const = 0;
+
+    /**
+     * Formats a UDate into a date/time string.
+     * <P>
+     * On input, the FieldPosition parameter may have its "field" member filled with
+     * an enum value specifying a field.  On output, the FieldPosition will be filled
+     * in with the text offsets for that field.
+     * <P> For example, given a time text
+     * "1996.07.10 AD at 15:08:56 PDT", if the given fieldPosition.field is
+     * UDAT_YEAR_FIELD, the offsets fieldPosition.beginIndex and
+     * statfieldPositionus.getEndIndex will be set to 0 and 4, respectively.
+     * <P> Notice
+     * that if the same time field appears more than once in a pattern, the status will
+     * be set for the first occurence of that time field. For instance,
+     * formatting a UDate to the time string "1 PM PDT (Pacific Daylight Time)"
+     * using the pattern "h a z (zzzz)" and the alignment field
+     * DateFormat::TIMEZONE_FIELD, the offsets fieldPosition.beginIndex and
+     * fieldPosition.getEndIndex will be set to 5 and 8, respectively, for the first
+     * occurence of the timezone pattern character 'z'.
+     *
+     * @param date          UDate to be formatted into a date/time string.
+     * @param appendTo      Output parameter to receive result.
+     *                      Result is appended to existing contents.
+     * @param fieldPosition On input: an alignment field, if desired (see examples above)
+     *                      On output: the offsets of the alignment field (see examples above)
+     * @return              Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(  UDate date,
+                            UnicodeString& appendTo,
+                            FieldPosition& fieldPosition) const;
+
+    /**
+     * Formats a UDate into a date/time string. If there is a problem, you won't
+     * know, using this method. Use the overloaded format() method which takes a
+     * FieldPosition& to detect formatting problems.
+     *
+     * @param date      The UDate value to be formatted into a string.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(UDate date, UnicodeString& appendTo) const;
+
+    /**
+     * Redeclared Format method.
+     *
+     * @param obj       The object to be formatted into a string.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param status    Output param filled with success/failure status.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(const Formattable& obj,
+                          UnicodeString& appendTo,
+                          UErrorCode& status) const;
+
+    /**
+     * Parse a date/time string.
+     *
+     * @param text      The string to be parsed into a UDate value.
+     * @param status    Output param to be set to success/failure code. If
+     *                  'text' cannot be parsed, it will be set to a failure
+     *                  code.
+     * @result          The parsed UDate value, if successful.
+     * @stable ICU 2.0
+     */
+    virtual UDate parse( const UnicodeString& text,
+                        UErrorCode& status) const;
+
+    /**
+     * Parse a date/time string beginning at the given parse position. For
+     * example, a time text "07/10/96 4:5 PM, PDT" will be parsed into a Date
+     * that is equivalent to Date(837039928046).
+     * <P>
+     * By default, parsing is lenient: If the input is not in the form used by
+     * this object's format method but can still be parsed as a date, then the
+     * parse succeeds. Clients may insist on strict adherence to the format by
+     * calling setLenient(false).
+     *
+     * @see DateFormat::setLenient(boolean)
+     *
+     * @param text  The date/time string to be parsed
+     * @param cal   a Calendar set to the date and time to be formatted
+     *              into a date/time string.
+     * @param pos   On input, the position at which to start parsing; on
+     *              output, the position at which parsing terminated, or the
+     *              start position if the parse failed.
+     * @return      A valid UDate if the input could be parsed.
+     * @stable ICU 2.1
+     */
+    virtual void parse( const UnicodeString& text,
+                        Calendar& cal,
+                        ParsePosition& pos) const = 0;
+
+    /**
+     * Parse a date/time string beginning at the given parse position. For
+     * example, a time text "07/10/96 4:5 PM, PDT" will be parsed into a Date
+     * that is equivalent to Date(837039928046).
+     * <P>
+     * By default, parsing is lenient: If the input is not in the form used by
+     * this object's format method but can still be parsed as a date, then the
+     * parse succeeds. Clients may insist on strict adherence to the format by
+     * calling setLenient(false).
+     *
+     * @see DateFormat::setLenient(boolean)
+     *
+     * @param text  The date/time string to be parsed
+     * @param pos   On input, the position at which to start parsing; on
+     *              output, the position at which parsing terminated, or the
+     *              start position if the parse failed.
+     * @return      A valid UDate if the input could be parsed.
+     * @stable ICU 2.0
+     */
+    UDate parse( const UnicodeString& text,
+                 ParsePosition& pos) const;
+
+    /**
+     * Parse a string to produce an object. This methods handles parsing of
+     * date/time strings into Formattable objects with UDate types.
+     * <P>
+     * Before calling, set parse_pos.index to the offset you want to start
+     * parsing at in the source. After calling, parse_pos.index is the end of
+     * the text you parsed. If error occurs, index is unchanged.
+     * <P>
+     * When parsing, leading whitespace is discarded (with a successful parse),
+     * while trailing whitespace is left as is.
+     * <P>
+     * See Format::parseObject() for more.
+     *
+     * @param source    The string to be parsed into an object.
+     * @param result    Formattable to be set to the parse result.
+     *                  If parse fails, return contents are undefined.
+     * @param parse_pos The position to start parsing at. Upon return
+     *                  this param is set to the position after the
+     *                  last character successfully parsed. If the
+     *                  source is not parsed successfully, this param
+     *                  will remain unchanged.
+     * @return          A newly created Formattable* object, or NULL
+     *                  on failure.  The caller owns this and should
+     *                  delete it when done.
+     * @stable ICU 2.0
+     */
+    virtual void parseObject(const UnicodeString& source,
+                             Formattable& result,
+                             ParsePosition& parse_pos) const;
+
+    /**
+     * Create a default date/time formatter that uses the SHORT style for both
+     * the date and the time.
+     *
+     * @return A date/time formatter which the caller owns.
+     * @stable ICU 2.0
+     */
+    static DateFormat* U_EXPORT2 createInstance(void);
+
+    /**
+     * This is for ICU internal use only. Please do not use.
+     * Create a date/time formatter from skeleton and a given locale.
+     *
+     * Users are encouraged to use the skeleton macros defined in udat.h.
+     * For example, MONTH_WEEKDAY_DAY, which is "MMMMEEEEd",
+     * and which means the pattern should have day, month, and day-of-week 
+     * fields, and follow the long date format defined in date time pattern.
+     * For example, for English, the full pattern should be 
+     * "EEEE, MMMM d".
+     * 
+     * Temporarily, this is an internal API, used by DateIntevalFormat only.
+     * There will be a new set of APIs for the same purpose coming soon.
+     * After which, this API will be replaced.
+     *
+     * @param skeleton  the skeleton on which date format based.
+     * @param locale    the given locale.
+     * @param status    Output param to be set to success/failure code.
+     *                  If it is failure, the returned date formatter will
+     *                  be NULL.
+     * @return          a simple date formatter which the caller owns.
+     * @internal ICU 4.0
+     */
+    static DateFormat* U_EXPORT2 createPatternInstance(
+                                                const UnicodeString& skeleton,
+                                                const Locale& locale,
+                                                UErrorCode& status);
+
+    /**
+     * Creates a time formatter with the given formatting style for the given
+     * locale.
+     *
+     * @param style     The given formatting style. For example,
+     *                  SHORT for "h:mm a" in the US locale.
+     * @param aLocale   The given locale.
+     * @return          A time formatter which the caller owns.
+     * @stable ICU 2.0
+     */
+    static DateFormat* U_EXPORT2 createTimeInstance(EStyle style = kDefault,
+                                          const Locale& aLocale = Locale::getDefault());
+
+    /**
+     * Creates a date formatter with the given formatting style for the given
+     * const locale.
+     *
+     * @param style     The given formatting style. For example,
+     *                  SHORT for "M/d/yy" in the US locale.
+     * @param aLocale   The given locale.
+     * @return          A date formatter which the caller owns.
+     * @stable ICU 2.0
+     */
+    static DateFormat* U_EXPORT2 createDateInstance(EStyle style = kDefault,
+                                          const Locale& aLocale = Locale::getDefault());
+
+    /**
+     * Creates a date/time formatter with the given formatting styles for the
+     * given locale.
+     *
+     * @param dateStyle The given formatting style for the date portion of the result.
+     *                  For example, SHORT for "M/d/yy" in the US locale.
+     * @param timeStyle The given formatting style for the time portion of the result.
+     *                  For example, SHORT for "h:mm a" in the US locale.
+     * @param aLocale   The given locale.
+     * @return          A date/time formatter which the caller owns.
+     * @stable ICU 2.0
+     */
+    static DateFormat* U_EXPORT2 createDateTimeInstance(EStyle dateStyle = kDefault,
+                                              EStyle timeStyle = kDefault,
+                                              const Locale& aLocale = Locale::getDefault());
+
+    /**
+     * Gets the set of locales for which DateFormats are installed.
+     * @param count Filled in with the number of locales in the list that is returned.
+     * @return the set of locales for which DateFormats are installed.  The caller
+     *  does NOT own this list and must not delete it.
+     * @stable ICU 2.0
+     */
+    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+
+    /**
+     * Returns true if the formatter is set for lenient parsing.
+     * @stable ICU 2.0
+     */
+    virtual UBool isLenient(void) const;
+
+    /**
+     * Specify whether or not date/time parsing is to be lenient. With lenient
+     * parsing, the parser may use heuristics to interpret inputs that do not
+     * precisely match this object's format. With strict parsing, inputs must
+     * match this object's format.
+     *
+     * @param lenient  True specifies date/time interpretation to be lenient.
+     * @see Calendar::setLenient
+     * @stable ICU 2.0
+     */
+    virtual void setLenient(UBool lenient);
+
+    /**
+     * Gets the calendar associated with this date/time formatter.
+     * @return the calendar associated with this date/time formatter.
+     * @stable ICU 2.0
+     */
+    virtual const Calendar* getCalendar(void) const;
+
+    /**
+     * Set the calendar to be used by this date format. Initially, the default
+     * calendar for the specified or default locale is used.  The caller should
+     * not delete the Calendar object after it is adopted by this call.
+     * Adopting a new calendar will change to the default symbols.
+     *
+     * @param calendarToAdopt    Calendar object to be adopted.
+     * @stable ICU 2.0
+     */
+    virtual void adoptCalendar(Calendar* calendarToAdopt);
+
+    /**
+     * Set the calendar to be used by this date format. Initially, the default
+     * calendar for the specified or default locale is used.
+     *
+     * @param newCalendar Calendar object to be set.
+     * @stable ICU 2.0
+     */
+    virtual void setCalendar(const Calendar& newCalendar);
+
+
+    /**
+     * Gets the number formatter which this date/time formatter uses to format
+     * and parse the numeric portions of the pattern.
+     * @return the number formatter which this date/time formatter uses.
+     * @stable ICU 2.0
+     */
+    virtual const NumberFormat* getNumberFormat(void) const;
+
+    /**
+     * Allows you to set the number formatter.  The caller should
+     * not delete the NumberFormat object after it is adopted by this call.
+     * @param formatToAdopt     NumberFormat object to be adopted.
+     * @stable ICU 2.0
+     */
+    virtual void adoptNumberFormat(NumberFormat* formatToAdopt);
+
+    /**
+     * Allows you to set the number formatter.
+     * @param newNumberFormat  NumberFormat object to be set.
+     * @stable ICU 2.0
+     */
+    virtual void setNumberFormat(const NumberFormat& newNumberFormat);
+
+    /**
+     * Returns a reference to the TimeZone used by this DateFormat's calendar.
+     * @return the time zone associated with the calendar of DateFormat.
+     * @stable ICU 2.0
+     */
+    virtual const TimeZone& getTimeZone(void) const;
+
+    /**
+     * Sets the time zone for the calendar of this DateFormat object. The caller
+     * no longer owns the TimeZone object and should not delete it after this call.
+     * @param zoneToAdopt the TimeZone to be adopted.
+     * @stable ICU 2.0
+     */
+    virtual void adoptTimeZone(TimeZone* zoneToAdopt);
+
+    /**
+     * Sets the time zone for the calendar of this DateFormat object.
+     * @param zone the new time zone.
+     * @stable ICU 2.0
+     */
+    virtual void setTimeZone(const TimeZone& zone);
+
+protected:
+    /**
+     * Default constructor.  Creates a DateFormat with no Calendar or NumberFormat
+     * associated with it.  This constructor depends on the subclasses to fill in
+     * the calendar and numberFormat fields.
+     * @stable ICU 2.0
+     */
+    DateFormat();
+
+    /**
+     * Copy constructor.
+     * @stable ICU 2.0
+     */
+    DateFormat(const DateFormat&);
+
+    /**
+     * Default assignment operator.
+     * @stable ICU 2.0
+     */
+    DateFormat& operator=(const DateFormat&);
+
+    /**
+     * The calendar that DateFormat uses to produce the time field values needed
+     * to implement date/time formatting. Subclasses should generally initialize
+     * this to the default calendar for the locale associated with this DateFormat.
+     * @stable ICU 2.4
+     */
+    Calendar* fCalendar;
+
+    /**
+     * The number formatter that DateFormat uses to format numbers in dates and
+     * times. Subclasses should generally initialize this to the default number
+     * format for the locale associated with this DateFormat.
+     * @stable ICU 2.4
+     */
+    NumberFormat* fNumberFormat;
+
+private:
+    /**
+     * Gets the date/time formatter with the given formatting styles for the
+     * given locale.
+     * @param dateStyle the given date formatting style.
+     * @param timeStyle the given time formatting style.
+     * @param inLocale the given locale.
+     * @return a date/time formatter, or 0 on failure.
+     */
+    static DateFormat* U_EXPORT2 create(EStyle timeStyle, EStyle dateStyle, const Locale&);
+
+public:
+    /**
+     * Field selector for FieldPosition for DateFormat fields.
+     * @obsolete ICU 3.4 use UDateFormatField instead, since this API will be
+     * removed in that release
+     */
+    enum EField
+    {
+        // Obsolete; use UDateFormatField instead
+        kEraField = UDAT_ERA_FIELD,
+        kYearField = UDAT_YEAR_FIELD,
+        kMonthField = UDAT_MONTH_FIELD,
+        kDateField = UDAT_DATE_FIELD,
+        kHourOfDay1Field = UDAT_HOUR_OF_DAY1_FIELD,
+        kHourOfDay0Field = UDAT_HOUR_OF_DAY0_FIELD,
+        kMinuteField = UDAT_MINUTE_FIELD,
+        kSecondField = UDAT_SECOND_FIELD,
+        kMillisecondField = UDAT_FRACTIONAL_SECOND_FIELD,
+        kDayOfWeekField = UDAT_DAY_OF_WEEK_FIELD,
+        kDayOfYearField = UDAT_DAY_OF_YEAR_FIELD,
+        kDayOfWeekInMonthField = UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
+        kWeekOfYearField = UDAT_WEEK_OF_YEAR_FIELD,
+        kWeekOfMonthField = UDAT_WEEK_OF_MONTH_FIELD,
+        kAmPmField = UDAT_AM_PM_FIELD,
+        kHour1Field = UDAT_HOUR1_FIELD,
+        kHour0Field = UDAT_HOUR0_FIELD,
+        kTimezoneField = UDAT_TIMEZONE_FIELD,
+        kYearWOYField = UDAT_YEAR_WOY_FIELD,
+        kDOWLocalField = UDAT_DOW_LOCAL_FIELD,
+        kExtendedYearField = UDAT_EXTENDED_YEAR_FIELD,
+        kJulianDayField = UDAT_JULIAN_DAY_FIELD,
+        kMillisecondsInDayField = UDAT_MILLISECONDS_IN_DAY_FIELD,
+
+        // Obsolete; use UDateFormatField instead
+        ERA_FIELD = UDAT_ERA_FIELD,
+        YEAR_FIELD = UDAT_YEAR_FIELD,
+        MONTH_FIELD = UDAT_MONTH_FIELD,
+        DATE_FIELD = UDAT_DATE_FIELD,
+        HOUR_OF_DAY1_FIELD = UDAT_HOUR_OF_DAY1_FIELD,
+        HOUR_OF_DAY0_FIELD = UDAT_HOUR_OF_DAY0_FIELD,
+        MINUTE_FIELD = UDAT_MINUTE_FIELD,
+        SECOND_FIELD = UDAT_SECOND_FIELD,
+        MILLISECOND_FIELD = UDAT_FRACTIONAL_SECOND_FIELD,
+        DAY_OF_WEEK_FIELD = UDAT_DAY_OF_WEEK_FIELD,
+        DAY_OF_YEAR_FIELD = UDAT_DAY_OF_YEAR_FIELD,
+        DAY_OF_WEEK_IN_MONTH_FIELD = UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
+        WEEK_OF_YEAR_FIELD = UDAT_WEEK_OF_YEAR_FIELD,
+        WEEK_OF_MONTH_FIELD = UDAT_WEEK_OF_MONTH_FIELD,
+        AM_PM_FIELD = UDAT_AM_PM_FIELD,
+        HOUR1_FIELD = UDAT_HOUR1_FIELD,
+        HOUR0_FIELD = UDAT_HOUR0_FIELD,
+        TIMEZONE_FIELD = UDAT_TIMEZONE_FIELD
+    };
+};
+
+inline UnicodeString&
+DateFormat::format(const Formattable& obj,
+                   UnicodeString& appendTo,
+                   UErrorCode& status) const {
+    return Format::format(obj, appendTo, status);
+}
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _DATEFMT
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/dbbi.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/dbbi.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/dbbi.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,41 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1999-2006 IBM Corp. All rights reserved.
-**********************************************************************
-*   Date        Name        Description
-*   12/1/99    rgillam     Complete port from Java.
-*   01/13/2000 helena      Added UErrorCode to ctors.
-**********************************************************************
-*/
-
-#ifndef DBBI_H
-#define DBBI_H
-
-#include "unicode/rbbi.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-/**
- * \file
- * \brief C++ API: Dictionary Based Break Iterator
- */
- 
-U_NAMESPACE_BEGIN
-
-/**
- * An obsolete subclass of RuleBasedBreakIterator. Handling of dictionary-
- * based break iteration has been folded into the base class. This class
- * is deprecated as of ICU 3.6.
- */
- 
-#ifndef U_HIDE_DEPRECATED_API
-
-typedef RuleBasedBreakIterator DictionaryBasedBreakIterator;
-
-#endif
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/dbbi.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/dbbi.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/dbbi.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/dbbi.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,41 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999-2006 IBM Corp. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*   12/1/99    rgillam     Complete port from Java.
+*   01/13/2000 helena      Added UErrorCode to ctors.
+**********************************************************************
+*/
+
+#ifndef DBBI_H
+#define DBBI_H
+
+#include "unicode/rbbi.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * \file
+ * \brief C++ API: Dictionary Based Break Iterator
+ */
+ 
+U_NAMESPACE_BEGIN
+
+/**
+ * An obsolete subclass of RuleBasedBreakIterator. Handling of dictionary-
+ * based break iteration has been folded into the base class. This class
+ * is deprecated as of ICU 3.6.
+ */
+ 
+#ifndef U_HIDE_DEPRECATED_API
+
+typedef RuleBasedBreakIterator DictionaryBasedBreakIterator;
+
+#endif
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/dcfmtsym.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/dcfmtsym.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/dcfmtsym.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,370 +0,0 @@
-/*
-********************************************************************************
-*   Copyright (C) 1997-2007, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-********************************************************************************
-*
-* File DCFMTSYM.H
-*
-* Modification History:
-* 
-*   Date        Name        Description
-*   02/19/97    aliu        Converted from java.
-*   03/18/97    clhuang     Updated per C++ implementation.
-*   03/27/97    helena      Updated to pass the simple test after code review.
-*   08/26/97    aliu        Added currency/intl currency symbol support.
-*   07/22/98    stephen     Changed to match C++ style 
-*                            currencySymbol -> fCurrencySymbol
-*                            Constants changed from CAPS to kCaps
-*   06/24/99    helena      Integrated Alan's NF enhancements and Java2 bug fixes
-*   09/22/00    grhoten     Marked deprecation tags with a pointer to replacement
-*                            functions.
-********************************************************************************
-*/
- 
-#ifndef DCFMTSYM_H
-#define DCFMTSYM_H
- 
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/uobject.h"
-#include "unicode/locid.h"
-
-/**
- * \file 
- * \brief C++ API: Symbols for formatting numbers.
- */
-
-
-U_NAMESPACE_BEGIN
-
-/**
- * This class represents the set of symbols needed by DecimalFormat
- * to format numbers. DecimalFormat creates for itself an instance of
- * DecimalFormatSymbols from its locale data.  If you need to change any
- * of these symbols, you can get the DecimalFormatSymbols object from
- * your DecimalFormat and modify it.
- * <P>
- * Here are the special characters used in the parts of the
- * subpattern, with notes on their usage.
- * <pre>
- * \code
- *        Symbol   Meaning
- *          0      a digit
- *          #      a digit, zero shows as absent
- *          .      placeholder for decimal separator
- *          ,      placeholder for grouping separator.
- *          ;      separates formats.
- *          -      default negative prefix.
- *          %      divide by 100 and show as percentage
- *          X      any other characters can be used in the prefix or suffix
- *          '      used to quote special characters in a prefix or suffix.
- * \endcode
- *  </pre>
- * [Notes]
- * <P>
- * If there is no explicit negative subpattern, - is prefixed to the
- * positive form. That is, "0.00" alone is equivalent to "0.00;-0.00".
- * <P>
- * The grouping separator is commonly used for thousands, but in some
- * countries for ten-thousands. The interval is a constant number of
- * digits between the grouping characters, such as 100,000,000 or 1,0000,0000.
- * If you supply a pattern with multiple grouping characters, the interval
- * between the last one and the end of the integer is the one that is
- * used. So "#,##,###,####" == "######,####" == "##,####,####".
- * <P>
- * This class only handles localized digits where the 10 digits are
- * contiguous in Unicode, from 0 to 9. Other digits sets (such as
- * superscripts) would need a different subclass.
- */
-class U_I18N_API DecimalFormatSymbols : public UObject {
-public:
-    /**
-     * Constants for specifying a number format symbol.
-     * @stable ICU 2.0
-     */
-    enum ENumberFormatSymbol {
-        /** The decimal separator */
-        kDecimalSeparatorSymbol,
-        /** The grouping separator */
-        kGroupingSeparatorSymbol,
-        /** The pattern separator */
-        kPatternSeparatorSymbol,
-        /** The percent sign */
-        kPercentSymbol,
-        /** Zero*/
-        kZeroDigitSymbol,
-        /** Character representing a digit in the pattern */
-        kDigitSymbol,
-        /** The minus sign */
-        kMinusSignSymbol,
-        /** The plus sign */
-        kPlusSignSymbol,
-        /** The currency symbol */
-        kCurrencySymbol,
-        /** The international currency symbol */
-        kIntlCurrencySymbol,
-        /** The monetary separator */
-        kMonetarySeparatorSymbol,
-        /** The exponential symbol */
-        kExponentialSymbol,
-        /** Per mill symbol - replaces kPermillSymbol */
-        kPerMillSymbol,
-        /** Escape padding character */
-        kPadEscapeSymbol,
-        /** Infinity symbol */
-        kInfinitySymbol,
-        /** Nan symbol */
-        kNaNSymbol,
-        /** Significant digit symbol
-         * @stable ICU 3.0 */
-        kSignificantDigitSymbol,
-        /** The monetary grouping separator 
-         * @stable ICU 3.6
-         */
-        kMonetaryGroupingSeparatorSymbol,
-        /** count symbol constants */
-        kFormatSymbolCount
-    };
-
-    /**
-     * Create a DecimalFormatSymbols object for the given locale.
-     *
-     * @param locale    The locale to get symbols for.
-     * @param status    Input/output parameter, set to success or
-     *                  failure code upon return.
-     * @stable ICU 2.0
-     */
-    DecimalFormatSymbols(const Locale& locale, UErrorCode& status);
-
-    /**
-     * Create a DecimalFormatSymbols object for the default locale.
-     * This constructor will not fail.  If the resource file data is
-     * not available, it will use hard-coded last-resort data and
-     * set status to U_USING_FALLBACK_ERROR.
-     *
-     * @param status    Input/output parameter, set to success or
-     *                  failure code upon return.
-     * @stable ICU 2.0
-     */
-    DecimalFormatSymbols( UErrorCode& status);
-
-    /**
-     * Copy constructor.
-     * @stable ICU 2.0
-     */
-    DecimalFormatSymbols(const DecimalFormatSymbols&);
-
-    /**
-     * Assignment operator.
-     * @stable ICU 2.0
-     */
-    DecimalFormatSymbols& operator=(const DecimalFormatSymbols&);
-
-    /**
-     * Destructor.
-     * @stable ICU 2.0
-     */
-    virtual ~DecimalFormatSymbols();
-
-    /**
-     * Return true if another object is semantically equal to this one.
-     *
-     * @param other    the object to be compared with.
-     * @return         true if another object is semantically equal to this one.
-     * @stable ICU 2.0
-     */
-    UBool operator==(const DecimalFormatSymbols& other) const;
-
-    /**
-     * Return true if another object is semantically unequal to this one.
-     *
-     * @param other    the object to be compared with.
-     * @return         true if another object is semantically unequal to this one.
-     * @stable ICU 2.0
-     */
-    UBool operator!=(const DecimalFormatSymbols& other) const { return !operator==(other); }
-
-    /**
-     * Get one of the format symbols by its enum constant.
-     * Each symbol is stored as a string so that graphemes
-     * (characters with modifyer letters) can be used.
-     *
-     * @param symbol    Constant to indicate a number format symbol.
-     * @return    the format symbols by the param 'symbol'
-     * @stable ICU 2.0
-     */
-    inline UnicodeString getSymbol(ENumberFormatSymbol symbol) const;
-
-    /**
-     * Set one of the format symbols by its enum constant.
-     * Each symbol is stored as a string so that graphemes
-     * (characters with modifyer letters) can be used.
-     *
-     * @param symbol    Constant to indicate a number format symbol.
-     * @param value     value of the format sybmol
-     * @stable ICU 2.0
-     */
-    void setSymbol(ENumberFormatSymbol symbol, const UnicodeString &value);
-
-    /**
-     * Returns the locale for which this object was constructed.
-     * @stable ICU 2.6
-     */
-    inline Locale getLocale() const;
-
-    /**
-     * Returns the locale for this object. Two flavors are available:
-     * valid and actual locale.
-     * @stable ICU 2.8
-     */
-    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.2
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.2
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-private:
-    DecimalFormatSymbols(); // default constructor not implemented
-
-    /**
-     * Initializes the symbols from the LocaleElements resource bundle.
-     * Note: The organization of LocaleElements badly needs to be
-     * cleaned up.
-     *
-     * @param locale               The locale to get symbols for.
-     * @param success              Input/output parameter, set to success or
-     *                             failure code upon return.
-     * @param useLastResortData    determine if use last resort data
-     */
-    void initialize(const Locale& locale, UErrorCode& success, UBool useLastResortData = FALSE);
-
-    /**
-     * Initialize the symbols from the given array of UnicodeStrings.
-     * The array must be of the correct size.
-     * 
-     * @param numberElements    the number format symbols
-     * @param numberElementsLength length of numberElements
-     */
-    void initialize(const UChar** numberElements, int32_t *numberElementsStrLen, int32_t numberElementsLength);
-
-    /**
-     * Initialize the symbols with default values.
-     */
-    void initialize();
-
-    void setCurrencyForSymbols();
-
-public:
-    /**
-     * _Internal_ function - more efficient version of getSymbol,
-     * returning a const reference to one of the symbol strings.
-     * The returned reference becomes invalid when the symbol is changed
-     * or when the DecimalFormatSymbols are destroyed.
-     * ### TODO markus 2002oct11: Consider proposing getConstSymbol() to be really public.
-     *
-     * @param symbol Constant to indicate a number format symbol.
-     * @return the format symbol by the param 'symbol'
-     * @internal
-     */
-    inline const UnicodeString &getConstSymbol(ENumberFormatSymbol symbol) const;
-
-    /**
-     * Returns that pattern stored in currecy info. Internal API for use by NumberFormat API.
-     * @internal
-     */
-    inline const UChar* getCurrencyPattern(void) const;
-
-private:
-    /**
-     * Private symbol strings.
-     * They are either loaded from a resource bundle or otherwise owned.
-     * setSymbol() clones the symbol string.
-     * Readonly aliases can only come from a resource bundle, so that we can always
-     * use fastCopyFrom() with them.
-     *
-     * If DecimalFormatSymbols becomes subclassable and the status of fSymbols changes
-     * from private to protected,
-     * or when fSymbols can be set any other way that allows them to be readonly aliases
-     * to non-resource bundle strings,
-     * then regular UnicodeString copies must be used instead of fastCopyFrom().
-     *
-     * @internal
-     */
-    UnicodeString fSymbols[kFormatSymbolCount];
-
-    /**
-     * Non-symbol variable for getConstSymbol(). Always empty.
-     * @internal
-     */
-    UnicodeString fNoSymbol;
-
-    Locale locale;
-
-    char actualLocale[ULOC_FULLNAME_CAPACITY];
-    char validLocale[ULOC_FULLNAME_CAPACITY];
-    const UChar* currPattern;
-};
-
-// -------------------------------------
-
-inline UnicodeString
-DecimalFormatSymbols::getSymbol(ENumberFormatSymbol symbol) const {
-    const UnicodeString *strPtr;
-    if(symbol < kFormatSymbolCount) {
-        strPtr = &fSymbols[symbol];
-    } else {
-        strPtr = &fNoSymbol;
-    }
-    return *strPtr;
-}
-
-inline const UnicodeString &
-DecimalFormatSymbols::getConstSymbol(ENumberFormatSymbol symbol) const {
-    const UnicodeString *strPtr;
-    if(symbol < kFormatSymbolCount) {
-        strPtr = &fSymbols[symbol];
-    } else {
-        strPtr = &fNoSymbol;
-    }
-    return *strPtr;
-}
-
-// -------------------------------------
-
-inline void
-DecimalFormatSymbols::setSymbol(ENumberFormatSymbol symbol, const UnicodeString &value) {
-    if(symbol<kFormatSymbolCount) {
-        fSymbols[symbol]=value;
-    }
-}
-
-// -------------------------------------
-
-inline Locale
-DecimalFormatSymbols::getLocale() const {
-    return locale;
-}
-
-inline const UChar*
-DecimalFormatSymbols::getCurrencyPattern() const {
-    return currPattern;
-}
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _DCFMTSYM
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/dcfmtsym.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/dcfmtsym.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/dcfmtsym.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/dcfmtsym.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,370 @@
+/*
+********************************************************************************
+*   Copyright (C) 1997-2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+********************************************************************************
+*
+* File DCFMTSYM.H
+*
+* Modification History:
+* 
+*   Date        Name        Description
+*   02/19/97    aliu        Converted from java.
+*   03/18/97    clhuang     Updated per C++ implementation.
+*   03/27/97    helena      Updated to pass the simple test after code review.
+*   08/26/97    aliu        Added currency/intl currency symbol support.
+*   07/22/98    stephen     Changed to match C++ style 
+*                            currencySymbol -> fCurrencySymbol
+*                            Constants changed from CAPS to kCaps
+*   06/24/99    helena      Integrated Alan's NF enhancements and Java2 bug fixes
+*   09/22/00    grhoten     Marked deprecation tags with a pointer to replacement
+*                            functions.
+********************************************************************************
+*/
+ 
+#ifndef DCFMTSYM_H
+#define DCFMTSYM_H
+ 
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/uobject.h"
+#include "unicode/locid.h"
+
+/**
+ * \file 
+ * \brief C++ API: Symbols for formatting numbers.
+ */
+
+
+U_NAMESPACE_BEGIN
+
+/**
+ * This class represents the set of symbols needed by DecimalFormat
+ * to format numbers. DecimalFormat creates for itself an instance of
+ * DecimalFormatSymbols from its locale data.  If you need to change any
+ * of these symbols, you can get the DecimalFormatSymbols object from
+ * your DecimalFormat and modify it.
+ * <P>
+ * Here are the special characters used in the parts of the
+ * subpattern, with notes on their usage.
+ * <pre>
+ * \code
+ *        Symbol   Meaning
+ *          0      a digit
+ *          #      a digit, zero shows as absent
+ *          .      placeholder for decimal separator
+ *          ,      placeholder for grouping separator.
+ *          ;      separates formats.
+ *          -      default negative prefix.
+ *          %      divide by 100 and show as percentage
+ *          X      any other characters can be used in the prefix or suffix
+ *          '      used to quote special characters in a prefix or suffix.
+ * \endcode
+ *  </pre>
+ * [Notes]
+ * <P>
+ * If there is no explicit negative subpattern, - is prefixed to the
+ * positive form. That is, "0.00" alone is equivalent to "0.00;-0.00".
+ * <P>
+ * The grouping separator is commonly used for thousands, but in some
+ * countries for ten-thousands. The interval is a constant number of
+ * digits between the grouping characters, such as 100,000,000 or 1,0000,0000.
+ * If you supply a pattern with multiple grouping characters, the interval
+ * between the last one and the end of the integer is the one that is
+ * used. So "#,##,###,####" == "######,####" == "##,####,####".
+ * <P>
+ * This class only handles localized digits where the 10 digits are
+ * contiguous in Unicode, from 0 to 9. Other digits sets (such as
+ * superscripts) would need a different subclass.
+ */
+class U_I18N_API DecimalFormatSymbols : public UObject {
+public:
+    /**
+     * Constants for specifying a number format symbol.
+     * @stable ICU 2.0
+     */
+    enum ENumberFormatSymbol {
+        /** The decimal separator */
+        kDecimalSeparatorSymbol,
+        /** The grouping separator */
+        kGroupingSeparatorSymbol,
+        /** The pattern separator */
+        kPatternSeparatorSymbol,
+        /** The percent sign */
+        kPercentSymbol,
+        /** Zero*/
+        kZeroDigitSymbol,
+        /** Character representing a digit in the pattern */
+        kDigitSymbol,
+        /** The minus sign */
+        kMinusSignSymbol,
+        /** The plus sign */
+        kPlusSignSymbol,
+        /** The currency symbol */
+        kCurrencySymbol,
+        /** The international currency symbol */
+        kIntlCurrencySymbol,
+        /** The monetary separator */
+        kMonetarySeparatorSymbol,
+        /** The exponential symbol */
+        kExponentialSymbol,
+        /** Per mill symbol - replaces kPermillSymbol */
+        kPerMillSymbol,
+        /** Escape padding character */
+        kPadEscapeSymbol,
+        /** Infinity symbol */
+        kInfinitySymbol,
+        /** Nan symbol */
+        kNaNSymbol,
+        /** Significant digit symbol
+         * @stable ICU 3.0 */
+        kSignificantDigitSymbol,
+        /** The monetary grouping separator 
+         * @stable ICU 3.6
+         */
+        kMonetaryGroupingSeparatorSymbol,
+        /** count symbol constants */
+        kFormatSymbolCount
+    };
+
+    /**
+     * Create a DecimalFormatSymbols object for the given locale.
+     *
+     * @param locale    The locale to get symbols for.
+     * @param status    Input/output parameter, set to success or
+     *                  failure code upon return.
+     * @stable ICU 2.0
+     */
+    DecimalFormatSymbols(const Locale& locale, UErrorCode& status);
+
+    /**
+     * Create a DecimalFormatSymbols object for the default locale.
+     * This constructor will not fail.  If the resource file data is
+     * not available, it will use hard-coded last-resort data and
+     * set status to U_USING_FALLBACK_ERROR.
+     *
+     * @param status    Input/output parameter, set to success or
+     *                  failure code upon return.
+     * @stable ICU 2.0
+     */
+    DecimalFormatSymbols( UErrorCode& status);
+
+    /**
+     * Copy constructor.
+     * @stable ICU 2.0
+     */
+    DecimalFormatSymbols(const DecimalFormatSymbols&);
+
+    /**
+     * Assignment operator.
+     * @stable ICU 2.0
+     */
+    DecimalFormatSymbols& operator=(const DecimalFormatSymbols&);
+
+    /**
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~DecimalFormatSymbols();
+
+    /**
+     * Return true if another object is semantically equal to this one.
+     *
+     * @param other    the object to be compared with.
+     * @return         true if another object is semantically equal to this one.
+     * @stable ICU 2.0
+     */
+    UBool operator==(const DecimalFormatSymbols& other) const;
+
+    /**
+     * Return true if another object is semantically unequal to this one.
+     *
+     * @param other    the object to be compared with.
+     * @return         true if another object is semantically unequal to this one.
+     * @stable ICU 2.0
+     */
+    UBool operator!=(const DecimalFormatSymbols& other) const { return !operator==(other); }
+
+    /**
+     * Get one of the format symbols by its enum constant.
+     * Each symbol is stored as a string so that graphemes
+     * (characters with modifyer letters) can be used.
+     *
+     * @param symbol    Constant to indicate a number format symbol.
+     * @return    the format symbols by the param 'symbol'
+     * @stable ICU 2.0
+     */
+    inline UnicodeString getSymbol(ENumberFormatSymbol symbol) const;
+
+    /**
+     * Set one of the format symbols by its enum constant.
+     * Each symbol is stored as a string so that graphemes
+     * (characters with modifyer letters) can be used.
+     *
+     * @param symbol    Constant to indicate a number format symbol.
+     * @param value     value of the format sybmol
+     * @stable ICU 2.0
+     */
+    void setSymbol(ENumberFormatSymbol symbol, const UnicodeString &value);
+
+    /**
+     * Returns the locale for which this object was constructed.
+     * @stable ICU 2.6
+     */
+    inline Locale getLocale() const;
+
+    /**
+     * Returns the locale for this object. Two flavors are available:
+     * valid and actual locale.
+     * @stable ICU 2.8
+     */
+    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+    DecimalFormatSymbols(); // default constructor not implemented
+
+    /**
+     * Initializes the symbols from the LocaleElements resource bundle.
+     * Note: The organization of LocaleElements badly needs to be
+     * cleaned up.
+     *
+     * @param locale               The locale to get symbols for.
+     * @param success              Input/output parameter, set to success or
+     *                             failure code upon return.
+     * @param useLastResortData    determine if use last resort data
+     */
+    void initialize(const Locale& locale, UErrorCode& success, UBool useLastResortData = FALSE);
+
+    /**
+     * Initialize the symbols from the given array of UnicodeStrings.
+     * The array must be of the correct size.
+     * 
+     * @param numberElements    the number format symbols
+     * @param numberElementsLength length of numberElements
+     */
+    void initialize(const UChar** numberElements, int32_t *numberElementsStrLen, int32_t numberElementsLength);
+
+    /**
+     * Initialize the symbols with default values.
+     */
+    void initialize();
+
+    void setCurrencyForSymbols();
+
+public:
+    /**
+     * _Internal_ function - more efficient version of getSymbol,
+     * returning a const reference to one of the symbol strings.
+     * The returned reference becomes invalid when the symbol is changed
+     * or when the DecimalFormatSymbols are destroyed.
+     * ### TODO markus 2002oct11: Consider proposing getConstSymbol() to be really public.
+     *
+     * @param symbol Constant to indicate a number format symbol.
+     * @return the format symbol by the param 'symbol'
+     * @internal
+     */
+    inline const UnicodeString &getConstSymbol(ENumberFormatSymbol symbol) const;
+
+    /**
+     * Returns that pattern stored in currecy info. Internal API for use by NumberFormat API.
+     * @internal
+     */
+    inline const UChar* getCurrencyPattern(void) const;
+
+private:
+    /**
+     * Private symbol strings.
+     * They are either loaded from a resource bundle or otherwise owned.
+     * setSymbol() clones the symbol string.
+     * Readonly aliases can only come from a resource bundle, so that we can always
+     * use fastCopyFrom() with them.
+     *
+     * If DecimalFormatSymbols becomes subclassable and the status of fSymbols changes
+     * from private to protected,
+     * or when fSymbols can be set any other way that allows them to be readonly aliases
+     * to non-resource bundle strings,
+     * then regular UnicodeString copies must be used instead of fastCopyFrom().
+     *
+     * @internal
+     */
+    UnicodeString fSymbols[kFormatSymbolCount];
+
+    /**
+     * Non-symbol variable for getConstSymbol(). Always empty.
+     * @internal
+     */
+    UnicodeString fNoSymbol;
+
+    Locale locale;
+
+    char actualLocale[ULOC_FULLNAME_CAPACITY];
+    char validLocale[ULOC_FULLNAME_CAPACITY];
+    const UChar* currPattern;
+};
+
+// -------------------------------------
+
+inline UnicodeString
+DecimalFormatSymbols::getSymbol(ENumberFormatSymbol symbol) const {
+    const UnicodeString *strPtr;
+    if(symbol < kFormatSymbolCount) {
+        strPtr = &fSymbols[symbol];
+    } else {
+        strPtr = &fNoSymbol;
+    }
+    return *strPtr;
+}
+
+inline const UnicodeString &
+DecimalFormatSymbols::getConstSymbol(ENumberFormatSymbol symbol) const {
+    const UnicodeString *strPtr;
+    if(symbol < kFormatSymbolCount) {
+        strPtr = &fSymbols[symbol];
+    } else {
+        strPtr = &fNoSymbol;
+    }
+    return *strPtr;
+}
+
+// -------------------------------------
+
+inline void
+DecimalFormatSymbols::setSymbol(ENumberFormatSymbol symbol, const UnicodeString &value) {
+    if(symbol<kFormatSymbolCount) {
+        fSymbols[symbol]=value;
+    }
+}
+
+// -------------------------------------
+
+inline Locale
+DecimalFormatSymbols::getLocale() const {
+    return locale;
+}
+
+inline const UChar*
+DecimalFormatSymbols::getCurrencyPattern() const {
+    return currPattern;
+}
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _DCFMTSYM
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/decimfmt.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/decimfmt.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/decimfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1901 +0,0 @@
-/*
-********************************************************************************
-*   Copyright (C) 1997-2009, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-********************************************************************************
-*
-* File DECIMFMT.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   02/19/97    aliu        Converted from java.
-*   03/20/97    clhuang     Updated per C++ implementation.
-*   04/03/97    aliu        Rewrote parsing and formatting completely, and
-*                           cleaned up and debugged.  Actually works now.
-*   04/17/97    aliu        Changed DigitCount to int per code review.
-*   07/10/97    helena      Made ParsePosition a class and get rid of the function
-*                           hiding problems.
-*   09/09/97    aliu        Ported over support for exponential formats.
-*    07/20/98    stephen        Changed documentation
-********************************************************************************
-*/
- 
-#ifndef DECIMFMT_H
-#define DECIMFMT_H
- 
-#include "unicode/utypes.h"
-/**
- * \file 
- * \brief C++ API: Formats decimal numbers.
- */
- 
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/dcfmtsym.h"
-#include "unicode/numfmt.h"
-#include "unicode/locid.h"
-
-U_NAMESPACE_BEGIN
-
-class DigitList;
-class ChoiceFormat;
-class UnicodeSet;
-
-/**
- * DecimalFormat is a concrete subclass of NumberFormat that formats decimal
- * numbers. It has a variety of features designed to make it possible to parse
- * and format numbers in any locale, including support for Western, Arabic, or
- * Indic digits.  It also supports different flavors of numbers, including
- * integers ("123"), fixed-point numbers ("123.4"), scientific notation
- * ("1.23E4"), percentages ("12%"), and currency amounts ("$123").  All of these
- * flavors can be easily localized.
- *
- * <p>To obtain a NumberFormat for a specific locale (including the default
- * locale) call one of NumberFormat's factory methods such as
- * createInstance(). Do not call the DecimalFormat constructors directly, unless
- * you know what you are doing, since the NumberFormat factory methods may
- * return subclasses other than DecimalFormat.
- *
- * <p><strong>Example Usage</strong>
- *
- * \code
- *     // Normally we would have a GUI with a menu for this
- *     int32_t locCount;
- *     const Locale* locales = NumberFormat::getAvailableLocales(locCount);
- * 
- *     double myNumber = -1234.56;
- *     UErrorCode success = U_ZERO_ERROR;
- *     NumberFormat* form;
- * 
- *     // Print out a number with the localized number, currency and percent
- *     // format for each locale.
- *     UnicodeString countryName;
- *     UnicodeString displayName;
- *     UnicodeString str;
- *     UnicodeString pattern;
- *     Formattable fmtable;
- *     for (int32_t j = 0; j < 3; ++j) {
- *         cout << endl << "FORMAT " << j << endl;
- *         for (int32_t i = 0; i < locCount; ++i) {
- *             if (locales[i].getCountry(countryName).size() == 0) {
- *                 // skip language-only
- *                 continue;
- *             }
- *             switch (j) {
- *             case 0:
- *                 form = NumberFormat::createInstance(locales[i], success ); break;
- *             case 1:
- *                 form = NumberFormat::createCurrencyInstance(locales[i], success ); break;
- *             default:
- *                 form = NumberFormat::createPercentInstance(locales[i], success ); break;
- *             }
- *             if (form) {
- *                 str.remove();
- *                 pattern = ((DecimalFormat*)form)->toPattern(pattern);
- *                 cout << locales[i].getDisplayName(displayName) << ": " << pattern;
- *                 cout << "  ->  " << form->format(myNumber,str) << endl;
- *                 form->parse(form->format(myNumber,str), fmtable, success);
- *                 delete form;  
- *             }
- *         }
- *     }
- * \endcode
- *
- * <p><strong>Patterns</strong>
- *
- * <p>A DecimalFormat consists of a <em>pattern</em> and a set of
- * <em>symbols</em>.  The pattern may be set directly using
- * applyPattern(), or indirectly using other API methods which
- * manipulate aspects of the pattern, such as the minimum number of integer
- * digits.  The symbols are stored in a DecimalFormatSymbols
- * object.  When using the NumberFormat factory methods, the
- * pattern and symbols are read from ICU's locale data.
- * 
- * <p><strong>Special Pattern Characters</strong>
- *
- * <p>Many characters in a pattern are taken literally; they are matched during
- * parsing and output unchanged during formatting.  Special characters, on the
- * other hand, stand for other characters, strings, or classes of characters.
- * For example, the '#' character is replaced by a localized digit.  Often the
- * replacement character is the same as the pattern character; in the U.S. locale,
- * the ',' grouping character is replaced by ','.  However, the replacement is
- * still happening, and if the symbols are modified, the grouping character
- * changes.  Some special characters affect the behavior of the formatter by
- * their presence; for example, if the percent character is seen, then the
- * value is multiplied by 100 before being displayed.
- *
- * <p>To insert a special character in a pattern as a literal, that is, without
- * any special meaning, the character must be quoted.  There are some exceptions to
- * this which are noted below.
- *
- * <p>The characters listed here are used in non-localized patterns.  Localized
- * patterns use the corresponding characters taken from this formatter's
- * DecimalFormatSymbols object instead, and these characters lose
- * their special status.  Two exceptions are the currency sign and quote, which
- * are not localized.
- *
- * <table border=0 cellspacing=3 cellpadding=0>
- *   <tr bgcolor="#ccccff">
- *     <td align=left><strong>Symbol</strong>
- *     <td align=left><strong>Location</strong>
- *     <td align=left><strong>Localized?</strong>
- *     <td align=left><strong>Meaning</strong>
- *   <tr valign=top>
- *     <td><code>0</code>
- *     <td>Number
- *     <td>Yes
- *     <td>Digit
- *   <tr valign=top bgcolor="#eeeeff">
- *     <td><code>1-9</code>
- *     <td>Number
- *     <td>Yes
- *     <td>'1' through '9' indicate rounding.
- *   <tr valign=top>
- *     <td><code>\htmlonly&#x40;\endhtmlonly</code> <!--doxygen doesn't like @-->
- *     <td>Number
- *     <td>No
- *     <td>Significant digit
- *   <tr valign=top bgcolor="#eeeeff">
- *     <td><code>#</code>
- *     <td>Number
- *     <td>Yes
- *     <td>Digit, zero shows as absent
- *   <tr valign=top>
- *     <td><code>.</code>
- *     <td>Number
- *     <td>Yes
- *     <td>Decimal separator or monetary decimal separator
- *   <tr valign=top bgcolor="#eeeeff">
- *     <td><code>-</code>
- *     <td>Number
- *     <td>Yes
- *     <td>Minus sign
- *   <tr valign=top>
- *     <td><code>,</code>
- *     <td>Number
- *     <td>Yes
- *     <td>Grouping separator
- *   <tr valign=top bgcolor="#eeeeff">
- *     <td><code>E</code>
- *     <td>Number
- *     <td>Yes
- *     <td>Separates mantissa and exponent in scientific notation.
- *         <em>Need not be quoted in prefix or suffix.</em>
- *   <tr valign=top>
- *     <td><code>+</code>
- *     <td>Exponent
- *     <td>Yes
- *     <td>Prefix positive exponents with localized plus sign.
- *         <em>Need not be quoted in prefix or suffix.</em>
- *   <tr valign=top bgcolor="#eeeeff">
- *     <td><code>;</code>
- *     <td>Subpattern boundary
- *     <td>Yes
- *     <td>Separates positive and negative subpatterns
- *   <tr valign=top>
- *     <td><code>\%</code>
- *     <td>Prefix or suffix
- *     <td>Yes
- *     <td>Multiply by 100 and show as percentage
- *   <tr valign=top bgcolor="#eeeeff">
- *     <td><code>\\u2030</code>
- *     <td>Prefix or suffix
- *     <td>Yes
- *     <td>Multiply by 1000 and show as per mille
- *   <tr valign=top>
- *     <td><code>\htmlonly&curren;\endhtmlonly</code> (<code>\\u00A4</code>)
- *     <td>Prefix or suffix
- *     <td>No
- *     <td>Currency sign, replaced by currency symbol.  If
- *         doubled, replaced by international currency symbol.
- *         If present in a pattern, the monetary decimal separator
- *         is used instead of the decimal separator.
- *   <tr valign=top bgcolor="#eeeeff">
- *     <td><code>'</code>
- *     <td>Prefix or suffix
- *     <td>No
- *     <td>Used to quote special characters in a prefix or suffix,
- *         for example, <code>"'#'#"</code> formats 123 to
- *         <code>"#123"</code>.  To create a single quote
- *         itself, use two in a row: <code>"# o''clock"</code>.
- *   <tr valign=top>
- *     <td><code>*</code>
- *     <td>Prefix or suffix boundary
- *     <td>Yes
- *     <td>Pad escape, precedes pad character
- * </table>
- *
- * <p>A DecimalFormat pattern contains a postive and negative
- * subpattern, for example, "#,##0.00;(#,##0.00)".  Each subpattern has a
- * prefix, a numeric part, and a suffix.  If there is no explicit negative
- * subpattern, the negative subpattern is the localized minus sign prefixed to the
- * positive subpattern. That is, "0.00" alone is equivalent to "0.00;-0.00".  If there
- * is an explicit negative subpattern, it serves only to specify the negative
- * prefix and suffix; the number of digits, minimal digits, and other
- * characteristics are ignored in the negative subpattern. That means that
- * "#,##0.0#;(#)" has precisely the same result as "#,##0.0#;(#,##0.0#)".
- *
- * <p>The prefixes, suffixes, and various symbols used for infinity, digits,
- * thousands separators, decimal separators, etc. may be set to arbitrary
- * values, and they will appear properly during formatting.  However, care must
- * be taken that the symbols and strings do not conflict, or parsing will be
- * unreliable.  For example, either the positive and negative prefixes or the
- * suffixes must be distinct for parse() to be able
- * to distinguish positive from negative values.  Another example is that the
- * decimal separator and thousands separator should be distinct characters, or
- * parsing will be impossible.
- *
- * <p>The <em>grouping separator</em> is a character that separates clusters of
- * integer digits to make large numbers more legible.  It commonly used for
- * thousands, but in some locales it separates ten-thousands.  The <em>grouping
- * size</em> is the number of digits between the grouping separators, such as 3
- * for "100,000,000" or 4 for "1 0000 0000". There are actually two different
- * grouping sizes: One used for the least significant integer digits, the
- * <em>primary grouping size</em>, and one used for all others, the
- * <em>secondary grouping size</em>.  In most locales these are the same, but
- * sometimes they are different. For example, if the primary grouping interval
- * is 3, and the secondary is 2, then this corresponds to the pattern
- * "#,##,##0", and the number 123456789 is formatted as "12,34,56,789".  If a
- * pattern contains multiple grouping separators, the interval between the last
- * one and the end of the integer defines the primary grouping size, and the
- * interval between the last two defines the secondary grouping size. All others
- * are ignored, so "#,##,###,####" == "###,###,####" == "##,#,###,####".
- *
- * <p>Illegal patterns, such as "#.#.#" or "#.###,###", will cause
- * DecimalFormat to set a failing UErrorCode.
- *
- * <p><strong>Pattern BNF</strong>
- *
- * <pre>
- * pattern    := subpattern (';' subpattern)?
- * subpattern := prefix? number exponent? suffix?
- * number     := (integer ('.' fraction)?) | sigDigits
- * prefix     := '\\u0000'..'\\uFFFD' - specialCharacters
- * suffix     := '\\u0000'..'\\uFFFD' - specialCharacters
- * integer    := '#'* '0'* '0'
- * fraction   := '0'* '#'*
- * sigDigits  := '#'* '@' '@'* '#'*
- * exponent   := 'E' '+'? '0'* '0'
- * padSpec    := '*' padChar
- * padChar    := '\\u0000'..'\\uFFFD' - quote
- * &nbsp;
- * Notation:
- *   X*       0 or more instances of X
- *   X?       0 or 1 instances of X
- *   X|Y      either X or Y
- *   C..D     any character from C up to D, inclusive
- *   S-T      characters in S, except those in T
- * </pre>
- * The first subpattern is for positive numbers. The second (optional)
- * subpattern is for negative numbers.
- * 
- * <p>Not indicated in the BNF syntax above:
- *
- * <ul><li>The grouping separator ',' can occur inside the integer and
- * sigDigits elements, between any two pattern characters of that
- * element, as long as the integer or sigDigits element is not
- * followed by the exponent element.
- *
- * <li>Two grouping intervals are recognized: That between the
- *     decimal point and the first grouping symbol, and that
- *     between the first and second grouping symbols. These
- *     intervals are identical in most locales, but in some
- *     locales they differ. For example, the pattern
- *     &quot;#,##,###&quot; formats the number 123456789 as
- *     &quot;12,34,56,789&quot;.</li>
- * 
- * <li>The pad specifier <code>padSpec</code> may appear before the prefix,
- * after the prefix, before the suffix, after the suffix, or not at all.
- *
- * <li>In place of '0', the digits '1' through '9' may be used to
- * indicate a rounding increment.
- * </ul>
- *
- * <p><strong>Parsing</strong>
- *
- * <p>DecimalFormat parses all Unicode characters that represent
- * decimal digits, as defined by u_charDigitValue().  In addition,
- * DecimalFormat also recognizes as digits the ten consecutive
- * characters starting with the localized zero digit defined in the
- * DecimalFormatSymbols object.  During formatting, the
- * DecimalFormatSymbols-based digits are output.
- *
- * <p>During parsing, grouping separators are ignored.
- *
- * <p>If parse(UnicodeString&,Formattable&,ParsePosition&)
- * fails to parse a string, it leaves the parse position unchanged.
- * The convenience method parse(UnicodeString&,Formattable&,UErrorCode&)
- * indicates parse failure by setting a failing
- * UErrorCode.
- *
- * <p><strong>Formatting</strong>
- *
- * <p>Formatting is guided by several parameters, all of which can be
- * specified either using a pattern or using the API.  The following
- * description applies to formats that do not use <a href="#sci">scientific
- * notation</a> or <a href="#sigdig">significant digits</a>.
- *
- * <ul><li>If the number of actual integer digits exceeds the
- * <em>maximum integer digits</em>, then only the least significant
- * digits are shown.  For example, 1997 is formatted as "97" if the
- * maximum integer digits is set to 2.
- *
- * <li>If the number of actual integer digits is less than the
- * <em>minimum integer digits</em>, then leading zeros are added.  For
- * example, 1997 is formatted as "01997" if the minimum integer digits
- * is set to 5.
- *
- * <li>If the number of actual fraction digits exceeds the <em>maximum
- * fraction digits</em>, then half-even rounding it performed to the
- * maximum fraction digits.  For example, 0.125 is formatted as "0.12"
- * if the maximum fraction digits is 2.  This behavior can be changed
- * by specifying a rounding increment and a rounding mode.
- *
- * <li>If the number of actual fraction digits is less than the
- * <em>minimum fraction digits</em>, then trailing zeros are added.
- * For example, 0.125 is formatted as "0.1250" if the mimimum fraction
- * digits is set to 4.
- *
- * <li>Trailing fractional zeros are not displayed if they occur
- * <em>j</em> positions after the decimal, where <em>j</em> is less
- * than the maximum fraction digits. For example, 0.10004 is
- * formatted as "0.1" if the maximum fraction digits is four or less.
- * </ul>
- *
- * <p><strong>Special Values</strong>
- *
- * <p><code>NaN</code> is represented as a single character, typically
- * <code>\\uFFFD</code>.  This character is determined by the
- * DecimalFormatSymbols object.  This is the only value for which
- * the prefixes and suffixes are not used.
- *
- * <p>Infinity is represented as a single character, typically
- * <code>\\u221E</code>, with the positive or negative prefixes and suffixes
- * applied.  The infinity character is determined by the
- * DecimalFormatSymbols object.
- *
- * <a name="sci"><strong>Scientific Notation</strong></a>
- *
- * <p>Numbers in scientific notation are expressed as the product of a mantissa
- * and a power of ten, for example, 1234 can be expressed as 1.234 x 10<sup>3</sup>. The
- * mantissa is typically in the half-open interval [1.0, 10.0) or sometimes [0.0, 1.0),
- * but it need not be.  DecimalFormat supports arbitrary mantissas.
- * DecimalFormat can be instructed to use scientific
- * notation through the API or through the pattern.  In a pattern, the exponent
- * character immediately followed by one or more digit characters indicates
- * scientific notation.  Example: "0.###E0" formats the number 1234 as
- * "1.234E3".
- *
- * <ul>
- * <li>The number of digit characters after the exponent character gives the
- * minimum exponent digit count.  There is no maximum.  Negative exponents are
- * formatted using the localized minus sign, <em>not</em> the prefix and suffix
- * from the pattern.  This allows patterns such as "0.###E0 m/s".  To prefix
- * positive exponents with a localized plus sign, specify '+' between the
- * exponent and the digits: "0.###E+0" will produce formats "1E+1", "1E+0",
- * "1E-1", etc.  (In localized patterns, use the localized plus sign rather than
- * '+'.)
- *
- * <li>The minimum number of integer digits is achieved by adjusting the
- * exponent.  Example: 0.00123 formatted with "00.###E0" yields "12.3E-4".  This
- * only happens if there is no maximum number of integer digits.  If there is a
- * maximum, then the minimum number of integer digits is fixed at one.
- *
- * <li>The maximum number of integer digits, if present, specifies the exponent
- * grouping.  The most common use of this is to generate <em>engineering
- * notation</em>, in which the exponent is a multiple of three, e.g.,
- * "##0.###E0".  The number 12345 is formatted using "##0.####E0" as "12.345E3".
- *
- * <li>When using scientific notation, the formatter controls the
- * digit counts using significant digits logic.  The maximum number of
- * significant digits limits the total number of integer and fraction
- * digits that will be shown in the mantissa; it does not affect
- * parsing.  For example, 12345 formatted with "##0.##E0" is "12.3E3".
- * See the section on significant digits for more details.
- *
- * <li>The number of significant digits shown is determined as
- * follows: If areSignificantDigitsUsed() returns false, then the
- * minimum number of significant digits shown is one, and the maximum
- * number of significant digits shown is the sum of the <em>minimum
- * integer</em> and <em>maximum fraction</em> digits, and is
- * unaffected by the maximum integer digits.  If this sum is zero,
- * then all significant digits are shown.  If
- * areSignificantDigitsUsed() returns true, then the significant digit
- * counts are specified by getMinimumSignificantDigits() and
- * getMaximumSignificantDigits().  In this case, the number of
- * integer digits is fixed at one, and there is no exponent grouping.
- *
- * <li>Exponential patterns may not contain grouping separators.
- * </ul>
- *
- * <a name="sigdig"><strong>Significant Digits</strong></a>
- *
- * <code>DecimalFormat</code> has two ways of controlling how many
- * digits are shows: (a) significant digits counts, or (b) integer and
- * fraction digit counts.  Integer and fraction digit counts are
- * described above.  When a formatter is using significant digits
- * counts, the number of integer and fraction digits is not specified
- * directly, and the formatter settings for these counts are ignored.
- * Instead, the formatter uses however many integer and fraction
- * digits are required to display the specified number of significant
- * digits.  Examples:
- *
- * <table border=0 cellspacing=3 cellpadding=0>
- *   <tr bgcolor="#ccccff">
- *     <td align=left>Pattern
- *     <td align=left>Minimum significant digits
- *     <td align=left>Maximum significant digits
- *     <td align=left>Number
- *     <td align=left>Output of format()
- *   <tr valign=top>
- *     <td><code>\@\@\@</code>
- *     <td>3
- *     <td>3
- *     <td>12345
- *     <td><code>12300</code>
- *   <tr valign=top bgcolor="#eeeeff">
- *     <td><code>\@\@\@</code>
- *     <td>3
- *     <td>3
- *     <td>0.12345
- *     <td><code>0.123</code>
- *   <tr valign=top>
- *     <td><code>\@\@##</code>
- *     <td>2
- *     <td>4
- *     <td>3.14159
- *     <td><code>3.142</code>
- *   <tr valign=top bgcolor="#eeeeff">
- *     <td><code>\@\@##</code>
- *     <td>2
- *     <td>4
- *     <td>1.23004
- *     <td><code>1.23</code>
- * </table>
- *
- * <ul>
- * <li>Significant digit counts may be expressed using patterns that
- * specify a minimum and maximum number of significant digits.  These
- * are indicated by the <code>'@'</code> and <code>'#'</code>
- * characters.  The minimum number of significant digits is the number
- * of <code>'@'</code> characters.  The maximum number of significant
- * digits is the number of <code>'@'</code> characters plus the number
- * of <code>'#'</code> characters following on the right.  For
- * example, the pattern <code>"@@@"</code> indicates exactly 3
- * significant digits.  The pattern <code>"@##"</code> indicates from
- * 1 to 3 significant digits.  Trailing zero digits to the right of
- * the decimal separator are suppressed after the minimum number of
- * significant digits have been shown.  For example, the pattern
- * <code>"@##"</code> formats the number 0.1203 as
- * <code>"0.12"</code>.
- *
- * <li>If a pattern uses significant digits, it may not contain a
- * decimal separator, nor the <code>'0'</code> pattern character.
- * Patterns such as <code>"@00"</code> or <code>"@.###"</code> are
- * disallowed.
- *
- * <li>Any number of <code>'#'</code> characters may be prepended to
- * the left of the leftmost <code>'@'</code> character.  These have no
- * effect on the minimum and maximum significant digits counts, but
- * may be used to position grouping separators.  For example,
- * <code>"#,#@#"</code> indicates a minimum of one significant digits,
- * a maximum of two significant digits, and a grouping size of three.
- *
- * <li>In order to enable significant digits formatting, use a pattern
- * containing the <code>'@'</code> pattern character.  Alternatively,
- * call setSignificantDigitsUsed(TRUE).
- *
- * <li>In order to disable significant digits formatting, use a
- * pattern that does not contain the <code>'@'</code> pattern
- * character. Alternatively, call setSignificantDigitsUsed(FALSE).
- *
- * <li>The number of significant digits has no effect on parsing.
- *
- * <li>Significant digits may be used together with exponential notation. Such
- * patterns are equivalent to a normal exponential pattern with a minimum and
- * maximum integer digit count of one, a minimum fraction digit count of
- * <code>getMinimumSignificantDigits() - 1</code>, and a maximum fraction digit
- * count of <code>getMaximumSignificantDigits() - 1</code>. For example, the
- * pattern <code>"@@###E0"</code> is equivalent to <code>"0.0###E0"</code>.
- *
- * <li>If signficant digits are in use, then the integer and fraction
- * digit counts, as set via the API, are ignored.  If significant
- * digits are not in use, then the signficant digit counts, as set via
- * the API, are ignored.
- *
- * </ul>
- *
- * <p><strong>Padding</strong>
- *
- * <p>DecimalFormat supports padding the result of
- * format() to a specific width.  Padding may be specified either
- * through the API or through the pattern syntax.  In a pattern the pad escape
- * character, followed by a single pad character, causes padding to be parsed
- * and formatted.  The pad escape character is '*' in unlocalized patterns, and
- * can be localized using DecimalFormatSymbols::setSymbol() with a
- * DecimalFormatSymbols::kPadEscapeSymbol
- * selector.  For example, <code>"$*x#,##0.00"</code> formats 123 to
- * <code>"$xx123.00"</code>, and 1234 to <code>"$1,234.00"</code>.
- *
- * <ul>
- * <li>When padding is in effect, the width of the positive subpattern,
- * including prefix and suffix, determines the format width.  For example, in
- * the pattern <code>"* #0 o''clock"</code>, the format width is 10.
- *
- * <li>The width is counted in 16-bit code units (UChars).
- *
- * <li>Some parameters which usually do not matter have meaning when padding is
- * used, because the pattern width is significant with padding.  In the pattern
- * "* ##,##,#,##0.##", the format width is 14.  The initial characters "##,##,"
- * do not affect the grouping size or maximum integer digits, but they do affect
- * the format width.
- *
- * <li>Padding may be inserted at one of four locations: before the prefix,
- * after the prefix, before the suffix, or after the suffix.  If padding is
- * specified in any other location, applyPattern()
- * sets a failing UErrorCode.  If there is no prefix,
- * before the prefix and after the prefix are equivalent, likewise for the
- * suffix.
- *
- * <li>When specified in a pattern, the 32-bit code point immediately
- * following the pad escape is the pad character. This may be any character,
- * including a special pattern character. That is, the pad escape
- * <em>escapes</em> the following character. If there is no character after
- * the pad escape, then the pattern is illegal.
- *
- * </ul>
- *
- * <p><strong>Rounding</strong>
- *
- * <p>DecimalFormat supports rounding to a specific increment.  For
- * example, 1230 rounded to the nearest 50 is 1250.  1.234 rounded to the
- * nearest 0.65 is 1.3.  The rounding increment may be specified through the API
- * or in a pattern.  To specify a rounding increment in a pattern, include the
- * increment in the pattern itself.  "#,#50" specifies a rounding increment of
- * 50.  "#,##0.05" specifies a rounding increment of 0.05.
- *
- * <ul>
- * <li>Rounding only affects the string produced by formatting.  It does
- * not affect parsing or change any numerical values.
- *
- * <li>A <em>rounding mode</em> determines how values are rounded; see
- * DecimalFormat::ERoundingMode.  Rounding increments specified in
- * patterns use the default mode, DecimalFormat::kRoundHalfEven.
- *
- * <li>Some locales use rounding in their currency formats to reflect the
- * smallest currency denomination.
- *
- * <li>In a pattern, digits '1' through '9' specify rounding, but otherwise
- * behave identically to digit '0'.
- * </ul>
- *
- * <p><strong>Synchronization</strong>
- *
- * <p>DecimalFormat objects are not synchronized.  Multiple
- * threads should not access one formatter concurrently.
- *
- * <p><strong>Subclassing</strong>
- *
- * <p><em>User subclasses are not supported.</em> While clients may write
- * subclasses, such code will not necessarily work and will not be
- * guaranteed to work stably from release to release.
- */
-class U_I18N_API DecimalFormat: public NumberFormat {
-public:
-    /**
-     * Rounding mode.
-     * @stable ICU 2.4
-     */
-    enum ERoundingMode {
-        kRoundCeiling,  /**< Round towards positive infinity */
-        kRoundFloor,    /**< Round towards negative infinity */
-        kRoundDown,     /**< Round towards zero */
-        kRoundUp,       /**< Round away from zero */
-        kRoundHalfEven, /**< Round towards the nearest integer, or
-                             towards the nearest even integer if equidistant */
-        kRoundHalfDown, /**< Round towards the nearest integer, or
-                             towards zero if equidistant */
-        kRoundHalfUp    /**< Round towards the nearest integer, or
-                             away from zero if equidistant */
-        // We don't support ROUND_UNNECESSARY
-    };
-
-    /**
-     * Pad position.
-     * @stable ICU 2.4
-     */
-    enum EPadPosition {
-        kPadBeforePrefix,
-        kPadAfterPrefix,
-        kPadBeforeSuffix,
-        kPadAfterSuffix
-    };
-
-    /**
-     * Create a DecimalFormat using the default pattern and symbols
-     * for the default locale. This is a convenient way to obtain a
-     * DecimalFormat when internationalization is not the main concern.
-     * <P>
-     * To obtain standard formats for a given locale, use the factory methods
-     * on NumberFormat such as createInstance. These factories will
-     * return the most appropriate sub-class of NumberFormat for a given
-     * locale.
-     * @param status    Output param set to success/failure code. If the
-     *                  pattern is invalid this will be set to a failure code.
-     * @stable ICU 2.0
-     */
-    DecimalFormat(UErrorCode& status);
-
-    /**
-     * Create a DecimalFormat from the given pattern and the symbols
-     * for the default locale. This is a convenient way to obtain a
-     * DecimalFormat when internationalization is not the main concern.
-     * <P>
-     * To obtain standard formats for a given locale, use the factory methods
-     * on NumberFormat such as createInstance. These factories will
-     * return the most appropriate sub-class of NumberFormat for a given
-     * locale.
-     * @param pattern   A non-localized pattern string.
-     * @param status    Output param set to success/failure code. If the
-     *                  pattern is invalid this will be set to a failure code.
-     * @stable ICU 2.0
-     */
-    DecimalFormat(const UnicodeString& pattern,
-                  UErrorCode& status);
-
-    /**
-     * Create a DecimalFormat from the given pattern and symbols.
-     * Use this constructor when you need to completely customize the
-     * behavior of the format.
-     * <P>
-     * To obtain standard formats for a given
-     * locale, use the factory methods on NumberFormat such as
-     * createInstance or createCurrencyInstance. If you need only minor adjustments
-     * to a standard format, you can modify the format returned by
-     * a NumberFormat factory method.
-     *
-     * @param pattern           a non-localized pattern string
-     * @param symbolsToAdopt    the set of symbols to be used.  The caller should not
-     *                          delete this object after making this call.
-     * @param status            Output param set to success/failure code. If the
-     *                          pattern is invalid this will be set to a failure code.
-     * @stable ICU 2.0
-     */
-    DecimalFormat(  const UnicodeString& pattern,
-                    DecimalFormatSymbols* symbolsToAdopt,
-                    UErrorCode& status);
-
-    /**
-     * Create a DecimalFormat from the given pattern and symbols.
-     * Use this constructor when you need to completely customize the
-     * behavior of the format.
-     * <P>
-     * To obtain standard formats for a given
-     * locale, use the factory methods on NumberFormat such as
-     * createInstance or createCurrencyInstance. If you need only minor adjustments
-     * to a standard format, you can modify the format returned by
-     * a NumberFormat factory method.
-     *
-     * @param pattern           a non-localized pattern string
-     * @param symbolsToAdopt    the set of symbols to be used.  The caller should not
-     *                          delete this object after making this call.
-     * @param parseError        Output param to receive errors occured during parsing 
-     * @param status            Output param set to success/failure code. If the
-     *                          pattern is invalid this will be set to a failure code.
-     * @stable ICU 2.0
-     */
-    DecimalFormat(  const UnicodeString& pattern,
-                    DecimalFormatSymbols* symbolsToAdopt,
-                    UParseError& parseError,
-                    UErrorCode& status);
-    /**
-     * Create a DecimalFormat from the given pattern and symbols.
-     * Use this constructor when you need to completely customize the
-     * behavior of the format.
-     * <P>
-     * To obtain standard formats for a given
-     * locale, use the factory methods on NumberFormat such as
-     * createInstance or createCurrencyInstance. If you need only minor adjustments
-     * to a standard format, you can modify the format returned by
-     * a NumberFormat factory method.
-     *
-     * @param pattern           a non-localized pattern string
-     * @param symbols   the set of symbols to be used
-     * @param status            Output param set to success/failure code. If the
-     *                          pattern is invalid this will be set to a failure code.
-     * @stable ICU 2.0
-     */
-    DecimalFormat(  const UnicodeString& pattern,
-                    const DecimalFormatSymbols& symbols,
-                    UErrorCode& status);
-
-    /**
-     * Copy constructor.
-     * 
-     * @param source    the DecimalFormat object to be copied from.
-     * @stable ICU 2.0
-     */
-    DecimalFormat(const DecimalFormat& source);
-
-    /**
-     * Assignment operator.
-     *
-     * @param rhs    the DecimalFormat object to be copied.
-     * @stable ICU 2.0
-     */
-    DecimalFormat& operator=(const DecimalFormat& rhs);
-
-    /**
-     * Destructor.
-     * @stable ICU 2.0
-     */
-    virtual ~DecimalFormat();
-
-    /**
-     * Clone this Format object polymorphically. The caller owns the
-     * result and should delete it when done.
-     *
-     * @return    a polymorphic copy of this DecimalFormat.
-     * @stable ICU 2.0
-     */
-    virtual Format* clone(void) const;
-
-    /**
-     * Return true if the given Format objects are semantically equal.
-     * Objects of different subclasses are considered unequal.
-     *
-     * @param other    the object to be compared with.
-     * @return         true if the given Format objects are semantically equal.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const Format& other) const;
-
-    /**
-     * Format a double or long number using base-10 representation.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-    */
-    virtual UnicodeString& format(double number,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos) const;
-    /**
-     * Format a long number using base-10 representation.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& format(int32_t number,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos) const;
-    /**
-     * Format an int64 number using base-10 representation.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.8
-     */
-    virtual UnicodeString& format(int64_t number,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos) const;
-
-    /**
-     * Format a Formattable using base-10 representation.
-     *
-     * @param obj       The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @param status    Error code indicating success or failure.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& format(const Formattable& obj,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos,
-                                  UErrorCode& status) const;
-
-    /**
-     * Redeclared NumberFormat method.
-     * Formats an object to produce a string.
-     *
-     * @param obj       The object to format.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param status    Output parameter filled in with success or failure status.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(const Formattable& obj,
-                          UnicodeString& appendTo,
-                          UErrorCode& status) const;
-
-    /**
-     * Redeclared NumberFormat method.
-     * Format a double number.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(double number,
-                          UnicodeString& appendTo) const;
-
-    /**
-     * Redeclared NumberFormat method.
-     * Format a long number. These methods call the NumberFormat
-     * pure virtual format() methods with the default FieldPosition.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(int32_t number,
-                          UnicodeString& appendTo) const;
-
-    /**
-     * Redeclared NumberFormat method.
-     * Format an int64 number. These methods call the NumberFormat
-     * pure virtual format() methods with the default FieldPosition.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.8
-     */
-    UnicodeString& format(int64_t number,
-                          UnicodeString& appendTo) const;
-   /**
-    * Parse the given string using this object's choices. The method
-    * does string comparisons to try to find an optimal match.
-    * If no object can be parsed, index is unchanged, and NULL is
-    * returned.  The result is returned as the most parsimonious
-    * type of Formattable that will accomodate all of the
-    * necessary precision.  For example, if the result is exactly 12,
-    * it will be returned as a long.  However, if it is 1.5, it will
-    * be returned as a double.
-    *
-    * @param text           The text to be parsed.
-    * @param result         Formattable to be set to the parse result.
-    *                       If parse fails, return contents are undefined.
-    * @param parsePosition  The position to start parsing at on input.
-    *                       On output, moved to after the last successfully
-    *                       parse character. On parse failure, does not change.
-    * @see Formattable
-    * @stable ICU 2.0
-    */
-    virtual void parse(const UnicodeString& text,
-                       Formattable& result,
-                       ParsePosition& parsePosition) const;
-
-    // Declare here again to get rid of function hiding problems.
-    /** 
-     * Parse the given string using this object's choices.
-     *
-     * @param text           The text to be parsed.
-     * @param result         Formattable to be set to the parse result.
-     * @param status    Output parameter filled in with success or failure status.
-     * @stable ICU 2.0
-     */
-    virtual void parse(const UnicodeString& text, 
-                       Formattable& result, 
-                       UErrorCode& status) const;
-
-    /**
-     * Parses text from the given string as a currency amount.  Unlike
-     * the parse() method, this method will attempt to parse a generic
-     * currency name, searching for a match of this object's locale's
-     * currency display names, or for a 3-letter ISO currency code.
-     * This method will fail if this format is not a currency format,
-     * that is, if it does not contain the currency pattern symbol
-     * (U+00A4) in its prefix or suffix.
-     *
-     * @param text the string to parse
-     * @param result output parameter to receive result. This will have
-     * its currency set to the parsed ISO currency code.
-     * @param pos input-output position; on input, the position within
-     * text to match; must have 0 <= pos.getIndex() < text.length();
-     * on output, the position after the last matched character. If
-     * the parse fails, the position in unchanged upon output.
-     * @return a reference to result
-     * @internal
-     */
-    virtual Formattable& parseCurrency(const UnicodeString& text,
-                                       Formattable& result,
-                                       ParsePosition& pos) const;
-
-    /**
-     * Returns the decimal format symbols, which is generally not changed
-     * by the programmer or user.
-     * @return desired DecimalFormatSymbols
-     * @see DecimalFormatSymbols
-     * @stable ICU 2.0
-     */
-    virtual const DecimalFormatSymbols* getDecimalFormatSymbols(void) const;
-
-    /**
-     * Sets the decimal format symbols, which is generally not changed
-     * by the programmer or user.
-     * @param symbolsToAdopt DecimalFormatSymbols to be adopted.
-     * @stable ICU 2.0
-     */
-    virtual void adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt);
-
-    /**
-     * Sets the decimal format symbols, which is generally not changed
-     * by the programmer or user.
-     * @param symbols DecimalFormatSymbols.
-     * @stable ICU 2.0
-     */
-    virtual void setDecimalFormatSymbols(const DecimalFormatSymbols& symbols);
-
-
-    /**
-     * Get the positive prefix.
-     *
-     * @param result    Output param which will receive the positive prefix.
-     * @return          A reference to 'result'.
-     * Examples: +123, $123, sFr123
-     * @stable ICU 2.0
-     */
-    UnicodeString& getPositivePrefix(UnicodeString& result) const;
-
-    /**
-     * Set the positive prefix.
-     *
-     * @param newValue    the new value of the the positive prefix to be set.
-     * Examples: +123, $123, sFr123
-     * @stable ICU 2.0
-     */
-    virtual void setPositivePrefix(const UnicodeString& newValue);
-
-    /**
-     * Get the negative prefix.
-     *
-     * @param result    Output param which will receive the negative prefix.
-     * @return          A reference to 'result'.
-     * Examples: -123, ($123) (with negative suffix), sFr-123
-     * @stable ICU 2.0
-     */
-    UnicodeString& getNegativePrefix(UnicodeString& result) const;
-
-    /**
-     * Set the negative prefix.
-     *
-     * @param newValue    the new value of the the negative prefix to be set.
-     * Examples: -123, ($123) (with negative suffix), sFr-123
-     * @stable ICU 2.0
-     */
-    virtual void setNegativePrefix(const UnicodeString& newValue);
-
-    /**
-     * Get the positive suffix.
-     *
-     * @param result    Output param which will receive the positive suffix.
-     * @return          A reference to 'result'.
-     * Example: 123%
-     * @stable ICU 2.0
-     */
-    UnicodeString& getPositiveSuffix(UnicodeString& result) const;
-
-    /**
-     * Set the positive suffix.
-     *
-     * @param newValue    the new value of the positive suffix to be set.
-     * Example: 123%
-     * @stable ICU 2.0
-     */
-    virtual void setPositiveSuffix(const UnicodeString& newValue);
-
-    /**
-     * Get the negative suffix.
-     *
-     * @param result    Output param which will receive the negative suffix.
-     * @return          A reference to 'result'.
-     * Examples: -123%, ($123) (with positive suffixes)
-     * @stable ICU 2.0
-     */
-    UnicodeString& getNegativeSuffix(UnicodeString& result) const;
-
-    /**
-     * Set the negative suffix.
-     *
-     * @param newValue    the new value of the negative suffix to be set.
-     * Examples: 123%
-     * @stable ICU 2.0
-     */
-    virtual void setNegativeSuffix(const UnicodeString& newValue);
-
-    /**
-     * Get the multiplier for use in percent, permill, etc.
-     * For a percentage, set the suffixes to have "%" and the multiplier to be 100.
-     * (For Arabic, use arabic percent symbol).
-     * For a permill, set the suffixes to have "\\u2031" and the multiplier to be 1000.
-     *
-     * @return    the multiplier for use in percent, permill, etc.
-     * Examples: with 100, 1.23 -> "123", and "123" -> 1.23
-     * @stable ICU 2.0
-     */
-    int32_t getMultiplier(void) const;
-
-    /**
-     * Set the multiplier for use in percent, permill, etc.
-     * For a percentage, set the suffixes to have "%" and the multiplier to be 100.
-     * (For Arabic, use arabic percent symbol).
-     * For a permill, set the suffixes to have "\\u2031" and the multiplier to be 1000.
-     *
-     * @param newValue    the new value of the multiplier for use in percent, permill, etc.
-     * Examples: with 100, 1.23 -> "123", and "123" -> 1.23
-     * @stable ICU 2.0
-     */
-    virtual void setMultiplier(int32_t newValue);
-
-    /**
-     * Get the rounding increment.
-     * @return A positive rounding increment, or 0.0 if rounding
-     * is not in effect.
-     * @see #setRoundingIncrement
-     * @see #getRoundingMode
-     * @see #setRoundingMode
-     * @stable ICU 2.0
-     */
-    virtual double getRoundingIncrement(void) const;
-
-    /**
-     * Set the rounding increment.  This method also controls whether
-     * rounding is enabled.
-     * @param newValue A positive rounding increment, or 0.0 to disable rounding.
-     * Negative increments are equivalent to 0.0.
-     * @see #getRoundingIncrement
-     * @see #getRoundingMode
-     * @see #setRoundingMode
-     * @stable ICU 2.0
-     */
-    virtual void setRoundingIncrement(double newValue);
-
-    /**
-     * Get the rounding mode.
-     * @return A rounding mode
-     * @see #setRoundingIncrement
-     * @see #getRoundingIncrement
-     * @see #setRoundingMode
-     * @stable ICU 2.0
-     */
-    virtual ERoundingMode getRoundingMode(void) const;
-
-    /**
-     * Set the rounding mode.  This has no effect unless the rounding
-     * increment is greater than zero.
-     * @param roundingMode A rounding mode
-     * @see #setRoundingIncrement
-     * @see #getRoundingIncrement
-     * @see #getRoundingMode
-     * @stable ICU 2.0
-     */
-    virtual void setRoundingMode(ERoundingMode roundingMode);
-
-    /**
-     * Get the width to which the output of format() is padded.
-     * The width is counted in 16-bit code units.
-     * @return the format width, or zero if no padding is in effect
-     * @see #setFormatWidth
-     * @see #getPadCharacterString
-     * @see #setPadCharacter
-     * @see #getPadPosition
-     * @see #setPadPosition
-     * @stable ICU 2.0
-     */
-    virtual int32_t getFormatWidth(void) const;
-
-    /**
-     * Set the width to which the output of format() is padded.
-     * The width is counted in 16-bit code units.
-     * This method also controls whether padding is enabled.
-     * @param width the width to which to pad the result of
-     * format(), or zero to disable padding.  A negative
-     * width is equivalent to 0.
-     * @see #getFormatWidth
-     * @see #getPadCharacterString
-     * @see #setPadCharacter
-     * @see #getPadPosition
-     * @see #setPadPosition
-     * @stable ICU 2.0
-     */
-    virtual void setFormatWidth(int32_t width);
-
-    /**
-     * Get the pad character used to pad to the format width.  The
-     * default is ' '.
-     * @return a string containing the pad character. This will always
-     * have a length of one 32-bit code point.
-     * @see #setFormatWidth
-     * @see #getFormatWidth
-     * @see #setPadCharacter
-     * @see #getPadPosition
-     * @see #setPadPosition
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString getPadCharacterString() const;
-
-    /**
-     * Set the character used to pad to the format width.  If padding
-     * is not enabled, then this will take effect if padding is later
-     * enabled.
-     * @param padChar a string containing the pad charcter. If the string
-     * has length 0, then the pad characer is set to ' '.  Otherwise
-     * padChar.char32At(0) will be used as the pad character.
-     * @see #setFormatWidth
-     * @see #getFormatWidth
-     * @see #getPadCharacterString
-     * @see #getPadPosition
-     * @see #setPadPosition
-     * @stable ICU 2.0
-     */
-    virtual void setPadCharacter(const UnicodeString &padChar);
-
-    /**
-     * Get the position at which padding will take place.  This is the location
-     * at which padding will be inserted if the result of format()
-     * is shorter than the format width.
-     * @return the pad position, one of kPadBeforePrefix,
-     * kPadAfterPrefix, kPadBeforeSuffix, or
-     * kPadAfterSuffix.
-     * @see #setFormatWidth
-     * @see #getFormatWidth
-     * @see #setPadCharacter
-     * @see #getPadCharacterString
-     * @see #setPadPosition
-     * @see #EPadPosition
-     * @stable ICU 2.0
-     */
-    virtual EPadPosition getPadPosition(void) const;
-
-    /**
-     * Set the position at which padding will take place.  This is the location
-     * at which padding will be inserted if the result of format()
-     * is shorter than the format width.  This has no effect unless padding is
-     * enabled.
-     * @param padPos the pad position, one of kPadBeforePrefix,
-     * kPadAfterPrefix, kPadBeforeSuffix, or
-     * kPadAfterSuffix.
-     * @see #setFormatWidth
-     * @see #getFormatWidth
-     * @see #setPadCharacter
-     * @see #getPadCharacterString
-     * @see #getPadPosition
-     * @see #EPadPosition
-     * @stable ICU 2.0
-     */
-    virtual void setPadPosition(EPadPosition padPos);
-
-    /**
-     * Return whether or not scientific notation is used.
-     * @return TRUE if this object formats and parses scientific notation
-     * @see #setScientificNotation
-     * @see #getMinimumExponentDigits
-     * @see #setMinimumExponentDigits
-     * @see #isExponentSignAlwaysShown
-     * @see #setExponentSignAlwaysShown
-     * @stable ICU 2.0
-     */
-    virtual UBool isScientificNotation(void);
-
-    /**
-     * Set whether or not scientific notation is used. When scientific notation
-     * is used, the effective maximum number of integer digits is <= 8.  If the
-     * maximum number of integer digits is set to more than 8, the effective
-     * maximum will be 1.  This allows this call to generate a 'default' scientific
-     * number format without additional changes.
-     * @param useScientific TRUE if this object formats and parses scientific
-     * notation
-     * @see #isScientificNotation
-     * @see #getMinimumExponentDigits
-     * @see #setMinimumExponentDigits
-     * @see #isExponentSignAlwaysShown
-     * @see #setExponentSignAlwaysShown
-     * @stable ICU 2.0
-     */
-    virtual void setScientificNotation(UBool useScientific);
-
-    /**
-     * Return the minimum exponent digits that will be shown.
-     * @return the minimum exponent digits that will be shown
-     * @see #setScientificNotation
-     * @see #isScientificNotation
-     * @see #setMinimumExponentDigits
-     * @see #isExponentSignAlwaysShown
-     * @see #setExponentSignAlwaysShown
-     * @stable ICU 2.0
-     */
-    virtual int8_t getMinimumExponentDigits(void) const;
-
-    /**
-     * Set the minimum exponent digits that will be shown.  This has no
-     * effect unless scientific notation is in use.
-     * @param minExpDig a value >= 1 indicating the fewest exponent digits
-     * that will be shown.  Values less than 1 will be treated as 1.
-     * @see #setScientificNotation
-     * @see #isScientificNotation
-     * @see #getMinimumExponentDigits
-     * @see #isExponentSignAlwaysShown
-     * @see #setExponentSignAlwaysShown
-     * @stable ICU 2.0
-     */
-    virtual void setMinimumExponentDigits(int8_t minExpDig);
-
-    /**
-     * Return whether the exponent sign is always shown.
-     * @return TRUE if the exponent is always prefixed with either the
-     * localized minus sign or the localized plus sign, false if only negative
-     * exponents are prefixed with the localized minus sign.
-     * @see #setScientificNotation
-     * @see #isScientificNotation
-     * @see #setMinimumExponentDigits
-     * @see #getMinimumExponentDigits
-     * @see #setExponentSignAlwaysShown
-     * @stable ICU 2.0
-     */
-    virtual UBool isExponentSignAlwaysShown(void);
-
-    /**
-     * Set whether the exponent sign is always shown.  This has no effect
-     * unless scientific notation is in use.
-     * @param expSignAlways TRUE if the exponent is always prefixed with either
-     * the localized minus sign or the localized plus sign, false if only
-     * negative exponents are prefixed with the localized minus sign.
-     * @see #setScientificNotation
-     * @see #isScientificNotation
-     * @see #setMinimumExponentDigits
-     * @see #getMinimumExponentDigits
-     * @see #isExponentSignAlwaysShown
-     * @stable ICU 2.0
-     */
-    virtual void setExponentSignAlwaysShown(UBool expSignAlways);
-
-    /**
-     * Return the grouping size. Grouping size is the number of digits between
-     * grouping separators in the integer portion of a number.  For example,
-     * in the number "123,456.78", the grouping size is 3.
-     *
-     * @return    the grouping size.
-     * @see setGroupingSize
-     * @see NumberFormat::isGroupingUsed
-     * @see DecimalFormatSymbols::getGroupingSeparator
-     * @stable ICU 2.0
-     */
-    int32_t getGroupingSize(void) const;
-
-    /**
-     * Set the grouping size. Grouping size is the number of digits between
-     * grouping separators in the integer portion of a number.  For example,
-     * in the number "123,456.78", the grouping size is 3.
-     *
-     * @param newValue    the new value of the grouping size.
-     * @see getGroupingSize
-     * @see NumberFormat::setGroupingUsed
-     * @see DecimalFormatSymbols::setGroupingSeparator
-     * @stable ICU 2.0
-     */
-    virtual void setGroupingSize(int32_t newValue);
-
-    /**
-     * Return the secondary grouping size. In some locales one
-     * grouping interval is used for the least significant integer
-     * digits (the primary grouping size), and another is used for all
-     * others (the secondary grouping size).  A formatter supporting a
-     * secondary grouping size will return a positive integer unequal
-     * to the primary grouping size returned by
-     * getGroupingSize().  For example, if the primary
-     * grouping size is 4, and the secondary grouping size is 2, then
-     * the number 123456789 formats as "1,23,45,6789", and the pattern
-     * appears as "#,##,###0".
-     * @return the secondary grouping size, or a value less than
-     * one if there is none
-     * @see setSecondaryGroupingSize
-     * @see NumberFormat::isGroupingUsed
-     * @see DecimalFormatSymbols::getGroupingSeparator
-     * @stable ICU 2.4
-     */
-    int32_t getSecondaryGroupingSize(void) const;
-
-    /**
-     * Set the secondary grouping size. If set to a value less than 1,
-     * then secondary grouping is turned off, and the primary grouping
-     * size is used for all intervals, not just the least significant.
-     *
-     * @param newValue    the new value of the secondary grouping size.
-     * @see getSecondaryGroupingSize
-     * @see NumberFormat#setGroupingUsed
-     * @see DecimalFormatSymbols::setGroupingSeparator
-     * @stable ICU 2.4
-     */
-    virtual void setSecondaryGroupingSize(int32_t newValue);
-
-    /**
-     * Allows you to get the behavior of the decimal separator with integers.
-     * (The decimal separator will always appear with decimals.)
-     *
-     * @return    TRUE if the decimal separator always appear with decimals.
-     * Example: Decimal ON: 12345 -> 12345.; OFF: 12345 -> 12345
-     * @stable ICU 2.0
-     */
-    UBool isDecimalSeparatorAlwaysShown(void) const;
-
-    /**
-     * Allows you to set the behavior of the decimal separator with integers.
-     * (The decimal separator will always appear with decimals.)
-     *
-     * @param newValue    set TRUE if the decimal separator will always appear with decimals.
-     * Example: Decimal ON: 12345 -> 12345.; OFF: 12345 -> 12345
-     * @stable ICU 2.0
-     */
-    virtual void setDecimalSeparatorAlwaysShown(UBool newValue);
-
-    /**
-     * Synthesizes a pattern string that represents the current state
-     * of this Format object.
-     *
-     * @param result    Output param which will receive the pattern.
-     *                  Previous contents are deleted.
-     * @return          A reference to 'result'.
-     * @see applyPattern
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& toPattern(UnicodeString& result) const;
-
-    /**
-     * Synthesizes a localized pattern string that represents the current
-     * state of this Format object.
-     *
-     * @param result    Output param which will receive the localized pattern.
-     *                  Previous contents are deleted.
-     * @return          A reference to 'result'.
-     * @see applyPattern
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& toLocalizedPattern(UnicodeString& result) const;
- 
-    /**
-     * Apply the given pattern to this Format object.  A pattern is a
-     * short-hand specification for the various formatting properties.
-     * These properties can also be changed individually through the
-     * various setter methods.
-     * <P>
-     * There is no limit to integer digits are set
-     * by this routine, since that is the typical end-user desire;
-     * use setMaximumInteger if you want to set a real value.
-     * For negative numbers, use a second pattern, separated by a semicolon
-     * <pre>
-     * .      Example "#,#00.0#" -> 1,234.56
-     * </pre>
-     * This means a minimum of 2 integer digits, 1 fraction digit, and
-     * a maximum of 2 fraction digits.
-     * <pre>
-     * .      Example: "#,#00.0#;(#,#00.0#)" for negatives in parantheses.
-     * </pre>
-     * In negative patterns, the minimum and maximum counts are ignored;
-     * these are presumed to be set in the positive pattern.
-     *
-     * @param pattern    The pattern to be applied.
-     * @param parseError Struct to recieve information on position 
-     *                   of error if an error is encountered
-     * @param status     Output param set to success/failure code on
-     *                   exit. If the pattern is invalid, this will be
-     *                   set to a failure result.
-     * @stable ICU 2.0
-     */
-    virtual void applyPattern(const UnicodeString& pattern,
-                             UParseError& parseError,
-                             UErrorCode& status);
-    /**
-     * Sets the pattern.
-     * @param pattern   The pattern to be applied.
-     * @param status    Output param set to success/failure code on
-     *                  exit. If the pattern is invalid, this will be
-     *                  set to a failure result.
-     * @stable ICU 2.0
-     */  
-    virtual void applyPattern(const UnicodeString& pattern,
-                             UErrorCode& status);
-
-    /**
-     * Apply the given pattern to this Format object.  The pattern
-     * is assumed to be in a localized notation. A pattern is a
-     * short-hand specification for the various formatting properties.
-     * These properties can also be changed individually through the
-     * various setter methods.
-     * <P>
-     * There is no limit to integer digits are set
-     * by this routine, since that is the typical end-user desire;
-     * use setMaximumInteger if you want to set a real value.
-     * For negative numbers, use a second pattern, separated by a semicolon
-     * <pre>
-     * .      Example "#,#00.0#" -> 1,234.56
-     * </pre>
-     * This means a minimum of 2 integer digits, 1 fraction digit, and
-     * a maximum of 2 fraction digits.
-     *
-     * Example: "#,#00.0#;(#,#00.0#)" for negatives in parantheses.
-     *
-     * In negative patterns, the minimum and maximum counts are ignored;
-     * these are presumed to be set in the positive pattern.
-     *
-     * @param pattern   The localized pattern to be applied.
-     * @param parseError Struct to recieve information on position 
-     *                   of error if an error is encountered
-     * @param status    Output param set to success/failure code on
-     *                  exit. If the pattern is invalid, this will be
-     *                  set to a failure result.
-     * @stable ICU 2.0
-     */
-    virtual void applyLocalizedPattern(const UnicodeString& pattern,
-                                       UParseError& parseError,
-                                       UErrorCode& status);
-
-    /**
-     * Apply the given pattern to this Format object.
-     *
-     * @param pattern   The localized pattern to be applied.
-     * @param status    Output param set to success/failure code on
-     *                  exit. If the pattern is invalid, this will be
-     *                  set to a failure result.
-     * @stable ICU 2.0
-     */
-    virtual void applyLocalizedPattern(const UnicodeString& pattern,
-                                       UErrorCode& status);
-
-
-    /**
-     * Sets the maximum number of digits allowed in the integer portion of a
-     * number. This override limits the integer digit count to 309.
-     *
-     * @param newValue    the new value of the maximum number of digits 
-     *                      allowed in the integer portion of a number.
-     * @see NumberFormat#setMaximumIntegerDigits
-     * @stable ICU 2.0
-     */
-    virtual void setMaximumIntegerDigits(int32_t newValue);
-
-    /**
-     * Sets the minimum number of digits allowed in the integer portion of a
-     * number. This override limits the integer digit count to 309.
-     * 
-     * @param newValue    the new value of the minimum number of digits 
-     *                      allowed in the integer portion of a number.
-     * @see NumberFormat#setMinimumIntegerDigits
-     * @stable ICU 2.0
-     */
-    virtual void setMinimumIntegerDigits(int32_t newValue);
-
-    /**
-     * Sets the maximum number of digits allowed in the fraction portion of a
-     * number. This override limits the fraction digit count to 340.
-     *
-     * @param newValue    the new value of the maximum number of digits 
-     *                    allowed in the fraction portion of a number.
-     * @see NumberFormat#setMaximumFractionDigits
-     * @stable ICU 2.0
-     */
-    virtual void setMaximumFractionDigits(int32_t newValue);
-
-    /**
-     * Sets the minimum number of digits allowed in the fraction portion of a
-     * number. This override limits the fraction digit count to 340.
-     *
-     * @param newValue    the new value of the minimum number of digits 
-     *                    allowed in the fraction portion of a number.
-     * @see NumberFormat#setMinimumFractionDigits
-     * @stable ICU 2.0
-     */
-    virtual void setMinimumFractionDigits(int32_t newValue);
-
-    /**
-     * Returns the minimum number of significant digits that will be
-     * displayed. This value has no effect unless areSignificantDigitsUsed()
-     * returns true.
-     * @return the fewest significant digits that will be shown
-     * @stable ICU 3.0
-     */
-    int32_t getMinimumSignificantDigits() const;
-
-    /**
-     * Returns the maximum number of significant digits that will be
-     * displayed. This value has no effect unless areSignificantDigitsUsed()
-     * returns true.
-     * @return the most significant digits that will be shown
-     * @stable ICU 3.0
-     */
-    int32_t getMaximumSignificantDigits() const;
-
-    /**
-     * Sets the minimum number of significant digits that will be
-     * displayed.  If <code>min</code> is less than one then it is set
-     * to one.  If the maximum significant digits count is less than
-     * <code>min</code>, then it is set to <code>min</code>. This
-     * value has no effect unless areSignificantDigits() returns true.
-     * @param min the fewest significant digits to be shown 
-     * @stable ICU 3.0
-     */
-    void setMinimumSignificantDigits(int32_t min);
-
-    /**
-     * Sets the maximum number of significant digits that will be
-     * displayed.  If <code>max</code> is less than one then it is set
-     * to one.  If the minimum significant digits count is greater
-     * than <code>max</code>, then it is set to <code>max</code>.
-     * This value has no effect unless areSignificantDigits() returns
-     * true.
-     * @param max the most significant digits to be shown 
-     * @stable ICU 3.0
-     */
-    void setMaximumSignificantDigits(int32_t max);
-
-    /**
-     * Returns true if significant digits are in use, or false if
-     * integer and fraction digit counts are in use.
-     * @return true if significant digits are in use
-     * @stable ICU 3.0
-     */
-    UBool areSignificantDigitsUsed() const;
-
-    /**
-     * Sets whether significant digits are in use, or integer and
-     * fraction digit counts are in use.
-     * @param useSignificantDigits true to use significant digits, or
-     * false to use integer and fraction digit counts
-     * @stable ICU 3.0
-     */
-    void setSignificantDigitsUsed(UBool useSignificantDigits);
-
- public:
-    /**
-     * Sets the currency used to display currency
-     * amounts.  This takes effect immediately, if this format is a
-     * currency format.  If this format is not a currency format, then
-     * the currency is used if and when this object becomes a
-     * currency format through the application of a new pattern.
-     * @param theCurrency a 3-letter ISO code indicating new currency
-     * to use.  It need not be null-terminated.  May be the empty
-     * string or NULL to indicate no currency.
-     * @param ec input-output error code
-     * @stable ICU 3.0
-     */
-    virtual void setCurrency(const UChar* theCurrency, UErrorCode& ec);
-
-    /**
-     * Sets the currency used to display currency amounts.  See
-     * setCurrency(const UChar*, UErrorCode&).
-     * @deprecated ICU 3.0. Use setCurrency(const UChar*, UErrorCode&).
-     */
-    virtual void setCurrency(const UChar* theCurrency);
-
-    /**
-     * The resource tags we use to retrieve decimal format data from
-     * locale resource bundles.
-     * @deprecated ICU 3.4. This string has no public purpose. Please don't use it.
-     */
-    static const char fgNumberPatterns[];
-
-public:
-
-    /**
-     * Return the class ID for this class.  This is useful only for
-     * comparing to a return value from getDynamicClassID().  For example:
-     * <pre>
-     * .      Base* polymorphic_pointer = createPolymorphicObject();
-     * .      if (polymorphic_pointer->getDynamicClassID() ==
-     * .          Derived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
-     * This method is to implement a simple version of RTTI, since not all
-     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
-     * clone() methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-
-private:
-    DecimalFormat(); // default constructor not implemented
-
-    int32_t precision(UBool isIntegral) const;
-
-    /**
-     * Do real work of constructing a new DecimalFormat.
-     */
-    void construct(UErrorCode&               status,
-                   UParseError&             parseErr,
-                   const UnicodeString*     pattern = 0,
-                   DecimalFormatSymbols*    symbolsToAdopt = 0
-                   );
-
-    /**
-     * Does the real work of generating a pattern.
-     *
-     * @param result     Output param which will receive the pattern.
-     *                   Previous contents are deleted.
-     * @param localized  TRUE return localized pattern.
-     * @return           A reference to 'result'.
-     */
-    UnicodeString& toPattern(UnicodeString& result, UBool localized) const;
-
-    /**
-     * Does the real work of applying a pattern.
-     * @param pattern    The pattern to be applied.
-     * @param localized  If true, the pattern is localized; else false.
-     * @param parseError Struct to recieve information on position 
-     *                   of error if an error is encountered
-     * @param status     Output param set to success/failure code on
-     *                   exit. If the pattern is invalid, this will be
-     *                   set to a failure result.
-     */
-    void applyPattern(const UnicodeString& pattern,
-                            UBool localized,
-                            UParseError& parseError,
-                            UErrorCode& status);
-    /**
-     * Do the work of formatting a number, either a double or a long.
-     *
-     * @param appendTo       Output parameter to receive result.
-     *                       Result is appended to existing contents.
-     * @param fieldPosition  On input: an alignment field, if desired.
-     *                       On output: the offsets of the alignment field.
-     * @param digits         the digits to be formatted.
-     * @param isInteger      if TRUE format the digits as Integer.
-     * @return               Reference to 'appendTo' parameter.
-     */
-    UnicodeString& subformat(UnicodeString& appendTo,
-                             FieldPosition& fieldPosition,
-                             DigitList& digits,
-                             UBool         isInteger) const;
-
-    void parse(const UnicodeString& text,
-               Formattable& result,
-               ParsePosition& pos,
-               UBool parseCurrency) const;
-
-    enum {
-        fgStatusInfinite,
-        fgStatusLength      // Leave last in list.
-    } StatusFlags;
-
-    UBool subparse(const UnicodeString& text, ParsePosition& parsePosition,
-                   DigitList& digits, UBool* status,
-                   UChar* currency) const;
-
-    int32_t skipPadding(const UnicodeString& text, int32_t position) const;
-
-    int32_t compareAffix(const UnicodeString& input,
-                         int32_t pos,
-                         UBool isNegative,
-                         UBool isPrefix,
-                         UChar* currency) const;
-    
-    static int32_t compareSimpleAffix(const UnicodeString& affix,
-                                      const UnicodeString& input,
-                                      int32_t pos,
-                                      UBool strict);
-    
-    static int32_t skipRuleWhiteSpace(const UnicodeString& text, int32_t pos);
-    
-    static int32_t skipUWhiteSpace(const UnicodeString& text, int32_t pos);
-    
-    int32_t compareComplexAffix(const UnicodeString& affixPat,
-                                const UnicodeString& input,
-                                int32_t pos,
-                                UChar* currency) const;
-
-    static int32_t match(const UnicodeString& text, int32_t pos, UChar32 ch);
-
-    static int32_t match(const UnicodeString& text, int32_t pos, const UnicodeString& str);
-	
-    static UBool matchSymbol(const UnicodeString &text, int32_t position, int32_t length, const UnicodeString &symbol,
-                             UnicodeSet *sset, UChar32 schar);
-	
-    /**
-     * Get a decimal format symbol.
-     * Returns a const reference to the symbol string.
-     * @internal
-     */
-    inline const UnicodeString &getConstSymbol(DecimalFormatSymbols::ENumberFormatSymbol symbol) const;
-
-    int32_t appendAffix(UnicodeString& buf, double number,
-                        UBool isNegative, UBool isPrefix) const;
-
-    /**
-     * Append an affix to the given UnicodeString, using quotes if
-     * there are special characters.  Single quotes themselves must be
-     * escaped in either case.
-     */
-    void appendAffixPattern(UnicodeString& appendTo, const UnicodeString& affix, 
-                            UBool localized) const;
-
-    void appendAffixPattern(UnicodeString& appendTo,
-                            const UnicodeString* affixPattern,
-                            const UnicodeString& expAffix, UBool localized) const;
-
-    void expandAffix(const UnicodeString& pattern,
-                     UnicodeString& affix,
-                     double number,
-                     UBool doFormat) const;
-
-    void expandAffixes();
-    
-    static double round(double a, ERoundingMode mode, UBool isNegative);
-
-    void addPadding(UnicodeString& appendTo,
-                    FieldPosition& fieldPosition,
-                    int32_t prefixLen, int32_t suffixLen) const;
-
-    UBool isGroupingPosition(int32_t pos) const;
-
-    void setCurrencyForSymbols();
-
-    /**
-     * Constants.
-     */
-    //static const int8_t fgMaxDigit; // The largest digit, in this case 9
-
-    /*transient*/ //DigitList* fDigitList;
-
-    UnicodeString           fPositivePrefix;
-    UnicodeString           fPositiveSuffix;
-    UnicodeString           fNegativePrefix;
-    UnicodeString           fNegativeSuffix;
-    UnicodeString*          fPosPrefixPattern;
-    UnicodeString*          fPosSuffixPattern;
-    UnicodeString*          fNegPrefixPattern;
-    UnicodeString*          fNegSuffixPattern;
-
-    /**
-     * Formatter for ChoiceFormat-based currency names.  If this field
-     * is not null, then delegate to it to format currency symbols.
-     * @since ICU 2.6
-     */
-    ChoiceFormat*           fCurrencyChoice;
-
-    int32_t                 fMultiplier;
-    int32_t                 fGroupingSize;
-    int32_t                 fGroupingSize2;
-    UBool                   fDecimalSeparatorAlwaysShown;
-    /*transient*/ UBool     fIsCurrencyFormat;
-    DecimalFormatSymbols*   fSymbols;
-
-    UBool                   fUseSignificantDigits;
-    int32_t                 fMinSignificantDigits;
-    int32_t                 fMaxSignificantDigits;
-
-    UBool                   fUseExponentialNotation;
-    int8_t                  fMinExponentDigits;
-    UBool                   fExponentSignAlwaysShown;
-
-    /* If fRoundingIncrement is NULL, there is no rounding.  Otherwise, round to
-     * fRoundingIncrement.getDouble().  Since this operation may be expensive,
-     * we cache the result in fRoundingDouble.  All methods that update
-     * fRoundingIncrement also update fRoundingDouble. */
-    DigitList*              fRoundingIncrement;
-    /*transient*/ double    fRoundingDouble;
-    ERoundingMode           fRoundingMode;
-
-    UChar32                 fPad;
-    int32_t                 fFormatWidth;
-    EPadPosition            fPadPosition;
-
-protected:
-
-    /**
-     * Returns the currency in effect for this formatter.  Subclasses
-     * should override this method as needed.  Unlike getCurrency(),
-     * this method should never return "".
-     * @result output parameter for null-terminated result, which must
-     * have a capacity of at least 4
-     * @internal
-     */
-    virtual void getEffectiveCurrency(UChar* result, UErrorCode& ec) const;
-
-  /** number of integer digits 
-   * @stable ICU 2.4
-   */  
-    static const int32_t  kDoubleIntegerDigits;
-  /** number of fraction digits 
-   * @stable ICU 2.4
-   */  
-    static const int32_t  kDoubleFractionDigits;
-
-    /**
-     * When someone turns on scientific mode, we assume that more than this
-     * number of digits is due to flipping from some other mode that didn't
-     * restrict the maximum, and so we force 1 integer digit.  We don't bother
-     * to track and see if someone is using exponential notation with more than
-     * this number, it wouldn't make sense anyway, and this is just to make sure
-     * that someone turning on scientific mode with default settings doesn't
-     * end up with lots of zeroes.
-     * @stable ICU 2.8
-     */
-    static const int32_t  kMaxScientificIntegerDigits;
-};
-
-inline UnicodeString&
-DecimalFormat::format(const Formattable& obj,
-                      UnicodeString& appendTo,
-                      UErrorCode& status) const {
-    // Don't use Format:: - use immediate base class only,
-    // in case immediate base modifies behavior later.
-    return NumberFormat::format(obj, appendTo, status);
-}
-
-inline UnicodeString&
-DecimalFormat::format(double number,
-                      UnicodeString& appendTo) const {
-    FieldPosition pos(0);
-    return format(number, appendTo, pos);
-}
-
-inline UnicodeString&
-DecimalFormat::format(int32_t number,
-                      UnicodeString& appendTo) const {
-    FieldPosition pos(0);
-    return format((int64_t)number, appendTo, pos);
-}
-
-inline const UnicodeString &
-DecimalFormat::getConstSymbol(DecimalFormatSymbols::ENumberFormatSymbol symbol) const {
-    return fSymbols->getConstSymbol(symbol);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _DECIMFMT
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/decimfmt.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/decimfmt.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/decimfmt.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/decimfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1901 @@
+/*
+********************************************************************************
+*   Copyright (C) 1997-2009, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+********************************************************************************
+*
+* File DECIMFMT.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   02/19/97    aliu        Converted from java.
+*   03/20/97    clhuang     Updated per C++ implementation.
+*   04/03/97    aliu        Rewrote parsing and formatting completely, and
+*                           cleaned up and debugged.  Actually works now.
+*   04/17/97    aliu        Changed DigitCount to int per code review.
+*   07/10/97    helena      Made ParsePosition a class and get rid of the function
+*                           hiding problems.
+*   09/09/97    aliu        Ported over support for exponential formats.
+*    07/20/98    stephen        Changed documentation
+********************************************************************************
+*/
+ 
+#ifndef DECIMFMT_H
+#define DECIMFMT_H
+ 
+#include "unicode/utypes.h"
+/**
+ * \file 
+ * \brief C++ API: Formats decimal numbers.
+ */
+ 
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/dcfmtsym.h"
+#include "unicode/numfmt.h"
+#include "unicode/locid.h"
+
+U_NAMESPACE_BEGIN
+
+class DigitList;
+class ChoiceFormat;
+class UnicodeSet;
+
+/**
+ * DecimalFormat is a concrete subclass of NumberFormat that formats decimal
+ * numbers. It has a variety of features designed to make it possible to parse
+ * and format numbers in any locale, including support for Western, Arabic, or
+ * Indic digits.  It also supports different flavors of numbers, including
+ * integers ("123"), fixed-point numbers ("123.4"), scientific notation
+ * ("1.23E4"), percentages ("12%"), and currency amounts ("$123").  All of these
+ * flavors can be easily localized.
+ *
+ * <p>To obtain a NumberFormat for a specific locale (including the default
+ * locale) call one of NumberFormat's factory methods such as
+ * createInstance(). Do not call the DecimalFormat constructors directly, unless
+ * you know what you are doing, since the NumberFormat factory methods may
+ * return subclasses other than DecimalFormat.
+ *
+ * <p><strong>Example Usage</strong>
+ *
+ * \code
+ *     // Normally we would have a GUI with a menu for this
+ *     int32_t locCount;
+ *     const Locale* locales = NumberFormat::getAvailableLocales(locCount);
+ * 
+ *     double myNumber = -1234.56;
+ *     UErrorCode success = U_ZERO_ERROR;
+ *     NumberFormat* form;
+ * 
+ *     // Print out a number with the localized number, currency and percent
+ *     // format for each locale.
+ *     UnicodeString countryName;
+ *     UnicodeString displayName;
+ *     UnicodeString str;
+ *     UnicodeString pattern;
+ *     Formattable fmtable;
+ *     for (int32_t j = 0; j < 3; ++j) {
+ *         cout << endl << "FORMAT " << j << endl;
+ *         for (int32_t i = 0; i < locCount; ++i) {
+ *             if (locales[i].getCountry(countryName).size() == 0) {
+ *                 // skip language-only
+ *                 continue;
+ *             }
+ *             switch (j) {
+ *             case 0:
+ *                 form = NumberFormat::createInstance(locales[i], success ); break;
+ *             case 1:
+ *                 form = NumberFormat::createCurrencyInstance(locales[i], success ); break;
+ *             default:
+ *                 form = NumberFormat::createPercentInstance(locales[i], success ); break;
+ *             }
+ *             if (form) {
+ *                 str.remove();
+ *                 pattern = ((DecimalFormat*)form)->toPattern(pattern);
+ *                 cout << locales[i].getDisplayName(displayName) << ": " << pattern;
+ *                 cout << "  ->  " << form->format(myNumber,str) << endl;
+ *                 form->parse(form->format(myNumber,str), fmtable, success);
+ *                 delete form;  
+ *             }
+ *         }
+ *     }
+ * \endcode
+ *
+ * <p><strong>Patterns</strong>
+ *
+ * <p>A DecimalFormat consists of a <em>pattern</em> and a set of
+ * <em>symbols</em>.  The pattern may be set directly using
+ * applyPattern(), or indirectly using other API methods which
+ * manipulate aspects of the pattern, such as the minimum number of integer
+ * digits.  The symbols are stored in a DecimalFormatSymbols
+ * object.  When using the NumberFormat factory methods, the
+ * pattern and symbols are read from ICU's locale data.
+ * 
+ * <p><strong>Special Pattern Characters</strong>
+ *
+ * <p>Many characters in a pattern are taken literally; they are matched during
+ * parsing and output unchanged during formatting.  Special characters, on the
+ * other hand, stand for other characters, strings, or classes of characters.
+ * For example, the '#' character is replaced by a localized digit.  Often the
+ * replacement character is the same as the pattern character; in the U.S. locale,
+ * the ',' grouping character is replaced by ','.  However, the replacement is
+ * still happening, and if the symbols are modified, the grouping character
+ * changes.  Some special characters affect the behavior of the formatter by
+ * their presence; for example, if the percent character is seen, then the
+ * value is multiplied by 100 before being displayed.
+ *
+ * <p>To insert a special character in a pattern as a literal, that is, without
+ * any special meaning, the character must be quoted.  There are some exceptions to
+ * this which are noted below.
+ *
+ * <p>The characters listed here are used in non-localized patterns.  Localized
+ * patterns use the corresponding characters taken from this formatter's
+ * DecimalFormatSymbols object instead, and these characters lose
+ * their special status.  Two exceptions are the currency sign and quote, which
+ * are not localized.
+ *
+ * <table border=0 cellspacing=3 cellpadding=0>
+ *   <tr bgcolor="#ccccff">
+ *     <td align=left><strong>Symbol</strong>
+ *     <td align=left><strong>Location</strong>
+ *     <td align=left><strong>Localized?</strong>
+ *     <td align=left><strong>Meaning</strong>
+ *   <tr valign=top>
+ *     <td><code>0</code>
+ *     <td>Number
+ *     <td>Yes
+ *     <td>Digit
+ *   <tr valign=top bgcolor="#eeeeff">
+ *     <td><code>1-9</code>
+ *     <td>Number
+ *     <td>Yes
+ *     <td>'1' through '9' indicate rounding.
+ *   <tr valign=top>
+ *     <td><code>\htmlonly&#x40;\endhtmlonly</code> <!--doxygen doesn't like @-->
+ *     <td>Number
+ *     <td>No
+ *     <td>Significant digit
+ *   <tr valign=top bgcolor="#eeeeff">
+ *     <td><code>#</code>
+ *     <td>Number
+ *     <td>Yes
+ *     <td>Digit, zero shows as absent
+ *   <tr valign=top>
+ *     <td><code>.</code>
+ *     <td>Number
+ *     <td>Yes
+ *     <td>Decimal separator or monetary decimal separator
+ *   <tr valign=top bgcolor="#eeeeff">
+ *     <td><code>-</code>
+ *     <td>Number
+ *     <td>Yes
+ *     <td>Minus sign
+ *   <tr valign=top>
+ *     <td><code>,</code>
+ *     <td>Number
+ *     <td>Yes
+ *     <td>Grouping separator
+ *   <tr valign=top bgcolor="#eeeeff">
+ *     <td><code>E</code>
+ *     <td>Number
+ *     <td>Yes
+ *     <td>Separates mantissa and exponent in scientific notation.
+ *         <em>Need not be quoted in prefix or suffix.</em>
+ *   <tr valign=top>
+ *     <td><code>+</code>
+ *     <td>Exponent
+ *     <td>Yes
+ *     <td>Prefix positive exponents with localized plus sign.
+ *         <em>Need not be quoted in prefix or suffix.</em>
+ *   <tr valign=top bgcolor="#eeeeff">
+ *     <td><code>;</code>
+ *     <td>Subpattern boundary
+ *     <td>Yes
+ *     <td>Separates positive and negative subpatterns
+ *   <tr valign=top>
+ *     <td><code>\%</code>
+ *     <td>Prefix or suffix
+ *     <td>Yes
+ *     <td>Multiply by 100 and show as percentage
+ *   <tr valign=top bgcolor="#eeeeff">
+ *     <td><code>\\u2030</code>
+ *     <td>Prefix or suffix
+ *     <td>Yes
+ *     <td>Multiply by 1000 and show as per mille
+ *   <tr valign=top>
+ *     <td><code>\htmlonly&curren;\endhtmlonly</code> (<code>\\u00A4</code>)
+ *     <td>Prefix or suffix
+ *     <td>No
+ *     <td>Currency sign, replaced by currency symbol.  If
+ *         doubled, replaced by international currency symbol.
+ *         If present in a pattern, the monetary decimal separator
+ *         is used instead of the decimal separator.
+ *   <tr valign=top bgcolor="#eeeeff">
+ *     <td><code>'</code>
+ *     <td>Prefix or suffix
+ *     <td>No
+ *     <td>Used to quote special characters in a prefix or suffix,
+ *         for example, <code>"'#'#"</code> formats 123 to
+ *         <code>"#123"</code>.  To create a single quote
+ *         itself, use two in a row: <code>"# o''clock"</code>.
+ *   <tr valign=top>
+ *     <td><code>*</code>
+ *     <td>Prefix or suffix boundary
+ *     <td>Yes
+ *     <td>Pad escape, precedes pad character
+ * </table>
+ *
+ * <p>A DecimalFormat pattern contains a postive and negative
+ * subpattern, for example, "#,##0.00;(#,##0.00)".  Each subpattern has a
+ * prefix, a numeric part, and a suffix.  If there is no explicit negative
+ * subpattern, the negative subpattern is the localized minus sign prefixed to the
+ * positive subpattern. That is, "0.00" alone is equivalent to "0.00;-0.00".  If there
+ * is an explicit negative subpattern, it serves only to specify the negative
+ * prefix and suffix; the number of digits, minimal digits, and other
+ * characteristics are ignored in the negative subpattern. That means that
+ * "#,##0.0#;(#)" has precisely the same result as "#,##0.0#;(#,##0.0#)".
+ *
+ * <p>The prefixes, suffixes, and various symbols used for infinity, digits,
+ * thousands separators, decimal separators, etc. may be set to arbitrary
+ * values, and they will appear properly during formatting.  However, care must
+ * be taken that the symbols and strings do not conflict, or parsing will be
+ * unreliable.  For example, either the positive and negative prefixes or the
+ * suffixes must be distinct for parse() to be able
+ * to distinguish positive from negative values.  Another example is that the
+ * decimal separator and thousands separator should be distinct characters, or
+ * parsing will be impossible.
+ *
+ * <p>The <em>grouping separator</em> is a character that separates clusters of
+ * integer digits to make large numbers more legible.  It commonly used for
+ * thousands, but in some locales it separates ten-thousands.  The <em>grouping
+ * size</em> is the number of digits between the grouping separators, such as 3
+ * for "100,000,000" or 4 for "1 0000 0000". There are actually two different
+ * grouping sizes: One used for the least significant integer digits, the
+ * <em>primary grouping size</em>, and one used for all others, the
+ * <em>secondary grouping size</em>.  In most locales these are the same, but
+ * sometimes they are different. For example, if the primary grouping interval
+ * is 3, and the secondary is 2, then this corresponds to the pattern
+ * "#,##,##0", and the number 123456789 is formatted as "12,34,56,789".  If a
+ * pattern contains multiple grouping separators, the interval between the last
+ * one and the end of the integer defines the primary grouping size, and the
+ * interval between the last two defines the secondary grouping size. All others
+ * are ignored, so "#,##,###,####" == "###,###,####" == "##,#,###,####".
+ *
+ * <p>Illegal patterns, such as "#.#.#" or "#.###,###", will cause
+ * DecimalFormat to set a failing UErrorCode.
+ *
+ * <p><strong>Pattern BNF</strong>
+ *
+ * <pre>
+ * pattern    := subpattern (';' subpattern)?
+ * subpattern := prefix? number exponent? suffix?
+ * number     := (integer ('.' fraction)?) | sigDigits
+ * prefix     := '\\u0000'..'\\uFFFD' - specialCharacters
+ * suffix     := '\\u0000'..'\\uFFFD' - specialCharacters
+ * integer    := '#'* '0'* '0'
+ * fraction   := '0'* '#'*
+ * sigDigits  := '#'* '@' '@'* '#'*
+ * exponent   := 'E' '+'? '0'* '0'
+ * padSpec    := '*' padChar
+ * padChar    := '\\u0000'..'\\uFFFD' - quote
+ * &nbsp;
+ * Notation:
+ *   X*       0 or more instances of X
+ *   X?       0 or 1 instances of X
+ *   X|Y      either X or Y
+ *   C..D     any character from C up to D, inclusive
+ *   S-T      characters in S, except those in T
+ * </pre>
+ * The first subpattern is for positive numbers. The second (optional)
+ * subpattern is for negative numbers.
+ * 
+ * <p>Not indicated in the BNF syntax above:
+ *
+ * <ul><li>The grouping separator ',' can occur inside the integer and
+ * sigDigits elements, between any two pattern characters of that
+ * element, as long as the integer or sigDigits element is not
+ * followed by the exponent element.
+ *
+ * <li>Two grouping intervals are recognized: That between the
+ *     decimal point and the first grouping symbol, and that
+ *     between the first and second grouping symbols. These
+ *     intervals are identical in most locales, but in some
+ *     locales they differ. For example, the pattern
+ *     &quot;#,##,###&quot; formats the number 123456789 as
+ *     &quot;12,34,56,789&quot;.</li>
+ * 
+ * <li>The pad specifier <code>padSpec</code> may appear before the prefix,
+ * after the prefix, before the suffix, after the suffix, or not at all.
+ *
+ * <li>In place of '0', the digits '1' through '9' may be used to
+ * indicate a rounding increment.
+ * </ul>
+ *
+ * <p><strong>Parsing</strong>
+ *
+ * <p>DecimalFormat parses all Unicode characters that represent
+ * decimal digits, as defined by u_charDigitValue().  In addition,
+ * DecimalFormat also recognizes as digits the ten consecutive
+ * characters starting with the localized zero digit defined in the
+ * DecimalFormatSymbols object.  During formatting, the
+ * DecimalFormatSymbols-based digits are output.
+ *
+ * <p>During parsing, grouping separators are ignored.
+ *
+ * <p>If parse(UnicodeString&,Formattable&,ParsePosition&)
+ * fails to parse a string, it leaves the parse position unchanged.
+ * The convenience method parse(UnicodeString&,Formattable&,UErrorCode&)
+ * indicates parse failure by setting a failing
+ * UErrorCode.
+ *
+ * <p><strong>Formatting</strong>
+ *
+ * <p>Formatting is guided by several parameters, all of which can be
+ * specified either using a pattern or using the API.  The following
+ * description applies to formats that do not use <a href="#sci">scientific
+ * notation</a> or <a href="#sigdig">significant digits</a>.
+ *
+ * <ul><li>If the number of actual integer digits exceeds the
+ * <em>maximum integer digits</em>, then only the least significant
+ * digits are shown.  For example, 1997 is formatted as "97" if the
+ * maximum integer digits is set to 2.
+ *
+ * <li>If the number of actual integer digits is less than the
+ * <em>minimum integer digits</em>, then leading zeros are added.  For
+ * example, 1997 is formatted as "01997" if the minimum integer digits
+ * is set to 5.
+ *
+ * <li>If the number of actual fraction digits exceeds the <em>maximum
+ * fraction digits</em>, then half-even rounding it performed to the
+ * maximum fraction digits.  For example, 0.125 is formatted as "0.12"
+ * if the maximum fraction digits is 2.  This behavior can be changed
+ * by specifying a rounding increment and a rounding mode.
+ *
+ * <li>If the number of actual fraction digits is less than the
+ * <em>minimum fraction digits</em>, then trailing zeros are added.
+ * For example, 0.125 is formatted as "0.1250" if the mimimum fraction
+ * digits is set to 4.
+ *
+ * <li>Trailing fractional zeros are not displayed if they occur
+ * <em>j</em> positions after the decimal, where <em>j</em> is less
+ * than the maximum fraction digits. For example, 0.10004 is
+ * formatted as "0.1" if the maximum fraction digits is four or less.
+ * </ul>
+ *
+ * <p><strong>Special Values</strong>
+ *
+ * <p><code>NaN</code> is represented as a single character, typically
+ * <code>\\uFFFD</code>.  This character is determined by the
+ * DecimalFormatSymbols object.  This is the only value for which
+ * the prefixes and suffixes are not used.
+ *
+ * <p>Infinity is represented as a single character, typically
+ * <code>\\u221E</code>, with the positive or negative prefixes and suffixes
+ * applied.  The infinity character is determined by the
+ * DecimalFormatSymbols object.
+ *
+ * <a name="sci"><strong>Scientific Notation</strong></a>
+ *
+ * <p>Numbers in scientific notation are expressed as the product of a mantissa
+ * and a power of ten, for example, 1234 can be expressed as 1.234 x 10<sup>3</sup>. The
+ * mantissa is typically in the half-open interval [1.0, 10.0) or sometimes [0.0, 1.0),
+ * but it need not be.  DecimalFormat supports arbitrary mantissas.
+ * DecimalFormat can be instructed to use scientific
+ * notation through the API or through the pattern.  In a pattern, the exponent
+ * character immediately followed by one or more digit characters indicates
+ * scientific notation.  Example: "0.###E0" formats the number 1234 as
+ * "1.234E3".
+ *
+ * <ul>
+ * <li>The number of digit characters after the exponent character gives the
+ * minimum exponent digit count.  There is no maximum.  Negative exponents are
+ * formatted using the localized minus sign, <em>not</em> the prefix and suffix
+ * from the pattern.  This allows patterns such as "0.###E0 m/s".  To prefix
+ * positive exponents with a localized plus sign, specify '+' between the
+ * exponent and the digits: "0.###E+0" will produce formats "1E+1", "1E+0",
+ * "1E-1", etc.  (In localized patterns, use the localized plus sign rather than
+ * '+'.)
+ *
+ * <li>The minimum number of integer digits is achieved by adjusting the
+ * exponent.  Example: 0.00123 formatted with "00.###E0" yields "12.3E-4".  This
+ * only happens if there is no maximum number of integer digits.  If there is a
+ * maximum, then the minimum number of integer digits is fixed at one.
+ *
+ * <li>The maximum number of integer digits, if present, specifies the exponent
+ * grouping.  The most common use of this is to generate <em>engineering
+ * notation</em>, in which the exponent is a multiple of three, e.g.,
+ * "##0.###E0".  The number 12345 is formatted using "##0.####E0" as "12.345E3".
+ *
+ * <li>When using scientific notation, the formatter controls the
+ * digit counts using significant digits logic.  The maximum number of
+ * significant digits limits the total number of integer and fraction
+ * digits that will be shown in the mantissa; it does not affect
+ * parsing.  For example, 12345 formatted with "##0.##E0" is "12.3E3".
+ * See the section on significant digits for more details.
+ *
+ * <li>The number of significant digits shown is determined as
+ * follows: If areSignificantDigitsUsed() returns false, then the
+ * minimum number of significant digits shown is one, and the maximum
+ * number of significant digits shown is the sum of the <em>minimum
+ * integer</em> and <em>maximum fraction</em> digits, and is
+ * unaffected by the maximum integer digits.  If this sum is zero,
+ * then all significant digits are shown.  If
+ * areSignificantDigitsUsed() returns true, then the significant digit
+ * counts are specified by getMinimumSignificantDigits() and
+ * getMaximumSignificantDigits().  In this case, the number of
+ * integer digits is fixed at one, and there is no exponent grouping.
+ *
+ * <li>Exponential patterns may not contain grouping separators.
+ * </ul>
+ *
+ * <a name="sigdig"><strong>Significant Digits</strong></a>
+ *
+ * <code>DecimalFormat</code> has two ways of controlling how many
+ * digits are shows: (a) significant digits counts, or (b) integer and
+ * fraction digit counts.  Integer and fraction digit counts are
+ * described above.  When a formatter is using significant digits
+ * counts, the number of integer and fraction digits is not specified
+ * directly, and the formatter settings for these counts are ignored.
+ * Instead, the formatter uses however many integer and fraction
+ * digits are required to display the specified number of significant
+ * digits.  Examples:
+ *
+ * <table border=0 cellspacing=3 cellpadding=0>
+ *   <tr bgcolor="#ccccff">
+ *     <td align=left>Pattern
+ *     <td align=left>Minimum significant digits
+ *     <td align=left>Maximum significant digits
+ *     <td align=left>Number
+ *     <td align=left>Output of format()
+ *   <tr valign=top>
+ *     <td><code>\@\@\@</code>
+ *     <td>3
+ *     <td>3
+ *     <td>12345
+ *     <td><code>12300</code>
+ *   <tr valign=top bgcolor="#eeeeff">
+ *     <td><code>\@\@\@</code>
+ *     <td>3
+ *     <td>3
+ *     <td>0.12345
+ *     <td><code>0.123</code>
+ *   <tr valign=top>
+ *     <td><code>\@\@##</code>
+ *     <td>2
+ *     <td>4
+ *     <td>3.14159
+ *     <td><code>3.142</code>
+ *   <tr valign=top bgcolor="#eeeeff">
+ *     <td><code>\@\@##</code>
+ *     <td>2
+ *     <td>4
+ *     <td>1.23004
+ *     <td><code>1.23</code>
+ * </table>
+ *
+ * <ul>
+ * <li>Significant digit counts may be expressed using patterns that
+ * specify a minimum and maximum number of significant digits.  These
+ * are indicated by the <code>'@'</code> and <code>'#'</code>
+ * characters.  The minimum number of significant digits is the number
+ * of <code>'@'</code> characters.  The maximum number of significant
+ * digits is the number of <code>'@'</code> characters plus the number
+ * of <code>'#'</code> characters following on the right.  For
+ * example, the pattern <code>"@@@"</code> indicates exactly 3
+ * significant digits.  The pattern <code>"@##"</code> indicates from
+ * 1 to 3 significant digits.  Trailing zero digits to the right of
+ * the decimal separator are suppressed after the minimum number of
+ * significant digits have been shown.  For example, the pattern
+ * <code>"@##"</code> formats the number 0.1203 as
+ * <code>"0.12"</code>.
+ *
+ * <li>If a pattern uses significant digits, it may not contain a
+ * decimal separator, nor the <code>'0'</code> pattern character.
+ * Patterns such as <code>"@00"</code> or <code>"@.###"</code> are
+ * disallowed.
+ *
+ * <li>Any number of <code>'#'</code> characters may be prepended to
+ * the left of the leftmost <code>'@'</code> character.  These have no
+ * effect on the minimum and maximum significant digits counts, but
+ * may be used to position grouping separators.  For example,
+ * <code>"#,#@#"</code> indicates a minimum of one significant digits,
+ * a maximum of two significant digits, and a grouping size of three.
+ *
+ * <li>In order to enable significant digits formatting, use a pattern
+ * containing the <code>'@'</code> pattern character.  Alternatively,
+ * call setSignificantDigitsUsed(TRUE).
+ *
+ * <li>In order to disable significant digits formatting, use a
+ * pattern that does not contain the <code>'@'</code> pattern
+ * character. Alternatively, call setSignificantDigitsUsed(FALSE).
+ *
+ * <li>The number of significant digits has no effect on parsing.
+ *
+ * <li>Significant digits may be used together with exponential notation. Such
+ * patterns are equivalent to a normal exponential pattern with a minimum and
+ * maximum integer digit count of one, a minimum fraction digit count of
+ * <code>getMinimumSignificantDigits() - 1</code>, and a maximum fraction digit
+ * count of <code>getMaximumSignificantDigits() - 1</code>. For example, the
+ * pattern <code>"@@###E0"</code> is equivalent to <code>"0.0###E0"</code>.
+ *
+ * <li>If signficant digits are in use, then the integer and fraction
+ * digit counts, as set via the API, are ignored.  If significant
+ * digits are not in use, then the signficant digit counts, as set via
+ * the API, are ignored.
+ *
+ * </ul>
+ *
+ * <p><strong>Padding</strong>
+ *
+ * <p>DecimalFormat supports padding the result of
+ * format() to a specific width.  Padding may be specified either
+ * through the API or through the pattern syntax.  In a pattern the pad escape
+ * character, followed by a single pad character, causes padding to be parsed
+ * and formatted.  The pad escape character is '*' in unlocalized patterns, and
+ * can be localized using DecimalFormatSymbols::setSymbol() with a
+ * DecimalFormatSymbols::kPadEscapeSymbol
+ * selector.  For example, <code>"$*x#,##0.00"</code> formats 123 to
+ * <code>"$xx123.00"</code>, and 1234 to <code>"$1,234.00"</code>.
+ *
+ * <ul>
+ * <li>When padding is in effect, the width of the positive subpattern,
+ * including prefix and suffix, determines the format width.  For example, in
+ * the pattern <code>"* #0 o''clock"</code>, the format width is 10.
+ *
+ * <li>The width is counted in 16-bit code units (UChars).
+ *
+ * <li>Some parameters which usually do not matter have meaning when padding is
+ * used, because the pattern width is significant with padding.  In the pattern
+ * "* ##,##,#,##0.##", the format width is 14.  The initial characters "##,##,"
+ * do not affect the grouping size or maximum integer digits, but they do affect
+ * the format width.
+ *
+ * <li>Padding may be inserted at one of four locations: before the prefix,
+ * after the prefix, before the suffix, or after the suffix.  If padding is
+ * specified in any other location, applyPattern()
+ * sets a failing UErrorCode.  If there is no prefix,
+ * before the prefix and after the prefix are equivalent, likewise for the
+ * suffix.
+ *
+ * <li>When specified in a pattern, the 32-bit code point immediately
+ * following the pad escape is the pad character. This may be any character,
+ * including a special pattern character. That is, the pad escape
+ * <em>escapes</em> the following character. If there is no character after
+ * the pad escape, then the pattern is illegal.
+ *
+ * </ul>
+ *
+ * <p><strong>Rounding</strong>
+ *
+ * <p>DecimalFormat supports rounding to a specific increment.  For
+ * example, 1230 rounded to the nearest 50 is 1250.  1.234 rounded to the
+ * nearest 0.65 is 1.3.  The rounding increment may be specified through the API
+ * or in a pattern.  To specify a rounding increment in a pattern, include the
+ * increment in the pattern itself.  "#,#50" specifies a rounding increment of
+ * 50.  "#,##0.05" specifies a rounding increment of 0.05.
+ *
+ * <ul>
+ * <li>Rounding only affects the string produced by formatting.  It does
+ * not affect parsing or change any numerical values.
+ *
+ * <li>A <em>rounding mode</em> determines how values are rounded; see
+ * DecimalFormat::ERoundingMode.  Rounding increments specified in
+ * patterns use the default mode, DecimalFormat::kRoundHalfEven.
+ *
+ * <li>Some locales use rounding in their currency formats to reflect the
+ * smallest currency denomination.
+ *
+ * <li>In a pattern, digits '1' through '9' specify rounding, but otherwise
+ * behave identically to digit '0'.
+ * </ul>
+ *
+ * <p><strong>Synchronization</strong>
+ *
+ * <p>DecimalFormat objects are not synchronized.  Multiple
+ * threads should not access one formatter concurrently.
+ *
+ * <p><strong>Subclassing</strong>
+ *
+ * <p><em>User subclasses are not supported.</em> While clients may write
+ * subclasses, such code will not necessarily work and will not be
+ * guaranteed to work stably from release to release.
+ */
+class U_I18N_API DecimalFormat: public NumberFormat {
+public:
+    /**
+     * Rounding mode.
+     * @stable ICU 2.4
+     */
+    enum ERoundingMode {
+        kRoundCeiling,  /**< Round towards positive infinity */
+        kRoundFloor,    /**< Round towards negative infinity */
+        kRoundDown,     /**< Round towards zero */
+        kRoundUp,       /**< Round away from zero */
+        kRoundHalfEven, /**< Round towards the nearest integer, or
+                             towards the nearest even integer if equidistant */
+        kRoundHalfDown, /**< Round towards the nearest integer, or
+                             towards zero if equidistant */
+        kRoundHalfUp    /**< Round towards the nearest integer, or
+                             away from zero if equidistant */
+        // We don't support ROUND_UNNECESSARY
+    };
+
+    /**
+     * Pad position.
+     * @stable ICU 2.4
+     */
+    enum EPadPosition {
+        kPadBeforePrefix,
+        kPadAfterPrefix,
+        kPadBeforeSuffix,
+        kPadAfterSuffix
+    };
+
+    /**
+     * Create a DecimalFormat using the default pattern and symbols
+     * for the default locale. This is a convenient way to obtain a
+     * DecimalFormat when internationalization is not the main concern.
+     * <P>
+     * To obtain standard formats for a given locale, use the factory methods
+     * on NumberFormat such as createInstance. These factories will
+     * return the most appropriate sub-class of NumberFormat for a given
+     * locale.
+     * @param status    Output param set to success/failure code. If the
+     *                  pattern is invalid this will be set to a failure code.
+     * @stable ICU 2.0
+     */
+    DecimalFormat(UErrorCode& status);
+
+    /**
+     * Create a DecimalFormat from the given pattern and the symbols
+     * for the default locale. This is a convenient way to obtain a
+     * DecimalFormat when internationalization is not the main concern.
+     * <P>
+     * To obtain standard formats for a given locale, use the factory methods
+     * on NumberFormat such as createInstance. These factories will
+     * return the most appropriate sub-class of NumberFormat for a given
+     * locale.
+     * @param pattern   A non-localized pattern string.
+     * @param status    Output param set to success/failure code. If the
+     *                  pattern is invalid this will be set to a failure code.
+     * @stable ICU 2.0
+     */
+    DecimalFormat(const UnicodeString& pattern,
+                  UErrorCode& status);
+
+    /**
+     * Create a DecimalFormat from the given pattern and symbols.
+     * Use this constructor when you need to completely customize the
+     * behavior of the format.
+     * <P>
+     * To obtain standard formats for a given
+     * locale, use the factory methods on NumberFormat such as
+     * createInstance or createCurrencyInstance. If you need only minor adjustments
+     * to a standard format, you can modify the format returned by
+     * a NumberFormat factory method.
+     *
+     * @param pattern           a non-localized pattern string
+     * @param symbolsToAdopt    the set of symbols to be used.  The caller should not
+     *                          delete this object after making this call.
+     * @param status            Output param set to success/failure code. If the
+     *                          pattern is invalid this will be set to a failure code.
+     * @stable ICU 2.0
+     */
+    DecimalFormat(  const UnicodeString& pattern,
+                    DecimalFormatSymbols* symbolsToAdopt,
+                    UErrorCode& status);
+
+    /**
+     * Create a DecimalFormat from the given pattern and symbols.
+     * Use this constructor when you need to completely customize the
+     * behavior of the format.
+     * <P>
+     * To obtain standard formats for a given
+     * locale, use the factory methods on NumberFormat such as
+     * createInstance or createCurrencyInstance. If you need only minor adjustments
+     * to a standard format, you can modify the format returned by
+     * a NumberFormat factory method.
+     *
+     * @param pattern           a non-localized pattern string
+     * @param symbolsToAdopt    the set of symbols to be used.  The caller should not
+     *                          delete this object after making this call.
+     * @param parseError        Output param to receive errors occured during parsing 
+     * @param status            Output param set to success/failure code. If the
+     *                          pattern is invalid this will be set to a failure code.
+     * @stable ICU 2.0
+     */
+    DecimalFormat(  const UnicodeString& pattern,
+                    DecimalFormatSymbols* symbolsToAdopt,
+                    UParseError& parseError,
+                    UErrorCode& status);
+    /**
+     * Create a DecimalFormat from the given pattern and symbols.
+     * Use this constructor when you need to completely customize the
+     * behavior of the format.
+     * <P>
+     * To obtain standard formats for a given
+     * locale, use the factory methods on NumberFormat such as
+     * createInstance or createCurrencyInstance. If you need only minor adjustments
+     * to a standard format, you can modify the format returned by
+     * a NumberFormat factory method.
+     *
+     * @param pattern           a non-localized pattern string
+     * @param symbols   the set of symbols to be used
+     * @param status            Output param set to success/failure code. If the
+     *                          pattern is invalid this will be set to a failure code.
+     * @stable ICU 2.0
+     */
+    DecimalFormat(  const UnicodeString& pattern,
+                    const DecimalFormatSymbols& symbols,
+                    UErrorCode& status);
+
+    /**
+     * Copy constructor.
+     * 
+     * @param source    the DecimalFormat object to be copied from.
+     * @stable ICU 2.0
+     */
+    DecimalFormat(const DecimalFormat& source);
+
+    /**
+     * Assignment operator.
+     *
+     * @param rhs    the DecimalFormat object to be copied.
+     * @stable ICU 2.0
+     */
+    DecimalFormat& operator=(const DecimalFormat& rhs);
+
+    /**
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~DecimalFormat();
+
+    /**
+     * Clone this Format object polymorphically. The caller owns the
+     * result and should delete it when done.
+     *
+     * @return    a polymorphic copy of this DecimalFormat.
+     * @stable ICU 2.0
+     */
+    virtual Format* clone(void) const;
+
+    /**
+     * Return true if the given Format objects are semantically equal.
+     * Objects of different subclasses are considered unequal.
+     *
+     * @param other    the object to be compared with.
+     * @return         true if the given Format objects are semantically equal.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const Format& other) const;
+
+    /**
+     * Format a double or long number using base-10 representation.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+    */
+    virtual UnicodeString& format(double number,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos) const;
+    /**
+     * Format a long number using base-10 representation.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& format(int32_t number,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos) const;
+    /**
+     * Format an int64 number using base-10 representation.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.8
+     */
+    virtual UnicodeString& format(int64_t number,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos) const;
+
+    /**
+     * Format a Formattable using base-10 representation.
+     *
+     * @param obj       The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @param status    Error code indicating success or failure.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& format(const Formattable& obj,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos,
+                                  UErrorCode& status) const;
+
+    /**
+     * Redeclared NumberFormat method.
+     * Formats an object to produce a string.
+     *
+     * @param obj       The object to format.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param status    Output parameter filled in with success or failure status.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(const Formattable& obj,
+                          UnicodeString& appendTo,
+                          UErrorCode& status) const;
+
+    /**
+     * Redeclared NumberFormat method.
+     * Format a double number.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(double number,
+                          UnicodeString& appendTo) const;
+
+    /**
+     * Redeclared NumberFormat method.
+     * Format a long number. These methods call the NumberFormat
+     * pure virtual format() methods with the default FieldPosition.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(int32_t number,
+                          UnicodeString& appendTo) const;
+
+    /**
+     * Redeclared NumberFormat method.
+     * Format an int64 number. These methods call the NumberFormat
+     * pure virtual format() methods with the default FieldPosition.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.8
+     */
+    UnicodeString& format(int64_t number,
+                          UnicodeString& appendTo) const;
+   /**
+    * Parse the given string using this object's choices. The method
+    * does string comparisons to try to find an optimal match.
+    * If no object can be parsed, index is unchanged, and NULL is
+    * returned.  The result is returned as the most parsimonious
+    * type of Formattable that will accomodate all of the
+    * necessary precision.  For example, if the result is exactly 12,
+    * it will be returned as a long.  However, if it is 1.5, it will
+    * be returned as a double.
+    *
+    * @param text           The text to be parsed.
+    * @param result         Formattable to be set to the parse result.
+    *                       If parse fails, return contents are undefined.
+    * @param parsePosition  The position to start parsing at on input.
+    *                       On output, moved to after the last successfully
+    *                       parse character. On parse failure, does not change.
+    * @see Formattable
+    * @stable ICU 2.0
+    */
+    virtual void parse(const UnicodeString& text,
+                       Formattable& result,
+                       ParsePosition& parsePosition) const;
+
+    // Declare here again to get rid of function hiding problems.
+    /** 
+     * Parse the given string using this object's choices.
+     *
+     * @param text           The text to be parsed.
+     * @param result         Formattable to be set to the parse result.
+     * @param status    Output parameter filled in with success or failure status.
+     * @stable ICU 2.0
+     */
+    virtual void parse(const UnicodeString& text, 
+                       Formattable& result, 
+                       UErrorCode& status) const;
+
+    /**
+     * Parses text from the given string as a currency amount.  Unlike
+     * the parse() method, this method will attempt to parse a generic
+     * currency name, searching for a match of this object's locale's
+     * currency display names, or for a 3-letter ISO currency code.
+     * This method will fail if this format is not a currency format,
+     * that is, if it does not contain the currency pattern symbol
+     * (U+00A4) in its prefix or suffix.
+     *
+     * @param text the string to parse
+     * @param result output parameter to receive result. This will have
+     * its currency set to the parsed ISO currency code.
+     * @param pos input-output position; on input, the position within
+     * text to match; must have 0 <= pos.getIndex() < text.length();
+     * on output, the position after the last matched character. If
+     * the parse fails, the position in unchanged upon output.
+     * @return a reference to result
+     * @internal
+     */
+    virtual Formattable& parseCurrency(const UnicodeString& text,
+                                       Formattable& result,
+                                       ParsePosition& pos) const;
+
+    /**
+     * Returns the decimal format symbols, which is generally not changed
+     * by the programmer or user.
+     * @return desired DecimalFormatSymbols
+     * @see DecimalFormatSymbols
+     * @stable ICU 2.0
+     */
+    virtual const DecimalFormatSymbols* getDecimalFormatSymbols(void) const;
+
+    /**
+     * Sets the decimal format symbols, which is generally not changed
+     * by the programmer or user.
+     * @param symbolsToAdopt DecimalFormatSymbols to be adopted.
+     * @stable ICU 2.0
+     */
+    virtual void adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt);
+
+    /**
+     * Sets the decimal format symbols, which is generally not changed
+     * by the programmer or user.
+     * @param symbols DecimalFormatSymbols.
+     * @stable ICU 2.0
+     */
+    virtual void setDecimalFormatSymbols(const DecimalFormatSymbols& symbols);
+
+
+    /**
+     * Get the positive prefix.
+     *
+     * @param result    Output param which will receive the positive prefix.
+     * @return          A reference to 'result'.
+     * Examples: +123, $123, sFr123
+     * @stable ICU 2.0
+     */
+    UnicodeString& getPositivePrefix(UnicodeString& result) const;
+
+    /**
+     * Set the positive prefix.
+     *
+     * @param newValue    the new value of the the positive prefix to be set.
+     * Examples: +123, $123, sFr123
+     * @stable ICU 2.0
+     */
+    virtual void setPositivePrefix(const UnicodeString& newValue);
+
+    /**
+     * Get the negative prefix.
+     *
+     * @param result    Output param which will receive the negative prefix.
+     * @return          A reference to 'result'.
+     * Examples: -123, ($123) (with negative suffix), sFr-123
+     * @stable ICU 2.0
+     */
+    UnicodeString& getNegativePrefix(UnicodeString& result) const;
+
+    /**
+     * Set the negative prefix.
+     *
+     * @param newValue    the new value of the the negative prefix to be set.
+     * Examples: -123, ($123) (with negative suffix), sFr-123
+     * @stable ICU 2.0
+     */
+    virtual void setNegativePrefix(const UnicodeString& newValue);
+
+    /**
+     * Get the positive suffix.
+     *
+     * @param result    Output param which will receive the positive suffix.
+     * @return          A reference to 'result'.
+     * Example: 123%
+     * @stable ICU 2.0
+     */
+    UnicodeString& getPositiveSuffix(UnicodeString& result) const;
+
+    /**
+     * Set the positive suffix.
+     *
+     * @param newValue    the new value of the positive suffix to be set.
+     * Example: 123%
+     * @stable ICU 2.0
+     */
+    virtual void setPositiveSuffix(const UnicodeString& newValue);
+
+    /**
+     * Get the negative suffix.
+     *
+     * @param result    Output param which will receive the negative suffix.
+     * @return          A reference to 'result'.
+     * Examples: -123%, ($123) (with positive suffixes)
+     * @stable ICU 2.0
+     */
+    UnicodeString& getNegativeSuffix(UnicodeString& result) const;
+
+    /**
+     * Set the negative suffix.
+     *
+     * @param newValue    the new value of the negative suffix to be set.
+     * Examples: 123%
+     * @stable ICU 2.0
+     */
+    virtual void setNegativeSuffix(const UnicodeString& newValue);
+
+    /**
+     * Get the multiplier for use in percent, permill, etc.
+     * For a percentage, set the suffixes to have "%" and the multiplier to be 100.
+     * (For Arabic, use arabic percent symbol).
+     * For a permill, set the suffixes to have "\\u2031" and the multiplier to be 1000.
+     *
+     * @return    the multiplier for use in percent, permill, etc.
+     * Examples: with 100, 1.23 -> "123", and "123" -> 1.23
+     * @stable ICU 2.0
+     */
+    int32_t getMultiplier(void) const;
+
+    /**
+     * Set the multiplier for use in percent, permill, etc.
+     * For a percentage, set the suffixes to have "%" and the multiplier to be 100.
+     * (For Arabic, use arabic percent symbol).
+     * For a permill, set the suffixes to have "\\u2031" and the multiplier to be 1000.
+     *
+     * @param newValue    the new value of the multiplier for use in percent, permill, etc.
+     * Examples: with 100, 1.23 -> "123", and "123" -> 1.23
+     * @stable ICU 2.0
+     */
+    virtual void setMultiplier(int32_t newValue);
+
+    /**
+     * Get the rounding increment.
+     * @return A positive rounding increment, or 0.0 if rounding
+     * is not in effect.
+     * @see #setRoundingIncrement
+     * @see #getRoundingMode
+     * @see #setRoundingMode
+     * @stable ICU 2.0
+     */
+    virtual double getRoundingIncrement(void) const;
+
+    /**
+     * Set the rounding increment.  This method also controls whether
+     * rounding is enabled.
+     * @param newValue A positive rounding increment, or 0.0 to disable rounding.
+     * Negative increments are equivalent to 0.0.
+     * @see #getRoundingIncrement
+     * @see #getRoundingMode
+     * @see #setRoundingMode
+     * @stable ICU 2.0
+     */
+    virtual void setRoundingIncrement(double newValue);
+
+    /**
+     * Get the rounding mode.
+     * @return A rounding mode
+     * @see #setRoundingIncrement
+     * @see #getRoundingIncrement
+     * @see #setRoundingMode
+     * @stable ICU 2.0
+     */
+    virtual ERoundingMode getRoundingMode(void) const;
+
+    /**
+     * Set the rounding mode.  This has no effect unless the rounding
+     * increment is greater than zero.
+     * @param roundingMode A rounding mode
+     * @see #setRoundingIncrement
+     * @see #getRoundingIncrement
+     * @see #getRoundingMode
+     * @stable ICU 2.0
+     */
+    virtual void setRoundingMode(ERoundingMode roundingMode);
+
+    /**
+     * Get the width to which the output of format() is padded.
+     * The width is counted in 16-bit code units.
+     * @return the format width, or zero if no padding is in effect
+     * @see #setFormatWidth
+     * @see #getPadCharacterString
+     * @see #setPadCharacter
+     * @see #getPadPosition
+     * @see #setPadPosition
+     * @stable ICU 2.0
+     */
+    virtual int32_t getFormatWidth(void) const;
+
+    /**
+     * Set the width to which the output of format() is padded.
+     * The width is counted in 16-bit code units.
+     * This method also controls whether padding is enabled.
+     * @param width the width to which to pad the result of
+     * format(), or zero to disable padding.  A negative
+     * width is equivalent to 0.
+     * @see #getFormatWidth
+     * @see #getPadCharacterString
+     * @see #setPadCharacter
+     * @see #getPadPosition
+     * @see #setPadPosition
+     * @stable ICU 2.0
+     */
+    virtual void setFormatWidth(int32_t width);
+
+    /**
+     * Get the pad character used to pad to the format width.  The
+     * default is ' '.
+     * @return a string containing the pad character. This will always
+     * have a length of one 32-bit code point.
+     * @see #setFormatWidth
+     * @see #getFormatWidth
+     * @see #setPadCharacter
+     * @see #getPadPosition
+     * @see #setPadPosition
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString getPadCharacterString() const;
+
+    /**
+     * Set the character used to pad to the format width.  If padding
+     * is not enabled, then this will take effect if padding is later
+     * enabled.
+     * @param padChar a string containing the pad charcter. If the string
+     * has length 0, then the pad characer is set to ' '.  Otherwise
+     * padChar.char32At(0) will be used as the pad character.
+     * @see #setFormatWidth
+     * @see #getFormatWidth
+     * @see #getPadCharacterString
+     * @see #getPadPosition
+     * @see #setPadPosition
+     * @stable ICU 2.0
+     */
+    virtual void setPadCharacter(const UnicodeString &padChar);
+
+    /**
+     * Get the position at which padding will take place.  This is the location
+     * at which padding will be inserted if the result of format()
+     * is shorter than the format width.
+     * @return the pad position, one of kPadBeforePrefix,
+     * kPadAfterPrefix, kPadBeforeSuffix, or
+     * kPadAfterSuffix.
+     * @see #setFormatWidth
+     * @see #getFormatWidth
+     * @see #setPadCharacter
+     * @see #getPadCharacterString
+     * @see #setPadPosition
+     * @see #EPadPosition
+     * @stable ICU 2.0
+     */
+    virtual EPadPosition getPadPosition(void) const;
+
+    /**
+     * Set the position at which padding will take place.  This is the location
+     * at which padding will be inserted if the result of format()
+     * is shorter than the format width.  This has no effect unless padding is
+     * enabled.
+     * @param padPos the pad position, one of kPadBeforePrefix,
+     * kPadAfterPrefix, kPadBeforeSuffix, or
+     * kPadAfterSuffix.
+     * @see #setFormatWidth
+     * @see #getFormatWidth
+     * @see #setPadCharacter
+     * @see #getPadCharacterString
+     * @see #getPadPosition
+     * @see #EPadPosition
+     * @stable ICU 2.0
+     */
+    virtual void setPadPosition(EPadPosition padPos);
+
+    /**
+     * Return whether or not scientific notation is used.
+     * @return TRUE if this object formats and parses scientific notation
+     * @see #setScientificNotation
+     * @see #getMinimumExponentDigits
+     * @see #setMinimumExponentDigits
+     * @see #isExponentSignAlwaysShown
+     * @see #setExponentSignAlwaysShown
+     * @stable ICU 2.0
+     */
+    virtual UBool isScientificNotation(void);
+
+    /**
+     * Set whether or not scientific notation is used. When scientific notation
+     * is used, the effective maximum number of integer digits is <= 8.  If the
+     * maximum number of integer digits is set to more than 8, the effective
+     * maximum will be 1.  This allows this call to generate a 'default' scientific
+     * number format without additional changes.
+     * @param useScientific TRUE if this object formats and parses scientific
+     * notation
+     * @see #isScientificNotation
+     * @see #getMinimumExponentDigits
+     * @see #setMinimumExponentDigits
+     * @see #isExponentSignAlwaysShown
+     * @see #setExponentSignAlwaysShown
+     * @stable ICU 2.0
+     */
+    virtual void setScientificNotation(UBool useScientific);
+
+    /**
+     * Return the minimum exponent digits that will be shown.
+     * @return the minimum exponent digits that will be shown
+     * @see #setScientificNotation
+     * @see #isScientificNotation
+     * @see #setMinimumExponentDigits
+     * @see #isExponentSignAlwaysShown
+     * @see #setExponentSignAlwaysShown
+     * @stable ICU 2.0
+     */
+    virtual int8_t getMinimumExponentDigits(void) const;
+
+    /**
+     * Set the minimum exponent digits that will be shown.  This has no
+     * effect unless scientific notation is in use.
+     * @param minExpDig a value >= 1 indicating the fewest exponent digits
+     * that will be shown.  Values less than 1 will be treated as 1.
+     * @see #setScientificNotation
+     * @see #isScientificNotation
+     * @see #getMinimumExponentDigits
+     * @see #isExponentSignAlwaysShown
+     * @see #setExponentSignAlwaysShown
+     * @stable ICU 2.0
+     */
+    virtual void setMinimumExponentDigits(int8_t minExpDig);
+
+    /**
+     * Return whether the exponent sign is always shown.
+     * @return TRUE if the exponent is always prefixed with either the
+     * localized minus sign or the localized plus sign, false if only negative
+     * exponents are prefixed with the localized minus sign.
+     * @see #setScientificNotation
+     * @see #isScientificNotation
+     * @see #setMinimumExponentDigits
+     * @see #getMinimumExponentDigits
+     * @see #setExponentSignAlwaysShown
+     * @stable ICU 2.0
+     */
+    virtual UBool isExponentSignAlwaysShown(void);
+
+    /**
+     * Set whether the exponent sign is always shown.  This has no effect
+     * unless scientific notation is in use.
+     * @param expSignAlways TRUE if the exponent is always prefixed with either
+     * the localized minus sign or the localized plus sign, false if only
+     * negative exponents are prefixed with the localized minus sign.
+     * @see #setScientificNotation
+     * @see #isScientificNotation
+     * @see #setMinimumExponentDigits
+     * @see #getMinimumExponentDigits
+     * @see #isExponentSignAlwaysShown
+     * @stable ICU 2.0
+     */
+    virtual void setExponentSignAlwaysShown(UBool expSignAlways);
+
+    /**
+     * Return the grouping size. Grouping size is the number of digits between
+     * grouping separators in the integer portion of a number.  For example,
+     * in the number "123,456.78", the grouping size is 3.
+     *
+     * @return    the grouping size.
+     * @see setGroupingSize
+     * @see NumberFormat::isGroupingUsed
+     * @see DecimalFormatSymbols::getGroupingSeparator
+     * @stable ICU 2.0
+     */
+    int32_t getGroupingSize(void) const;
+
+    /**
+     * Set the grouping size. Grouping size is the number of digits between
+     * grouping separators in the integer portion of a number.  For example,
+     * in the number "123,456.78", the grouping size is 3.
+     *
+     * @param newValue    the new value of the grouping size.
+     * @see getGroupingSize
+     * @see NumberFormat::setGroupingUsed
+     * @see DecimalFormatSymbols::setGroupingSeparator
+     * @stable ICU 2.0
+     */
+    virtual void setGroupingSize(int32_t newValue);
+
+    /**
+     * Return the secondary grouping size. In some locales one
+     * grouping interval is used for the least significant integer
+     * digits (the primary grouping size), and another is used for all
+     * others (the secondary grouping size).  A formatter supporting a
+     * secondary grouping size will return a positive integer unequal
+     * to the primary grouping size returned by
+     * getGroupingSize().  For example, if the primary
+     * grouping size is 4, and the secondary grouping size is 2, then
+     * the number 123456789 formats as "1,23,45,6789", and the pattern
+     * appears as "#,##,###0".
+     * @return the secondary grouping size, or a value less than
+     * one if there is none
+     * @see setSecondaryGroupingSize
+     * @see NumberFormat::isGroupingUsed
+     * @see DecimalFormatSymbols::getGroupingSeparator
+     * @stable ICU 2.4
+     */
+    int32_t getSecondaryGroupingSize(void) const;
+
+    /**
+     * Set the secondary grouping size. If set to a value less than 1,
+     * then secondary grouping is turned off, and the primary grouping
+     * size is used for all intervals, not just the least significant.
+     *
+     * @param newValue    the new value of the secondary grouping size.
+     * @see getSecondaryGroupingSize
+     * @see NumberFormat#setGroupingUsed
+     * @see DecimalFormatSymbols::setGroupingSeparator
+     * @stable ICU 2.4
+     */
+    virtual void setSecondaryGroupingSize(int32_t newValue);
+
+    /**
+     * Allows you to get the behavior of the decimal separator with integers.
+     * (The decimal separator will always appear with decimals.)
+     *
+     * @return    TRUE if the decimal separator always appear with decimals.
+     * Example: Decimal ON: 12345 -> 12345.; OFF: 12345 -> 12345
+     * @stable ICU 2.0
+     */
+    UBool isDecimalSeparatorAlwaysShown(void) const;
+
+    /**
+     * Allows you to set the behavior of the decimal separator with integers.
+     * (The decimal separator will always appear with decimals.)
+     *
+     * @param newValue    set TRUE if the decimal separator will always appear with decimals.
+     * Example: Decimal ON: 12345 -> 12345.; OFF: 12345 -> 12345
+     * @stable ICU 2.0
+     */
+    virtual void setDecimalSeparatorAlwaysShown(UBool newValue);
+
+    /**
+     * Synthesizes a pattern string that represents the current state
+     * of this Format object.
+     *
+     * @param result    Output param which will receive the pattern.
+     *                  Previous contents are deleted.
+     * @return          A reference to 'result'.
+     * @see applyPattern
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& toPattern(UnicodeString& result) const;
+
+    /**
+     * Synthesizes a localized pattern string that represents the current
+     * state of this Format object.
+     *
+     * @param result    Output param which will receive the localized pattern.
+     *                  Previous contents are deleted.
+     * @return          A reference to 'result'.
+     * @see applyPattern
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& toLocalizedPattern(UnicodeString& result) const;
+ 
+    /**
+     * Apply the given pattern to this Format object.  A pattern is a
+     * short-hand specification for the various formatting properties.
+     * These properties can also be changed individually through the
+     * various setter methods.
+     * <P>
+     * There is no limit to integer digits are set
+     * by this routine, since that is the typical end-user desire;
+     * use setMaximumInteger if you want to set a real value.
+     * For negative numbers, use a second pattern, separated by a semicolon
+     * <pre>
+     * .      Example "#,#00.0#" -> 1,234.56
+     * </pre>
+     * This means a minimum of 2 integer digits, 1 fraction digit, and
+     * a maximum of 2 fraction digits.
+     * <pre>
+     * .      Example: "#,#00.0#;(#,#00.0#)" for negatives in parantheses.
+     * </pre>
+     * In negative patterns, the minimum and maximum counts are ignored;
+     * these are presumed to be set in the positive pattern.
+     *
+     * @param pattern    The pattern to be applied.
+     * @param parseError Struct to recieve information on position 
+     *                   of error if an error is encountered
+     * @param status     Output param set to success/failure code on
+     *                   exit. If the pattern is invalid, this will be
+     *                   set to a failure result.
+     * @stable ICU 2.0
+     */
+    virtual void applyPattern(const UnicodeString& pattern,
+                             UParseError& parseError,
+                             UErrorCode& status);
+    /**
+     * Sets the pattern.
+     * @param pattern   The pattern to be applied.
+     * @param status    Output param set to success/failure code on
+     *                  exit. If the pattern is invalid, this will be
+     *                  set to a failure result.
+     * @stable ICU 2.0
+     */  
+    virtual void applyPattern(const UnicodeString& pattern,
+                             UErrorCode& status);
+
+    /**
+     * Apply the given pattern to this Format object.  The pattern
+     * is assumed to be in a localized notation. A pattern is a
+     * short-hand specification for the various formatting properties.
+     * These properties can also be changed individually through the
+     * various setter methods.
+     * <P>
+     * There is no limit to integer digits are set
+     * by this routine, since that is the typical end-user desire;
+     * use setMaximumInteger if you want to set a real value.
+     * For negative numbers, use a second pattern, separated by a semicolon
+     * <pre>
+     * .      Example "#,#00.0#" -> 1,234.56
+     * </pre>
+     * This means a minimum of 2 integer digits, 1 fraction digit, and
+     * a maximum of 2 fraction digits.
+     *
+     * Example: "#,#00.0#;(#,#00.0#)" for negatives in parantheses.
+     *
+     * In negative patterns, the minimum and maximum counts are ignored;
+     * these are presumed to be set in the positive pattern.
+     *
+     * @param pattern   The localized pattern to be applied.
+     * @param parseError Struct to recieve information on position 
+     *                   of error if an error is encountered
+     * @param status    Output param set to success/failure code on
+     *                  exit. If the pattern is invalid, this will be
+     *                  set to a failure result.
+     * @stable ICU 2.0
+     */
+    virtual void applyLocalizedPattern(const UnicodeString& pattern,
+                                       UParseError& parseError,
+                                       UErrorCode& status);
+
+    /**
+     * Apply the given pattern to this Format object.
+     *
+     * @param pattern   The localized pattern to be applied.
+     * @param status    Output param set to success/failure code on
+     *                  exit. If the pattern is invalid, this will be
+     *                  set to a failure result.
+     * @stable ICU 2.0
+     */
+    virtual void applyLocalizedPattern(const UnicodeString& pattern,
+                                       UErrorCode& status);
+
+
+    /**
+     * Sets the maximum number of digits allowed in the integer portion of a
+     * number. This override limits the integer digit count to 309.
+     *
+     * @param newValue    the new value of the maximum number of digits 
+     *                      allowed in the integer portion of a number.
+     * @see NumberFormat#setMaximumIntegerDigits
+     * @stable ICU 2.0
+     */
+    virtual void setMaximumIntegerDigits(int32_t newValue);
+
+    /**
+     * Sets the minimum number of digits allowed in the integer portion of a
+     * number. This override limits the integer digit count to 309.
+     * 
+     * @param newValue    the new value of the minimum number of digits 
+     *                      allowed in the integer portion of a number.
+     * @see NumberFormat#setMinimumIntegerDigits
+     * @stable ICU 2.0
+     */
+    virtual void setMinimumIntegerDigits(int32_t newValue);
+
+    /**
+     * Sets the maximum number of digits allowed in the fraction portion of a
+     * number. This override limits the fraction digit count to 340.
+     *
+     * @param newValue    the new value of the maximum number of digits 
+     *                    allowed in the fraction portion of a number.
+     * @see NumberFormat#setMaximumFractionDigits
+     * @stable ICU 2.0
+     */
+    virtual void setMaximumFractionDigits(int32_t newValue);
+
+    /**
+     * Sets the minimum number of digits allowed in the fraction portion of a
+     * number. This override limits the fraction digit count to 340.
+     *
+     * @param newValue    the new value of the minimum number of digits 
+     *                    allowed in the fraction portion of a number.
+     * @see NumberFormat#setMinimumFractionDigits
+     * @stable ICU 2.0
+     */
+    virtual void setMinimumFractionDigits(int32_t newValue);
+
+    /**
+     * Returns the minimum number of significant digits that will be
+     * displayed. This value has no effect unless areSignificantDigitsUsed()
+     * returns true.
+     * @return the fewest significant digits that will be shown
+     * @stable ICU 3.0
+     */
+    int32_t getMinimumSignificantDigits() const;
+
+    /**
+     * Returns the maximum number of significant digits that will be
+     * displayed. This value has no effect unless areSignificantDigitsUsed()
+     * returns true.
+     * @return the most significant digits that will be shown
+     * @stable ICU 3.0
+     */
+    int32_t getMaximumSignificantDigits() const;
+
+    /**
+     * Sets the minimum number of significant digits that will be
+     * displayed.  If <code>min</code> is less than one then it is set
+     * to one.  If the maximum significant digits count is less than
+     * <code>min</code>, then it is set to <code>min</code>. This
+     * value has no effect unless areSignificantDigits() returns true.
+     * @param min the fewest significant digits to be shown 
+     * @stable ICU 3.0
+     */
+    void setMinimumSignificantDigits(int32_t min);
+
+    /**
+     * Sets the maximum number of significant digits that will be
+     * displayed.  If <code>max</code> is less than one then it is set
+     * to one.  If the minimum significant digits count is greater
+     * than <code>max</code>, then it is set to <code>max</code>.
+     * This value has no effect unless areSignificantDigits() returns
+     * true.
+     * @param max the most significant digits to be shown 
+     * @stable ICU 3.0
+     */
+    void setMaximumSignificantDigits(int32_t max);
+
+    /**
+     * Returns true if significant digits are in use, or false if
+     * integer and fraction digit counts are in use.
+     * @return true if significant digits are in use
+     * @stable ICU 3.0
+     */
+    UBool areSignificantDigitsUsed() const;
+
+    /**
+     * Sets whether significant digits are in use, or integer and
+     * fraction digit counts are in use.
+     * @param useSignificantDigits true to use significant digits, or
+     * false to use integer and fraction digit counts
+     * @stable ICU 3.0
+     */
+    void setSignificantDigitsUsed(UBool useSignificantDigits);
+
+ public:
+    /**
+     * Sets the currency used to display currency
+     * amounts.  This takes effect immediately, if this format is a
+     * currency format.  If this format is not a currency format, then
+     * the currency is used if and when this object becomes a
+     * currency format through the application of a new pattern.
+     * @param theCurrency a 3-letter ISO code indicating new currency
+     * to use.  It need not be null-terminated.  May be the empty
+     * string or NULL to indicate no currency.
+     * @param ec input-output error code
+     * @stable ICU 3.0
+     */
+    virtual void setCurrency(const UChar* theCurrency, UErrorCode& ec);
+
+    /**
+     * Sets the currency used to display currency amounts.  See
+     * setCurrency(const UChar*, UErrorCode&).
+     * @deprecated ICU 3.0. Use setCurrency(const UChar*, UErrorCode&).
+     */
+    virtual void setCurrency(const UChar* theCurrency);
+
+    /**
+     * The resource tags we use to retrieve decimal format data from
+     * locale resource bundles.
+     * @deprecated ICU 3.4. This string has no public purpose. Please don't use it.
+     */
+    static const char fgNumberPatterns[];
+
+public:
+
+    /**
+     * Return the class ID for this class.  This is useful only for
+     * comparing to a return value from getDynamicClassID().  For example:
+     * <pre>
+     * .      Base* polymorphic_pointer = createPolymorphicObject();
+     * .      if (polymorphic_pointer->getDynamicClassID() ==
+     * .          Derived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
+     * This method is to implement a simple version of RTTI, since not all
+     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
+     * clone() methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+private:
+    DecimalFormat(); // default constructor not implemented
+
+    int32_t precision(UBool isIntegral) const;
+
+    /**
+     * Do real work of constructing a new DecimalFormat.
+     */
+    void construct(UErrorCode&               status,
+                   UParseError&             parseErr,
+                   const UnicodeString*     pattern = 0,
+                   DecimalFormatSymbols*    symbolsToAdopt = 0
+                   );
+
+    /**
+     * Does the real work of generating a pattern.
+     *
+     * @param result     Output param which will receive the pattern.
+     *                   Previous contents are deleted.
+     * @param localized  TRUE return localized pattern.
+     * @return           A reference to 'result'.
+     */
+    UnicodeString& toPattern(UnicodeString& result, UBool localized) const;
+
+    /**
+     * Does the real work of applying a pattern.
+     * @param pattern    The pattern to be applied.
+     * @param localized  If true, the pattern is localized; else false.
+     * @param parseError Struct to recieve information on position 
+     *                   of error if an error is encountered
+     * @param status     Output param set to success/failure code on
+     *                   exit. If the pattern is invalid, this will be
+     *                   set to a failure result.
+     */
+    void applyPattern(const UnicodeString& pattern,
+                            UBool localized,
+                            UParseError& parseError,
+                            UErrorCode& status);
+    /**
+     * Do the work of formatting a number, either a double or a long.
+     *
+     * @param appendTo       Output parameter to receive result.
+     *                       Result is appended to existing contents.
+     * @param fieldPosition  On input: an alignment field, if desired.
+     *                       On output: the offsets of the alignment field.
+     * @param digits         the digits to be formatted.
+     * @param isInteger      if TRUE format the digits as Integer.
+     * @return               Reference to 'appendTo' parameter.
+     */
+    UnicodeString& subformat(UnicodeString& appendTo,
+                             FieldPosition& fieldPosition,
+                             DigitList& digits,
+                             UBool         isInteger) const;
+
+    void parse(const UnicodeString& text,
+               Formattable& result,
+               ParsePosition& pos,
+               UBool parseCurrency) const;
+
+    enum {
+        fgStatusInfinite,
+        fgStatusLength      // Leave last in list.
+    } StatusFlags;
+
+    UBool subparse(const UnicodeString& text, ParsePosition& parsePosition,
+                   DigitList& digits, UBool* status,
+                   UChar* currency) const;
+
+    int32_t skipPadding(const UnicodeString& text, int32_t position) const;
+
+    int32_t compareAffix(const UnicodeString& input,
+                         int32_t pos,
+                         UBool isNegative,
+                         UBool isPrefix,
+                         UChar* currency) const;
+    
+    static int32_t compareSimpleAffix(const UnicodeString& affix,
+                                      const UnicodeString& input,
+                                      int32_t pos,
+                                      UBool strict);
+    
+    static int32_t skipRuleWhiteSpace(const UnicodeString& text, int32_t pos);
+    
+    static int32_t skipUWhiteSpace(const UnicodeString& text, int32_t pos);
+    
+    int32_t compareComplexAffix(const UnicodeString& affixPat,
+                                const UnicodeString& input,
+                                int32_t pos,
+                                UChar* currency) const;
+
+    static int32_t match(const UnicodeString& text, int32_t pos, UChar32 ch);
+
+    static int32_t match(const UnicodeString& text, int32_t pos, const UnicodeString& str);
+	
+    static UBool matchSymbol(const UnicodeString &text, int32_t position, int32_t length, const UnicodeString &symbol,
+                             UnicodeSet *sset, UChar32 schar);
+	
+    /**
+     * Get a decimal format symbol.
+     * Returns a const reference to the symbol string.
+     * @internal
+     */
+    inline const UnicodeString &getConstSymbol(DecimalFormatSymbols::ENumberFormatSymbol symbol) const;
+
+    int32_t appendAffix(UnicodeString& buf, double number,
+                        UBool isNegative, UBool isPrefix) const;
+
+    /**
+     * Append an affix to the given UnicodeString, using quotes if
+     * there are special characters.  Single quotes themselves must be
+     * escaped in either case.
+     */
+    void appendAffixPattern(UnicodeString& appendTo, const UnicodeString& affix, 
+                            UBool localized) const;
+
+    void appendAffixPattern(UnicodeString& appendTo,
+                            const UnicodeString* affixPattern,
+                            const UnicodeString& expAffix, UBool localized) const;
+
+    void expandAffix(const UnicodeString& pattern,
+                     UnicodeString& affix,
+                     double number,
+                     UBool doFormat) const;
+
+    void expandAffixes();
+    
+    static double round(double a, ERoundingMode mode, UBool isNegative);
+
+    void addPadding(UnicodeString& appendTo,
+                    FieldPosition& fieldPosition,
+                    int32_t prefixLen, int32_t suffixLen) const;
+
+    UBool isGroupingPosition(int32_t pos) const;
+
+    void setCurrencyForSymbols();
+
+    /**
+     * Constants.
+     */
+    //static const int8_t fgMaxDigit; // The largest digit, in this case 9
+
+    /*transient*/ //DigitList* fDigitList;
+
+    UnicodeString           fPositivePrefix;
+    UnicodeString           fPositiveSuffix;
+    UnicodeString           fNegativePrefix;
+    UnicodeString           fNegativeSuffix;
+    UnicodeString*          fPosPrefixPattern;
+    UnicodeString*          fPosSuffixPattern;
+    UnicodeString*          fNegPrefixPattern;
+    UnicodeString*          fNegSuffixPattern;
+
+    /**
+     * Formatter for ChoiceFormat-based currency names.  If this field
+     * is not null, then delegate to it to format currency symbols.
+     * @since ICU 2.6
+     */
+    ChoiceFormat*           fCurrencyChoice;
+
+    int32_t                 fMultiplier;
+    int32_t                 fGroupingSize;
+    int32_t                 fGroupingSize2;
+    UBool                   fDecimalSeparatorAlwaysShown;
+    /*transient*/ UBool     fIsCurrencyFormat;
+    DecimalFormatSymbols*   fSymbols;
+
+    UBool                   fUseSignificantDigits;
+    int32_t                 fMinSignificantDigits;
+    int32_t                 fMaxSignificantDigits;
+
+    UBool                   fUseExponentialNotation;
+    int8_t                  fMinExponentDigits;
+    UBool                   fExponentSignAlwaysShown;
+
+    /* If fRoundingIncrement is NULL, there is no rounding.  Otherwise, round to
+     * fRoundingIncrement.getDouble().  Since this operation may be expensive,
+     * we cache the result in fRoundingDouble.  All methods that update
+     * fRoundingIncrement also update fRoundingDouble. */
+    DigitList*              fRoundingIncrement;
+    /*transient*/ double    fRoundingDouble;
+    ERoundingMode           fRoundingMode;
+
+    UChar32                 fPad;
+    int32_t                 fFormatWidth;
+    EPadPosition            fPadPosition;
+
+protected:
+
+    /**
+     * Returns the currency in effect for this formatter.  Subclasses
+     * should override this method as needed.  Unlike getCurrency(),
+     * this method should never return "".
+     * @result output parameter for null-terminated result, which must
+     * have a capacity of at least 4
+     * @internal
+     */
+    virtual void getEffectiveCurrency(UChar* result, UErrorCode& ec) const;
+
+  /** number of integer digits 
+   * @stable ICU 2.4
+   */  
+    static const int32_t  kDoubleIntegerDigits;
+  /** number of fraction digits 
+   * @stable ICU 2.4
+   */  
+    static const int32_t  kDoubleFractionDigits;
+
+    /**
+     * When someone turns on scientific mode, we assume that more than this
+     * number of digits is due to flipping from some other mode that didn't
+     * restrict the maximum, and so we force 1 integer digit.  We don't bother
+     * to track and see if someone is using exponential notation with more than
+     * this number, it wouldn't make sense anyway, and this is just to make sure
+     * that someone turning on scientific mode with default settings doesn't
+     * end up with lots of zeroes.
+     * @stable ICU 2.8
+     */
+    static const int32_t  kMaxScientificIntegerDigits;
+};
+
+inline UnicodeString&
+DecimalFormat::format(const Formattable& obj,
+                      UnicodeString& appendTo,
+                      UErrorCode& status) const {
+    // Don't use Format:: - use immediate base class only,
+    // in case immediate base modifies behavior later.
+    return NumberFormat::format(obj, appendTo, status);
+}
+
+inline UnicodeString&
+DecimalFormat::format(double number,
+                      UnicodeString& appendTo) const {
+    FieldPosition pos(0);
+    return format(number, appendTo, pos);
+}
+
+inline UnicodeString&
+DecimalFormat::format(int32_t number,
+                      UnicodeString& appendTo) const {
+    FieldPosition pos(0);
+    return format((int64_t)number, appendTo, pos);
+}
+
+inline const UnicodeString &
+DecimalFormat::getConstSymbol(DecimalFormatSymbols::ENumberFormatSymbol symbol) const {
+    return fSymbols->getConstSymbol(symbol);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _DECIMFMT
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/docmain.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/docmain.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/docmain.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,202 +0,0 @@
-/********************************************************************
- * COPYRIGHT: 
- * Copyright (c) 1997-2007, International Business Machines Corporation and
- * others. All Rights Reserved.
- *
- *  FILE NAME: DOCMAIN.h
- *
- *   Date          Name        Description
- *   12/11/2000    Ram        Creation.
- */
-
-/* This file contains documentation for Doxygen and doesnot have
- * any significance with respect to C or C++ API
- */
-
-/*! \mainpage
- *
- * \section API API Reference Usage
- * 
- * <h3>C++ Programmers:</h3>
- * <p>Use <a href="hierarchy.html">Class Hierarchy</a> or <a href="classes.html"> Alphabetical List </a>
- * or <a href="annotated.html"> Compound List</a>
- * to find the class you are interested in. For example, to find BreakIterator,
- * you can go to the <a href="classes.html"> Alphabetical List</a>, then click on
- * "BreakIterator". Once you are at the class, you will find an inheritance
- * chart, a list of the public members, a detailed description of the class,
- * then detailed member descriptions.</p>
- * 
- * <h3>C Programmers:</h3>
- * <p>Use <a href="#Module">Module List</a> or <a href="globals.html">File Members</a>
- * to find a list of all the functions and constants.
- * For example, to find BreakIterator functions you would click on
- * <a href="files.html"> File List</a>,
- * then find "ubrk.h" and click on it. You will find descriptions of Defines,
- * Typedefs, Enumerations, and Functions, with detailed descriptions below.
- * If you want to find a specific function, such as ubrk_next(), then click
- * first on <a href="globals.html"> File Members</a>, then use your browser
- * Find dialog to search for "ubrk_next()".</p>
- *
- *
- * <h3>API References for Previous Releases</h3>
- * <p>The API References for each release of ICU are also available as
- * a zip file from the ICU 
- * <a href="http://icu-project.org/download/">download page</a>.</p>
- *
- * <hr>
- *
- * <h2>Architecture (User's Guide)</h2>
- * <ul>
- *   <li><a href="http://icu-project.org/userguide/">Introduction</a></li>
- *   <li><a href="http://icu-project.org/userguide/i18n.html">Internationalization</a></li>
- *   <li><a href="http://icu-project.org/userguide/design.html">Locale Model</a></li>
- *   <li><a href="http://icu-project.org/userguide/design.html">Multithreading</a></li>
- *   <li><a href="http://icu-project.org/userguide/conversion.html">Conversion</a></li>
- *   <li><a href="http://icu-project.org/userguide/design.html">Error Handling</a></li>
- * </ul>
- *
- * <hr>
- *\htmlonly <h2><a NAME="Module">Module List</a></h2> \endhtmlonly
- * <table border="1" cols="3" align="center">
- *   <tr>
- *     <td><strong>Module Name</strong></td>
- *     <td><strong>C</strong></td>
- *     <td><strong>C++</strong></td>
- *   </tr>
- *   <tr>
- *     <td>Basic Types and Constants</td>
- *     <td>utypes.h</td>
- *     <td>utypes.h</td>
- *   </tr>
- *   <tr>
- *     <td>Strings and Character Iteration</td>
- *     <td>ustring.h, utf.h</td>
- *     <td>UnicodeString, CharacterIterator</td>
- *   </tr>
- *   <tr>
- *     <td>Unicode Character<br>Properties and Names</td>
- *     <td>uchar.h</td>
- *     <td>uchar.h C API</td>
- *   </tr>
- *   <tr>
- *     <td>Codepage Conversion</td>
- *     <td>ucnv.h</td>
- *     <td>ucnv.h C API</td>
- *   </tr>
- *   <tr>
- *     <td>Unicode Text Compression</td>
- *     <td>ucnv.h <br> (encoding name "SCSU" or "BOCU-1")</td>
- *     <td>ucnv.h C API</td>
- *   </tr>
- *   <tr>
- *     <td>Locales </td>
- *     <td>uloc.h</a></td>
- *     <td>Locale</td>
- *   </tr>
- *   <tr>
- *     <td>Resource Bundles</td>
- *     <td>ures.h</td>
- *     <td>ResourceBundle</td>
- *   </tr>
- *   <tr>
- *     <td>Normalization</td>
- *     <td>unorm.h</td>
- *     <td>Normalizer</td>
- *   </tr>
- *   <tr>
- *     <td>Calendars</td>
- *     <td>ucal.h</td>
- *     <td>Calendar</td>
- *   </tr>
- *   <tr>
- *     <td>Date and Time Formatting</td>
- *     <td>udat.h</td>
- *     <td>DateFormat</td>
- *   </tr>
- *   <tr>
- *     <td>Message Formatting</td>
- *     <td>umsg.h</td>
- *     <td>MessageFormat</td>
- *   </tr>
- *   <tr>
- *     <td>Number Formatting</td>
- *     <td>unum.h</td>
- *     <td>NumberFormat</td>
- *   </tr>
- *   <tr>
- *     <td>Number Spellout <br> (Rule Based Number Formatting)</td>
- *     <td>unum.h <br> (use UNUM_SPELLOUT)</td>
- *     <td>RuleBasedNumberFormat</td>
- *   </tr>
- *   <tr>
- *     <td>Text Transformation <br> (Transliteration)</td>
- *     <td>utrans.h</td>
- *     <td>Transliterator</td>
- *   </tr>
- *   <tr>
- *     <td>Bidirectional Algorithm</td>
- *     <td>ubidi.h</td>
- *     <td>ubidi.h C API</td>
- *   </tr>
- *   <tr>
- *     <td>Arabic Shaping</td>
- *     <td>ushape.h</td>
- *     <td>ushape.h C API</td>
- *   </tr>
- *   <tr>
- *     <td>Collation</td>
- *     <td>ucol.h</td>
- *     <td>Collator</td>
- *   </tr>
- *   <tr>
- *     <td>String Searching</td>
- *     <td>usearch.h</td>
- *     <td>StringSearch</td>
- *   </tr>
- *   <tr>
- *     <td>Text Boundary Analysis <br> (Break Iteration)</td>
- *     <td>ubrk.h</td>
- *     <td>BreakIterator</td>
- *   </tr>
- *   <tr>
- *     <td>Unicode Set</td>
- *     <td>uset.h</td>
- *     <td>UnicodeSet</td>
- *   </tr>
- *   <tr>
- *     <td>Regular Expressions</td>
- *     <td>uregex.h</td>
- *     <td>RegexPattern, RegexMatcher</td>
- *   </tr>
- *   <tr>
- *     <td>StringPrep</td>
- *     <td>usprep.h</td>
- *     <td>usprep.h C API</td>
- *   </tr>
- *   <tr>
- *     <td>International Domain Names in Applications</td>
- *     <td>uidna.h</td>
- *     <td>uidna.h C API</td>
- *   </tr>
- *   <tr>
- *     <td>Universal Time Scale</td>
- *     <td>utmscale.h</td>
- *     <td>utmscale.h C API</td>
- *   </tr>
- *   <tr>
- *     <td>Basic Layout Engine Types and Constants</td>
- *     <td>(no C API)</td>
- *     <td>LETypes.h</td>
- *   </tr>
- *   <tr>
- *     <td>Complex Text Layout</td>
- *     <td>(no C API)</td>
- *     <td>LayoutEngine, ParagraphLayout</td>
- *   </tr>
- *   <tr>
- *     <td>ICU I/O</td>
- *     <td>ustdio.h</td>
- *     <td>ustream.h</td>
- *   </tr>
- * </table>
- */

Copied: MacRuby/trunk/icu-1060/unicode/docmain.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/docmain.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/docmain.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/docmain.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,202 @@
+/********************************************************************
+ * COPYRIGHT: 
+ * Copyright (c) 1997-2007, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *
+ *  FILE NAME: DOCMAIN.h
+ *
+ *   Date          Name        Description
+ *   12/11/2000    Ram        Creation.
+ */
+
+/* This file contains documentation for Doxygen and doesnot have
+ * any significance with respect to C or C++ API
+ */
+
+/*! \mainpage
+ *
+ * \section API API Reference Usage
+ * 
+ * <h3>C++ Programmers:</h3>
+ * <p>Use <a href="hierarchy.html">Class Hierarchy</a> or <a href="classes.html"> Alphabetical List </a>
+ * or <a href="annotated.html"> Compound List</a>
+ * to find the class you are interested in. For example, to find BreakIterator,
+ * you can go to the <a href="classes.html"> Alphabetical List</a>, then click on
+ * "BreakIterator". Once you are at the class, you will find an inheritance
+ * chart, a list of the public members, a detailed description of the class,
+ * then detailed member descriptions.</p>
+ * 
+ * <h3>C Programmers:</h3>
+ * <p>Use <a href="#Module">Module List</a> or <a href="globals.html">File Members</a>
+ * to find a list of all the functions and constants.
+ * For example, to find BreakIterator functions you would click on
+ * <a href="files.html"> File List</a>,
+ * then find "ubrk.h" and click on it. You will find descriptions of Defines,
+ * Typedefs, Enumerations, and Functions, with detailed descriptions below.
+ * If you want to find a specific function, such as ubrk_next(), then click
+ * first on <a href="globals.html"> File Members</a>, then use your browser
+ * Find dialog to search for "ubrk_next()".</p>
+ *
+ *
+ * <h3>API References for Previous Releases</h3>
+ * <p>The API References for each release of ICU are also available as
+ * a zip file from the ICU 
+ * <a href="http://icu-project.org/download/">download page</a>.</p>
+ *
+ * <hr>
+ *
+ * <h2>Architecture (User's Guide)</h2>
+ * <ul>
+ *   <li><a href="http://icu-project.org/userguide/">Introduction</a></li>
+ *   <li><a href="http://icu-project.org/userguide/i18n.html">Internationalization</a></li>
+ *   <li><a href="http://icu-project.org/userguide/design.html">Locale Model</a></li>
+ *   <li><a href="http://icu-project.org/userguide/design.html">Multithreading</a></li>
+ *   <li><a href="http://icu-project.org/userguide/conversion.html">Conversion</a></li>
+ *   <li><a href="http://icu-project.org/userguide/design.html">Error Handling</a></li>
+ * </ul>
+ *
+ * <hr>
+ *\htmlonly <h2><a NAME="Module">Module List</a></h2> \endhtmlonly
+ * <table border="1" cols="3" align="center">
+ *   <tr>
+ *     <td><strong>Module Name</strong></td>
+ *     <td><strong>C</strong></td>
+ *     <td><strong>C++</strong></td>
+ *   </tr>
+ *   <tr>
+ *     <td>Basic Types and Constants</td>
+ *     <td>utypes.h</td>
+ *     <td>utypes.h</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Strings and Character Iteration</td>
+ *     <td>ustring.h, utf.h</td>
+ *     <td>UnicodeString, CharacterIterator</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Unicode Character<br>Properties and Names</td>
+ *     <td>uchar.h</td>
+ *     <td>uchar.h C API</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Codepage Conversion</td>
+ *     <td>ucnv.h</td>
+ *     <td>ucnv.h C API</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Unicode Text Compression</td>
+ *     <td>ucnv.h <br> (encoding name "SCSU" or "BOCU-1")</td>
+ *     <td>ucnv.h C API</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Locales </td>
+ *     <td>uloc.h</a></td>
+ *     <td>Locale</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Resource Bundles</td>
+ *     <td>ures.h</td>
+ *     <td>ResourceBundle</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Normalization</td>
+ *     <td>unorm.h</td>
+ *     <td>Normalizer</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Calendars</td>
+ *     <td>ucal.h</td>
+ *     <td>Calendar</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Date and Time Formatting</td>
+ *     <td>udat.h</td>
+ *     <td>DateFormat</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Message Formatting</td>
+ *     <td>umsg.h</td>
+ *     <td>MessageFormat</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Number Formatting</td>
+ *     <td>unum.h</td>
+ *     <td>NumberFormat</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Number Spellout <br> (Rule Based Number Formatting)</td>
+ *     <td>unum.h <br> (use UNUM_SPELLOUT)</td>
+ *     <td>RuleBasedNumberFormat</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Text Transformation <br> (Transliteration)</td>
+ *     <td>utrans.h</td>
+ *     <td>Transliterator</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Bidirectional Algorithm</td>
+ *     <td>ubidi.h</td>
+ *     <td>ubidi.h C API</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Arabic Shaping</td>
+ *     <td>ushape.h</td>
+ *     <td>ushape.h C API</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Collation</td>
+ *     <td>ucol.h</td>
+ *     <td>Collator</td>
+ *   </tr>
+ *   <tr>
+ *     <td>String Searching</td>
+ *     <td>usearch.h</td>
+ *     <td>StringSearch</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Text Boundary Analysis <br> (Break Iteration)</td>
+ *     <td>ubrk.h</td>
+ *     <td>BreakIterator</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Unicode Set</td>
+ *     <td>uset.h</td>
+ *     <td>UnicodeSet</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Regular Expressions</td>
+ *     <td>uregex.h</td>
+ *     <td>RegexPattern, RegexMatcher</td>
+ *   </tr>
+ *   <tr>
+ *     <td>StringPrep</td>
+ *     <td>usprep.h</td>
+ *     <td>usprep.h C API</td>
+ *   </tr>
+ *   <tr>
+ *     <td>International Domain Names in Applications</td>
+ *     <td>uidna.h</td>
+ *     <td>uidna.h C API</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Universal Time Scale</td>
+ *     <td>utmscale.h</td>
+ *     <td>utmscale.h C API</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Basic Layout Engine Types and Constants</td>
+ *     <td>(no C API)</td>
+ *     <td>LETypes.h</td>
+ *   </tr>
+ *   <tr>
+ *     <td>Complex Text Layout</td>
+ *     <td>(no C API)</td>
+ *     <td>LayoutEngine, ParagraphLayout</td>
+ *   </tr>
+ *   <tr>
+ *     <td>ICU I/O</td>
+ *     <td>ustdio.h</td>
+ *     <td>ustream.h</td>
+ *   </tr>
+ * </table>
+ */

Deleted: MacRuby/trunk/icu-1060/unicode/dtfmtsym.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/dtfmtsym.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/dtfmtsym.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,721 +0,0 @@
-/*  
-********************************************************************************
-*   Copyright (C) 1997-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-********************************************************************************
-*
-* File DTFMTSYM.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   02/19/97    aliu        Converted from java.
-*    07/21/98    stephen        Added getZoneIndex()
-*                            Changed to match C++ conventions
-********************************************************************************
-*/
-     
-#ifndef DTFMTSYM_H
-#define DTFMTSYM_H
- 
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/calendar.h"
-#include "unicode/uobject.h"
-#include "unicode/locid.h"
-#include "unicode/ures.h"
-
-/**
- * \file 
- * \brief C++ API: Symbols for formatting dates.
- */
-
-U_NAMESPACE_BEGIN
-
-/* forward declaration */
-class SimpleDateFormat;
-class Hashtable;
-class ZoneStringFormat;
-class SafeZoneStringFormatPtr;
-
-/**
- * DateFormatSymbols is a public class for encapsulating localizable date-time
- * formatting data -- including timezone data. DateFormatSymbols is used by
- * DateFormat and SimpleDateFormat.
- * <P>
- * Rather than first creating a DateFormatSymbols to get a date-time formatter
- * by using a SimpleDateFormat constructor, clients are encouraged to create a
- * date-time formatter using the getTimeInstance(), getDateInstance(), or
- * getDateTimeInstance() method in DateFormat. Each of these methods can return a
- * date/time formatter initialized with a default format pattern along with the
- * date-time formatting data for a given or default locale. After a formatter is
- * created, clients may modify the format pattern using the setPattern function
- * as so desired. For more information on using these formatter factory
- * functions, see DateFormat.
- * <P>
- * If clients decide to create a date-time formatter with a particular format
- * pattern and locale, they can do so with new SimpleDateFormat(aPattern,
- * new DateFormatSymbols(aLocale)).  This will load the appropriate date-time
- * formatting data from the locale.
- * <P>
- * DateFormatSymbols objects are clonable. When clients obtain a
- * DateFormatSymbols object, they can feel free to modify the date-time
- * formatting data as necessary. For instance, clients can
- * replace the localized date-time format pattern characters with the ones that
- * they feel easy to remember. Or they can change the representative cities
- * originally picked by default to using their favorite ones.
- * <P>
- * DateFormatSymbols are not expected to be subclassed. Data for a calendar is
- * loaded out of resource bundles.  The 'type' parameter indicates the type of
- * calendar, for example, "gregorian" or "japanese".  If the type is not gregorian
- * (or NULL, or an empty string) then the type is appended to the resource name,
- * for example,  'Eras_japanese' instead of 'Eras'.   If the resource 'Eras_japanese' did
- * not exist (even in root), then this class will fall back to just 'Eras', that is,
- * Gregorian data.  Therefore, the calendar implementor MUST ensure that the root
- * locale at least contains any resources that are to be particularized for the
- * calendar type.
- */
-class U_I18N_API DateFormatSymbols : public UObject {
-public:
-    /**
-     * Construct a DateFormatSymbols object by loading format data from
-     * resources for the default locale, in the default calendar (Gregorian).
-     * <P>
-     * NOTE: This constructor will never fail; if it cannot get resource
-     * data for the default locale, it will return a last-resort object
-     * based on hard-coded strings.
-     *
-     * @param status    Status code.  Failure
-     *                  results if the resources for the default cannot be
-     *                  found or cannot be loaded
-     * @stable ICU 2.0
-     */
-    DateFormatSymbols(UErrorCode& status);
-
-    /**
-     * Construct a DateFormatSymbols object by loading format data from
-     * resources for the given locale, in the default calendar (Gregorian).
-     *
-     * @param locale    Locale to load format data from.
-     * @param status    Status code.  Failure
-     *                  results if the resources for the locale cannot be
-     *                  found or cannot be loaded
-     * @stable ICU 2.0
-     */
-    DateFormatSymbols(const Locale& locale,
-                      UErrorCode& status);
-
-    /**
-     * Construct a DateFormatSymbols object by loading format data from
-     * resources for the default locale, in the default calendar (Gregorian).
-     * <P>
-     * NOTE: This constructor will never fail; if it cannot get resource
-     * data for the default locale, it will return a last-resort object
-     * based on hard-coded strings.
-     *
-     * @param type      Type of calendar (as returned by Calendar::getType). 
-     *                  Will be used to access the correct set of strings.
-     *                  (NULL or empty string defaults to "gregorian".)
-     * @param status    Status code.  Failure
-     *                  results if the resources for the default cannot be
-     *                  found or cannot be loaded
-     * @internal
-     */
-    DateFormatSymbols(const char *type, UErrorCode& status);
-
-    /**
-     * Construct a DateFormatSymbols object by loading format data from
-     * resources for the given locale, in the default calendar (Gregorian).
-     *
-     * @param locale    Locale to load format data from.
-     * @param type      Type of calendar (as returned by Calendar::getType). 
-     *                  Will be used to access the correct set of strings.
-     *                  (NULL or empty string defaults to "gregorian".)
-     * @param status    Status code.  Failure
-     *                  results if the resources for the locale cannot be
-     *                  found or cannot be loaded
-     * @internal
-     */
-    DateFormatSymbols(const Locale& locale,
-                      const char *type,
-                      UErrorCode& status);
-
-    /**
-     * Copy constructor.
-     * @stable ICU 2.0
-     */
-    DateFormatSymbols(const DateFormatSymbols&);
-
-    /**
-     * Assignment operator.
-     * @stable ICU 2.0
-     */
-    DateFormatSymbols& operator=(const DateFormatSymbols&);
-
-    /**
-     * Destructor. This is nonvirtual because this class is not designed to be
-     * subclassed.
-     * @stable ICU 2.0
-     */
-    virtual ~DateFormatSymbols();
-
-    /**
-     * Return true if another object is semantically equal to this one.
-     *
-     * @param other    the DateFormatSymbols object to be compared with.
-     * @return         true if other is semantically equal to this.
-     * @stable ICU 2.0
-     */
-    UBool operator==(const DateFormatSymbols& other) const;
-
-    /**
-     * Return true if another object is semantically unequal to this one.
-     *
-     * @param other    the DateFormatSymbols object to be compared with.
-     * @return         true if other is semantically unequal to this.
-     * @stable ICU 2.0
-     */
-    UBool operator!=(const DateFormatSymbols& other) const { return !operator==(other); }
-
-    /**
-     * Gets abbreviated era strings. For example: "AD" and "BC".
-     *
-     * @param count    Filled in with length of the array.
-     * @return         the era strings.
-     * @stable ICU 2.0
-     */
-    const UnicodeString* getEras(int32_t& count) const;
-
-    /**
-     * Sets abbreviated era strings. For example: "AD" and "BC".
-     * @param eras  Array of era strings (DateFormatSymbols retains ownership.)
-     * @param count Filled in with length of the array.
-     * @stable ICU 2.0
-     */
-    void setEras(const UnicodeString* eras, int32_t count);
-
-    /**
-     * Gets era name strings. For example: "Anno Domini" and "Before Christ".
-     *
-     * @param count    Filled in with length of the array.
-     * @return         the era name strings.
-     * @stable ICU 3.4
-     */
-    const UnicodeString* getEraNames(int32_t& count) const;
-
-    /**
-     * Sets era name strings. For example: "Anno Domini" and "Before Christ".
-     * @param eraNames  Array of era name strings (DateFormatSymbols retains ownership.)
-     * @param count Filled in with length of the array.
-     * @stable ICU 3.6
-     */
-    void setEraNames(const UnicodeString* eraNames, int32_t count);
-
-    /**
-     * Gets narrow era strings. For example: A" and "D".
-     *
-     * @param count    Filled in with length of the array.
-     * @return         the narrow era strings.
-     * @draft ICU 4.2
-     */
-    const UnicodeString* getNarrowEras(int32_t& count) const;
-
-    /**
-     * Sets narrow era strings. For example: "A" and "B".
-     * @param narrowEras  Array of narrow era strings (DateFormatSymbols retains ownership.)
-     * @param count Filled in with length of the array.
-     * @draft ICU 4.2
-     */
-    void setNarrowEras(const UnicodeString* narrowEras, int32_t count);
-
-    /**
-     * Gets month strings. For example: "January", "February", etc.
-     * @param count Filled in with length of the array.
-     * @return the month strings. (DateFormatSymbols retains ownership.)
-     * @stable ICU 2.0
-     */
-    const UnicodeString* getMonths(int32_t& count) const;
-
-    /**
-     * Sets month strings. For example: "January", "February", etc.
-     *
-     * @param months    the new month strings. (not adopted; caller retains ownership)
-     * @param count     Filled in with length of the array.
-     * @stable ICU 2.0
-     */
-    void setMonths(const UnicodeString* months, int32_t count);
-
-    /**
-     * Gets short month strings. For example: "Jan", "Feb", etc.
-     *
-     * @param count Filled in with length of the array.
-     * @return the short month strings. (DateFormatSymbols retains ownership.)
-     * @stable ICU 2.0
-     */
-    const UnicodeString* getShortMonths(int32_t& count) const;
-
-    /**
-     * Sets short month strings. For example: "Jan", "Feb", etc.
-     * @param count        Filled in with length of the array.
-     * @param shortMonths  the new short month strings. (not adopted; caller retains ownership)
-     * @stable ICU 2.0
-     */
-    void setShortMonths(const UnicodeString* shortMonths, int32_t count);
-
-    /**
-     * Selector for date formatting context
-     * @stable ICU 3.6
-     */
-    enum DtContextType {
-         FORMAT,
-         STANDALONE,
-         DT_CONTEXT_COUNT
-    };
-
-    /**
-     * Selector for date formatting width
-     * @stable ICU 3.6
-     */
-    enum DtWidthType {
-         ABBREVIATED,
-         WIDE,
-         NARROW,
-         DT_WIDTH_COUNT
-    };
-
-    /**
-     * Gets month strings by width and context. For example: "January", "February", etc.
-     * @param count Filled in with length of the array.
-     * @param context The formatting context, either FORMAT or STANDALONE
-     * @param width   The width of returned strings, either WIDE, ABBREVIATED, or NARROW.
-     * @return the month strings. (DateFormatSymbols retains ownership.)
-     * @stable ICU 3.4
-     */
-    const UnicodeString* getMonths(int32_t& count, DtContextType context, DtWidthType width) const;
-
-    /**
-     * Sets month strings by width and context. For example: "January", "February", etc.
-     *
-     * @param months  The new month strings. (not adopted; caller retains ownership)
-     * @param count   Filled in with length of the array.
-     * @param context The formatting context, either FORMAT or STANDALONE
-     * @param width   The width of returned strings, either WIDE, ABBREVIATED, or NARROW.
-     * @stable ICU 3.6
-     */
-    void setMonths(const UnicodeString* months, int32_t count, DtContextType context, DtWidthType width);
-
-    /**
-     * Gets weekday strings. For example: "Sunday", "Monday", etc.
-     * @param count        Filled in with length of the array.
-     * @return the weekday strings. (DateFormatSymbols retains ownership.)
-     * @stable ICU 2.0
-     */
-    const UnicodeString* getWeekdays(int32_t& count) const;
-
-
-    /**
-     * Sets weekday strings. For example: "Sunday", "Monday", etc.
-     * @param weekdays     the new weekday strings. (not adopted; caller retains ownership)
-     * @param count        Filled in with length of the array.
-     * @stable ICU 2.0
-     */
-    void setWeekdays(const UnicodeString* weekdays, int32_t count);
-
-    /**
-     * Gets short weekday strings. For example: "Sun", "Mon", etc.
-     * @param count        Filled in with length of the array.
-     * @return             the short weekday strings. (DateFormatSymbols retains ownership.)
-     * @stable ICU 2.0
-     */
-    const UnicodeString* getShortWeekdays(int32_t& count) const;
-
-    /**
-     * Sets short weekday strings. For example: "Sun", "Mon", etc.
-     * @param shortWeekdays  the new short weekday strings. (not adopted; caller retains ownership)
-     * @param count          Filled in with length of the array.
-     * @stable ICU 2.0
-     */
-    void setShortWeekdays(const UnicodeString* shortWeekdays, int32_t count);
-
-    /**
-     * Gets weekday strings by width and context. For example: "Sunday", "Monday", etc.
-     * @param count   Filled in with length of the array.
-     * @param context The formatting context, either FORMAT or STANDALONE
-     * @param width   The width of returned strings, either WIDE, ABBREVIATED, or NARROW
-     * @return the month strings. (DateFormatSymbols retains ownership.)
-     * @stable ICU 3.4
-     */
-    const UnicodeString* getWeekdays(int32_t& count, DtContextType context, DtWidthType width) const;
-
-    /**
-     * Sets weekday strings by width and context. For example: "Sunday", "Monday", etc.
-     * @param weekdays  The new weekday strings. (not adopted; caller retains ownership)
-     * @param count     Filled in with length of the array.
-     * @param context   The formatting context, either FORMAT or STANDALONE
-     * @param width     The width of returned strings, either WIDE, ABBREVIATED, or NARROW
-     * @stable ICU 3.6
-     */
-    void setWeekdays(const UnicodeString* weekdays, int32_t count, DtContextType context, DtWidthType width);
-
-    /**
-     * Gets quarter strings by width and context. For example: "1st Quarter", "2nd Quarter", etc.
-     * @param count Filled in with length of the array.
-     * @param context The formatting context, either FORMAT or STANDALONE
-     * @param width   The width of returned strings, either WIDE or ABBREVIATED. There
-     *                are no NARROW quarters.
-     * @return the quarter strings. (DateFormatSymbols retains ownership.)
-     * @stable ICU 3.6
-     */
-    const UnicodeString* getQuarters(int32_t& count, DtContextType context, DtWidthType width) const;
-
-    /**
-     * Sets quarter strings by width and context. For example: "1st Quarter", "2nd Quarter", etc.
-     *
-     * @param quarters  The new quarter strings. (not adopted; caller retains ownership)
-     * @param count   Filled in with length of the array.
-     * @param context The formatting context, either FORMAT or STANDALONE
-     * @param width   The width of returned strings, either WIDE or ABBREVIATED. There
-     *                are no NARROW quarters.
-     * @stable ICU 3.6
-     */
-    void setQuarters(const UnicodeString* quarters, int32_t count, DtContextType context, DtWidthType width);
-
-    /**
-     * Gets AM/PM strings. For example: "AM" and "PM".
-     * @param count        Filled in with length of the array.
-     * @return             the weekday strings. (DateFormatSymbols retains ownership.)
-     * @stable ICU 2.0
-     */
-    const UnicodeString* getAmPmStrings(int32_t& count) const;
-
-    /**
-     * Sets ampm strings. For example: "AM" and "PM".
-     * @param ampms        the new ampm strings. (not adopted; caller retains ownership)
-     * @param count        Filled in with length of the array.
-     * @stable ICU 2.0
-     */
-    void setAmPmStrings(const UnicodeString* ampms, int32_t count);
-
-    /**
-     * Gets timezone strings. These strings are stored in a 2-dimensional array.
-     * @param rowCount      Output param to receive number of rows.
-     * @param columnCount   Output param to receive number of columns.
-     * @return              The timezone strings as a 2-d array. (DateFormatSymbols retains ownership.)
-     * @deprecated ICU 3.6
-     */
-    const UnicodeString** getZoneStrings(int32_t& rowCount, int32_t& columnCount) const;
-
-    /**
-     * Sets timezone strings. These strings are stored in a 2-dimensional array.
-     * @param strings       The timezone strings as a 2-d array to be copied. (not adopted; caller retains ownership)
-     * @param rowCount      The number of rows (count of first index).
-     * @param columnCount   The number of columns (count of second index).
-     * @stable ICU 2.0
-     */
-    void setZoneStrings(const UnicodeString* const* strings, int32_t rowCount, int32_t columnCount);
-
-    /**
-     * Get the non-localized date-time pattern characters.
-     * @return    the non-localized date-time pattern characters
-     * @stable ICU 2.0
-     */
-    static const UChar * U_EXPORT2 getPatternUChars(void);
-
-    /**
-     * Gets localized date-time pattern characters. For example: 'u', 't', etc.
-     * <p>
-     * Note: ICU no longer provides localized date-time pattern characters for a locale
-     * starting ICU 3.8.  This method returns the non-localized date-time pattern
-     * characters unless user defined localized data is set by setLocalPatternChars.
-     * @param result    Output param which will receive the localized date-time pattern characters.
-     * @return          A reference to 'result'.
-     * @stable ICU 2.0
-     */
-    UnicodeString& getLocalPatternChars(UnicodeString& result) const;
-
-    /**
-     * Sets localized date-time pattern characters. For example: 'u', 't', etc.
-     * @param newLocalPatternChars the new localized date-time
-     * pattern characters.
-     * @stable ICU 2.0
-     */
-    void setLocalPatternChars(const UnicodeString& newLocalPatternChars);
-
-    /**
-     * Returns the locale for this object. Two flavors are available:
-     * valid and actual locale.
-     * @stable ICU 2.8
-     */
-    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.2
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.2
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-private:
-
-    friend class SimpleDateFormat;
-    friend class DateFormatSymbolsSingleSetter; // see udat.cpp
-
-    /**
-     * Abbreviated era strings. For example: "AD" and "BC".
-     */
-    UnicodeString*  fEras;
-    int32_t         fErasCount;
-
-    /**
-     * Era name strings. For example: "Anno Domini" and "Before Christ".
-     */
-    UnicodeString*  fEraNames;
-    int32_t         fEraNamesCount;
-
-    /**
-     * Narrow era strings. For example: "A" and "B".
-     */
-    UnicodeString*  fNarrowEras;
-    int32_t         fNarrowErasCount;
-
-    /**
-     * Month strings. For example: "January", "February", etc.
-     */
-    UnicodeString*  fMonths;
-    int32_t         fMonthsCount;
-
-    /**
-     * Short month strings. For example: "Jan", "Feb", etc.
-     */
-    UnicodeString*  fShortMonths;
-    int32_t         fShortMonthsCount;
-
-    /**
-     * Narrow month strings. For example: "J", "F", etc.
-     */
-    UnicodeString*  fNarrowMonths;
-    int32_t         fNarrowMonthsCount;
-
-    /**
-     * Standalone Month strings. For example: "January", "February", etc.
-     */
-    UnicodeString*  fStandaloneMonths;
-    int32_t         fStandaloneMonthsCount;
-
-    /**
-     * Standalone Short month strings. For example: "Jan", "Feb", etc.
-     */
-    UnicodeString*  fStandaloneShortMonths;
-    int32_t         fStandaloneShortMonthsCount;
-
-    /**
-     * Standalone Narrow month strings. For example: "J", "F", etc.
-     */
-    UnicodeString*  fStandaloneNarrowMonths;
-    int32_t         fStandaloneNarrowMonthsCount;
-
-    /**
-     * Weekday strings. For example: "Sunday", "Monday", etc.
-     */
-    UnicodeString*  fWeekdays;
-    int32_t         fWeekdaysCount;
-
-    /**
-     * Short weekday strings. For example: "Sun", "Mon", etc.
-     */
-    UnicodeString*  fShortWeekdays;
-    int32_t         fShortWeekdaysCount;
-
-    /**
-     * Narrow weekday strings. For example: "Sun", "Mon", etc.
-     */
-    UnicodeString*  fNarrowWeekdays;
-    int32_t         fNarrowWeekdaysCount;
-
-    /**
-     * Standalone Weekday strings. For example: "Sunday", "Monday", etc.
-     */
-    UnicodeString*  fStandaloneWeekdays;
-    int32_t         fStandaloneWeekdaysCount;
-
-    /**
-     * Standalone Short weekday strings. For example: "Sun", "Mon", etc.
-     */
-    UnicodeString*  fStandaloneShortWeekdays;
-    int32_t         fStandaloneShortWeekdaysCount;
-
-    /**
-     * Standalone Narrow weekday strings. For example: "Sun", "Mon", etc.
-     */
-    UnicodeString*  fStandaloneNarrowWeekdays;
-    int32_t         fStandaloneNarrowWeekdaysCount;
-
-    /**
-     * Ampm strings. For example: "AM" and "PM".
-     */
-    UnicodeString*  fAmPms;
-    int32_t         fAmPmsCount;
-
-    /**
-     * Quarter strings. For example: "1st quarter", "2nd quarter", etc.
-     */
-    UnicodeString  *fQuarters;
-    int32_t         fQuartersCount;
-
-    /**
-     * Short quarters. For example: "Q1", "Q2", etc.
-     */
-    UnicodeString  *fShortQuarters;
-    int32_t         fShortQuartersCount;
-
-    /**
-     * Standalone quarter strings. For example: "1st quarter", "2nd quarter", etc.
-     */
-    UnicodeString  *fStandaloneQuarters;
-    int32_t         fStandaloneQuartersCount;
-
-    /**
-     * Standalone short quarter strings. For example: "Q1", "Q2", etc.
-     */
-    UnicodeString  *fStandaloneShortQuarters;
-    int32_t         fStandaloneShortQuartersCount;
-
-    /**
-     * The format data of all the timezones in this locale.
-     */
-    UnicodeString   **fZoneStrings;         // Zone string array set by setZoneStrings
-    UnicodeString   **fLocaleZoneStrings;   // Zone string array created by the locale
-    int32_t         fZoneStringsRowCount;
-    int32_t         fZoneStringsColCount;
-
-    const ZoneStringFormat  *fZoneStringFormat;
-    ZoneStringFormat        *fZSFLocal;         // Local ZoneStringFormat instance
-    SafeZoneStringFormatPtr *fZSFCachePtr;      // Cached ZoneStringFormat
-    Locale                  fZSFLocale;         // Locale used for getting ZoneStringFormat
-
-    /**
-     * Pattern string used for localized time zone GMT format.  For example, "GMT{0}"
-     */
-    UnicodeString   fGmtFormat;
-
-    /**
-     * Pattern strings used for formatting zone offset in a localized time zone GMT string.
-     */
-    UnicodeString  *fGmtHourFormats;
-    int32_t         fGmtHourFormatsCount; 
-
-    enum GMTHourType {
-        GMT_NEGATIVE_HMS = 0,
-        GMT_NEGATIVE_HM,
-        GMT_POSITIVE_HMS,
-        GMT_POSITIVE_HM,
-        GMT_HOUR_COUNT
-    };
-
-    /**
-     * Localized date-time pattern characters. For example: use 'u' as 'y'.
-     */
-    UnicodeString   fLocalPatternChars;
-
-private:
-    /** valid/actual locale information 
-     *  these are always ICU locales, so the length should not be a problem
-     */
-    char validLocale[ULOC_FULLNAME_CAPACITY];
-    char actualLocale[ULOC_FULLNAME_CAPACITY];
-
-    DateFormatSymbols(); // default constructor not implemented
-
-    /**
-     * Called by the constructors to actually load data from the resources
-     *
-     * @param locale               The locale to get symbols for.
-     * @param type                 Calendar Type (as from Calendar::getType())
-     * @param status               Input/output parameter, set to success or
-     *                             failure code upon return.
-     * @param useLastResortData    determine if use last resort data
-     */
-    void initializeData(const Locale&, const char *type, UErrorCode& status, UBool useLastResortData = FALSE);
-
-    /**
-     * Copy or alias an array in another object, as appropriate.
-     *
-     * @param dstArray    the copy destination array.
-     * @param dstCount    fill in with the lenth of 'dstArray'.
-     * @param srcArray    the source array to be copied.
-     * @param srcCount    the length of items to be copied from the 'srcArray'.
-     */
-    static void assignArray(UnicodeString*& dstArray,
-                            int32_t& dstCount,
-                            const UnicodeString* srcArray,
-                            int32_t srcCount);
-
-    /**
-     * Return true if the given arrays' contents are equal, or if the arrays are
-     * identical (pointers are equal).
-     *
-     * @param array1   one array to be compared with.
-     * @param array2   another array to be compared with.
-     * @param count    the length of items to be copied.
-     * @return         true if the given arrays' contents are equal, or if the arrays are
-     *                 identical (pointers are equal).
-     */
-    static UBool arrayCompare(const UnicodeString* array1,
-                             const UnicodeString* array2,
-                             int32_t count);
-
-    /**
-     * Create a copy, in fZoneStrings, of the given zone strings array. The
-     * member variables fZoneStringsRowCount and fZoneStringsColCount should be
-     * set already by the caller.
-     */
-    void createZoneStrings(const UnicodeString *const * otherStrings);
-
-    /**
-     * Delete all the storage owned by this object.
-     */
-    void dispose(void);
-
-    /**
-     * Copy all of the other's data to this.
-     * @param other the object to be copied.
-     */
-    void copyData(const DateFormatSymbols& other);
-
-
-    /**
-     * Returns a ZoneStringFormat, used only by SimpleDateFormat for now.
-     */
-    const ZoneStringFormat* getZoneStringFormat(void) const;
-
-    /**
-     * Create a ZoneStringFormat by locale if not yet availble
-     */
-    void initZoneStringFormat(void);
-
-    /**
-     * Create zone strings array by locale if not yet available
-     */
-    void initZoneStringsArray(void);
-
-    /**
-     * Delete just the zone strings.
-     */
-    void disposeZoneStrings(void);
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _DTFMTSYM
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/dtfmtsym.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/dtfmtsym.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/dtfmtsym.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/dtfmtsym.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,721 @@
+/*  
+********************************************************************************
+*   Copyright (C) 1997-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+********************************************************************************
+*
+* File DTFMTSYM.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   02/19/97    aliu        Converted from java.
+*    07/21/98    stephen        Added getZoneIndex()
+*                            Changed to match C++ conventions
+********************************************************************************
+*/
+     
+#ifndef DTFMTSYM_H
+#define DTFMTSYM_H
+ 
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/calendar.h"
+#include "unicode/uobject.h"
+#include "unicode/locid.h"
+#include "unicode/ures.h"
+
+/**
+ * \file 
+ * \brief C++ API: Symbols for formatting dates.
+ */
+
+U_NAMESPACE_BEGIN
+
+/* forward declaration */
+class SimpleDateFormat;
+class Hashtable;
+class ZoneStringFormat;
+class SafeZoneStringFormatPtr;
+
+/**
+ * DateFormatSymbols is a public class for encapsulating localizable date-time
+ * formatting data -- including timezone data. DateFormatSymbols is used by
+ * DateFormat and SimpleDateFormat.
+ * <P>
+ * Rather than first creating a DateFormatSymbols to get a date-time formatter
+ * by using a SimpleDateFormat constructor, clients are encouraged to create a
+ * date-time formatter using the getTimeInstance(), getDateInstance(), or
+ * getDateTimeInstance() method in DateFormat. Each of these methods can return a
+ * date/time formatter initialized with a default format pattern along with the
+ * date-time formatting data for a given or default locale. After a formatter is
+ * created, clients may modify the format pattern using the setPattern function
+ * as so desired. For more information on using these formatter factory
+ * functions, see DateFormat.
+ * <P>
+ * If clients decide to create a date-time formatter with a particular format
+ * pattern and locale, they can do so with new SimpleDateFormat(aPattern,
+ * new DateFormatSymbols(aLocale)).  This will load the appropriate date-time
+ * formatting data from the locale.
+ * <P>
+ * DateFormatSymbols objects are clonable. When clients obtain a
+ * DateFormatSymbols object, they can feel free to modify the date-time
+ * formatting data as necessary. For instance, clients can
+ * replace the localized date-time format pattern characters with the ones that
+ * they feel easy to remember. Or they can change the representative cities
+ * originally picked by default to using their favorite ones.
+ * <P>
+ * DateFormatSymbols are not expected to be subclassed. Data for a calendar is
+ * loaded out of resource bundles.  The 'type' parameter indicates the type of
+ * calendar, for example, "gregorian" or "japanese".  If the type is not gregorian
+ * (or NULL, or an empty string) then the type is appended to the resource name,
+ * for example,  'Eras_japanese' instead of 'Eras'.   If the resource 'Eras_japanese' did
+ * not exist (even in root), then this class will fall back to just 'Eras', that is,
+ * Gregorian data.  Therefore, the calendar implementor MUST ensure that the root
+ * locale at least contains any resources that are to be particularized for the
+ * calendar type.
+ */
+class U_I18N_API DateFormatSymbols : public UObject {
+public:
+    /**
+     * Construct a DateFormatSymbols object by loading format data from
+     * resources for the default locale, in the default calendar (Gregorian).
+     * <P>
+     * NOTE: This constructor will never fail; if it cannot get resource
+     * data for the default locale, it will return a last-resort object
+     * based on hard-coded strings.
+     *
+     * @param status    Status code.  Failure
+     *                  results if the resources for the default cannot be
+     *                  found or cannot be loaded
+     * @stable ICU 2.0
+     */
+    DateFormatSymbols(UErrorCode& status);
+
+    /**
+     * Construct a DateFormatSymbols object by loading format data from
+     * resources for the given locale, in the default calendar (Gregorian).
+     *
+     * @param locale    Locale to load format data from.
+     * @param status    Status code.  Failure
+     *                  results if the resources for the locale cannot be
+     *                  found or cannot be loaded
+     * @stable ICU 2.0
+     */
+    DateFormatSymbols(const Locale& locale,
+                      UErrorCode& status);
+
+    /**
+     * Construct a DateFormatSymbols object by loading format data from
+     * resources for the default locale, in the default calendar (Gregorian).
+     * <P>
+     * NOTE: This constructor will never fail; if it cannot get resource
+     * data for the default locale, it will return a last-resort object
+     * based on hard-coded strings.
+     *
+     * @param type      Type of calendar (as returned by Calendar::getType). 
+     *                  Will be used to access the correct set of strings.
+     *                  (NULL or empty string defaults to "gregorian".)
+     * @param status    Status code.  Failure
+     *                  results if the resources for the default cannot be
+     *                  found or cannot be loaded
+     * @internal
+     */
+    DateFormatSymbols(const char *type, UErrorCode& status);
+
+    /**
+     * Construct a DateFormatSymbols object by loading format data from
+     * resources for the given locale, in the default calendar (Gregorian).
+     *
+     * @param locale    Locale to load format data from.
+     * @param type      Type of calendar (as returned by Calendar::getType). 
+     *                  Will be used to access the correct set of strings.
+     *                  (NULL or empty string defaults to "gregorian".)
+     * @param status    Status code.  Failure
+     *                  results if the resources for the locale cannot be
+     *                  found or cannot be loaded
+     * @internal
+     */
+    DateFormatSymbols(const Locale& locale,
+                      const char *type,
+                      UErrorCode& status);
+
+    /**
+     * Copy constructor.
+     * @stable ICU 2.0
+     */
+    DateFormatSymbols(const DateFormatSymbols&);
+
+    /**
+     * Assignment operator.
+     * @stable ICU 2.0
+     */
+    DateFormatSymbols& operator=(const DateFormatSymbols&);
+
+    /**
+     * Destructor. This is nonvirtual because this class is not designed to be
+     * subclassed.
+     * @stable ICU 2.0
+     */
+    virtual ~DateFormatSymbols();
+
+    /**
+     * Return true if another object is semantically equal to this one.
+     *
+     * @param other    the DateFormatSymbols object to be compared with.
+     * @return         true if other is semantically equal to this.
+     * @stable ICU 2.0
+     */
+    UBool operator==(const DateFormatSymbols& other) const;
+
+    /**
+     * Return true if another object is semantically unequal to this one.
+     *
+     * @param other    the DateFormatSymbols object to be compared with.
+     * @return         true if other is semantically unequal to this.
+     * @stable ICU 2.0
+     */
+    UBool operator!=(const DateFormatSymbols& other) const { return !operator==(other); }
+
+    /**
+     * Gets abbreviated era strings. For example: "AD" and "BC".
+     *
+     * @param count    Filled in with length of the array.
+     * @return         the era strings.
+     * @stable ICU 2.0
+     */
+    const UnicodeString* getEras(int32_t& count) const;
+
+    /**
+     * Sets abbreviated era strings. For example: "AD" and "BC".
+     * @param eras  Array of era strings (DateFormatSymbols retains ownership.)
+     * @param count Filled in with length of the array.
+     * @stable ICU 2.0
+     */
+    void setEras(const UnicodeString* eras, int32_t count);
+
+    /**
+     * Gets era name strings. For example: "Anno Domini" and "Before Christ".
+     *
+     * @param count    Filled in with length of the array.
+     * @return         the era name strings.
+     * @stable ICU 3.4
+     */
+    const UnicodeString* getEraNames(int32_t& count) const;
+
+    /**
+     * Sets era name strings. For example: "Anno Domini" and "Before Christ".
+     * @param eraNames  Array of era name strings (DateFormatSymbols retains ownership.)
+     * @param count Filled in with length of the array.
+     * @stable ICU 3.6
+     */
+    void setEraNames(const UnicodeString* eraNames, int32_t count);
+
+    /**
+     * Gets narrow era strings. For example: A" and "D".
+     *
+     * @param count    Filled in with length of the array.
+     * @return         the narrow era strings.
+     * @draft ICU 4.2
+     */
+    const UnicodeString* getNarrowEras(int32_t& count) const;
+
+    /**
+     * Sets narrow era strings. For example: "A" and "B".
+     * @param narrowEras  Array of narrow era strings (DateFormatSymbols retains ownership.)
+     * @param count Filled in with length of the array.
+     * @draft ICU 4.2
+     */
+    void setNarrowEras(const UnicodeString* narrowEras, int32_t count);
+
+    /**
+     * Gets month strings. For example: "January", "February", etc.
+     * @param count Filled in with length of the array.
+     * @return the month strings. (DateFormatSymbols retains ownership.)
+     * @stable ICU 2.0
+     */
+    const UnicodeString* getMonths(int32_t& count) const;
+
+    /**
+     * Sets month strings. For example: "January", "February", etc.
+     *
+     * @param months    the new month strings. (not adopted; caller retains ownership)
+     * @param count     Filled in with length of the array.
+     * @stable ICU 2.0
+     */
+    void setMonths(const UnicodeString* months, int32_t count);
+
+    /**
+     * Gets short month strings. For example: "Jan", "Feb", etc.
+     *
+     * @param count Filled in with length of the array.
+     * @return the short month strings. (DateFormatSymbols retains ownership.)
+     * @stable ICU 2.0
+     */
+    const UnicodeString* getShortMonths(int32_t& count) const;
+
+    /**
+     * Sets short month strings. For example: "Jan", "Feb", etc.
+     * @param count        Filled in with length of the array.
+     * @param shortMonths  the new short month strings. (not adopted; caller retains ownership)
+     * @stable ICU 2.0
+     */
+    void setShortMonths(const UnicodeString* shortMonths, int32_t count);
+
+    /**
+     * Selector for date formatting context
+     * @stable ICU 3.6
+     */
+    enum DtContextType {
+         FORMAT,
+         STANDALONE,
+         DT_CONTEXT_COUNT
+    };
+
+    /**
+     * Selector for date formatting width
+     * @stable ICU 3.6
+     */
+    enum DtWidthType {
+         ABBREVIATED,
+         WIDE,
+         NARROW,
+         DT_WIDTH_COUNT
+    };
+
+    /**
+     * Gets month strings by width and context. For example: "January", "February", etc.
+     * @param count Filled in with length of the array.
+     * @param context The formatting context, either FORMAT or STANDALONE
+     * @param width   The width of returned strings, either WIDE, ABBREVIATED, or NARROW.
+     * @return the month strings. (DateFormatSymbols retains ownership.)
+     * @stable ICU 3.4
+     */
+    const UnicodeString* getMonths(int32_t& count, DtContextType context, DtWidthType width) const;
+
+    /**
+     * Sets month strings by width and context. For example: "January", "February", etc.
+     *
+     * @param months  The new month strings. (not adopted; caller retains ownership)
+     * @param count   Filled in with length of the array.
+     * @param context The formatting context, either FORMAT or STANDALONE
+     * @param width   The width of returned strings, either WIDE, ABBREVIATED, or NARROW.
+     * @stable ICU 3.6
+     */
+    void setMonths(const UnicodeString* months, int32_t count, DtContextType context, DtWidthType width);
+
+    /**
+     * Gets weekday strings. For example: "Sunday", "Monday", etc.
+     * @param count        Filled in with length of the array.
+     * @return the weekday strings. (DateFormatSymbols retains ownership.)
+     * @stable ICU 2.0
+     */
+    const UnicodeString* getWeekdays(int32_t& count) const;
+
+
+    /**
+     * Sets weekday strings. For example: "Sunday", "Monday", etc.
+     * @param weekdays     the new weekday strings. (not adopted; caller retains ownership)
+     * @param count        Filled in with length of the array.
+     * @stable ICU 2.0
+     */
+    void setWeekdays(const UnicodeString* weekdays, int32_t count);
+
+    /**
+     * Gets short weekday strings. For example: "Sun", "Mon", etc.
+     * @param count        Filled in with length of the array.
+     * @return             the short weekday strings. (DateFormatSymbols retains ownership.)
+     * @stable ICU 2.0
+     */
+    const UnicodeString* getShortWeekdays(int32_t& count) const;
+
+    /**
+     * Sets short weekday strings. For example: "Sun", "Mon", etc.
+     * @param shortWeekdays  the new short weekday strings. (not adopted; caller retains ownership)
+     * @param count          Filled in with length of the array.
+     * @stable ICU 2.0
+     */
+    void setShortWeekdays(const UnicodeString* shortWeekdays, int32_t count);
+
+    /**
+     * Gets weekday strings by width and context. For example: "Sunday", "Monday", etc.
+     * @param count   Filled in with length of the array.
+     * @param context The formatting context, either FORMAT or STANDALONE
+     * @param width   The width of returned strings, either WIDE, ABBREVIATED, or NARROW
+     * @return the month strings. (DateFormatSymbols retains ownership.)
+     * @stable ICU 3.4
+     */
+    const UnicodeString* getWeekdays(int32_t& count, DtContextType context, DtWidthType width) const;
+
+    /**
+     * Sets weekday strings by width and context. For example: "Sunday", "Monday", etc.
+     * @param weekdays  The new weekday strings. (not adopted; caller retains ownership)
+     * @param count     Filled in with length of the array.
+     * @param context   The formatting context, either FORMAT or STANDALONE
+     * @param width     The width of returned strings, either WIDE, ABBREVIATED, or NARROW
+     * @stable ICU 3.6
+     */
+    void setWeekdays(const UnicodeString* weekdays, int32_t count, DtContextType context, DtWidthType width);
+
+    /**
+     * Gets quarter strings by width and context. For example: "1st Quarter", "2nd Quarter", etc.
+     * @param count Filled in with length of the array.
+     * @param context The formatting context, either FORMAT or STANDALONE
+     * @param width   The width of returned strings, either WIDE or ABBREVIATED. There
+     *                are no NARROW quarters.
+     * @return the quarter strings. (DateFormatSymbols retains ownership.)
+     * @stable ICU 3.6
+     */
+    const UnicodeString* getQuarters(int32_t& count, DtContextType context, DtWidthType width) const;
+
+    /**
+     * Sets quarter strings by width and context. For example: "1st Quarter", "2nd Quarter", etc.
+     *
+     * @param quarters  The new quarter strings. (not adopted; caller retains ownership)
+     * @param count   Filled in with length of the array.
+     * @param context The formatting context, either FORMAT or STANDALONE
+     * @param width   The width of returned strings, either WIDE or ABBREVIATED. There
+     *                are no NARROW quarters.
+     * @stable ICU 3.6
+     */
+    void setQuarters(const UnicodeString* quarters, int32_t count, DtContextType context, DtWidthType width);
+
+    /**
+     * Gets AM/PM strings. For example: "AM" and "PM".
+     * @param count        Filled in with length of the array.
+     * @return             the weekday strings. (DateFormatSymbols retains ownership.)
+     * @stable ICU 2.0
+     */
+    const UnicodeString* getAmPmStrings(int32_t& count) const;
+
+    /**
+     * Sets ampm strings. For example: "AM" and "PM".
+     * @param ampms        the new ampm strings. (not adopted; caller retains ownership)
+     * @param count        Filled in with length of the array.
+     * @stable ICU 2.0
+     */
+    void setAmPmStrings(const UnicodeString* ampms, int32_t count);
+
+    /**
+     * Gets timezone strings. These strings are stored in a 2-dimensional array.
+     * @param rowCount      Output param to receive number of rows.
+     * @param columnCount   Output param to receive number of columns.
+     * @return              The timezone strings as a 2-d array. (DateFormatSymbols retains ownership.)
+     * @deprecated ICU 3.6
+     */
+    const UnicodeString** getZoneStrings(int32_t& rowCount, int32_t& columnCount) const;
+
+    /**
+     * Sets timezone strings. These strings are stored in a 2-dimensional array.
+     * @param strings       The timezone strings as a 2-d array to be copied. (not adopted; caller retains ownership)
+     * @param rowCount      The number of rows (count of first index).
+     * @param columnCount   The number of columns (count of second index).
+     * @stable ICU 2.0
+     */
+    void setZoneStrings(const UnicodeString* const* strings, int32_t rowCount, int32_t columnCount);
+
+    /**
+     * Get the non-localized date-time pattern characters.
+     * @return    the non-localized date-time pattern characters
+     * @stable ICU 2.0
+     */
+    static const UChar * U_EXPORT2 getPatternUChars(void);
+
+    /**
+     * Gets localized date-time pattern characters. For example: 'u', 't', etc.
+     * <p>
+     * Note: ICU no longer provides localized date-time pattern characters for a locale
+     * starting ICU 3.8.  This method returns the non-localized date-time pattern
+     * characters unless user defined localized data is set by setLocalPatternChars.
+     * @param result    Output param which will receive the localized date-time pattern characters.
+     * @return          A reference to 'result'.
+     * @stable ICU 2.0
+     */
+    UnicodeString& getLocalPatternChars(UnicodeString& result) const;
+
+    /**
+     * Sets localized date-time pattern characters. For example: 'u', 't', etc.
+     * @param newLocalPatternChars the new localized date-time
+     * pattern characters.
+     * @stable ICU 2.0
+     */
+    void setLocalPatternChars(const UnicodeString& newLocalPatternChars);
+
+    /**
+     * Returns the locale for this object. Two flavors are available:
+     * valid and actual locale.
+     * @stable ICU 2.8
+     */
+    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+
+    friend class SimpleDateFormat;
+    friend class DateFormatSymbolsSingleSetter; // see udat.cpp
+
+    /**
+     * Abbreviated era strings. For example: "AD" and "BC".
+     */
+    UnicodeString*  fEras;
+    int32_t         fErasCount;
+
+    /**
+     * Era name strings. For example: "Anno Domini" and "Before Christ".
+     */
+    UnicodeString*  fEraNames;
+    int32_t         fEraNamesCount;
+
+    /**
+     * Narrow era strings. For example: "A" and "B".
+     */
+    UnicodeString*  fNarrowEras;
+    int32_t         fNarrowErasCount;
+
+    /**
+     * Month strings. For example: "January", "February", etc.
+     */
+    UnicodeString*  fMonths;
+    int32_t         fMonthsCount;
+
+    /**
+     * Short month strings. For example: "Jan", "Feb", etc.
+     */
+    UnicodeString*  fShortMonths;
+    int32_t         fShortMonthsCount;
+
+    /**
+     * Narrow month strings. For example: "J", "F", etc.
+     */
+    UnicodeString*  fNarrowMonths;
+    int32_t         fNarrowMonthsCount;
+
+    /**
+     * Standalone Month strings. For example: "January", "February", etc.
+     */
+    UnicodeString*  fStandaloneMonths;
+    int32_t         fStandaloneMonthsCount;
+
+    /**
+     * Standalone Short month strings. For example: "Jan", "Feb", etc.
+     */
+    UnicodeString*  fStandaloneShortMonths;
+    int32_t         fStandaloneShortMonthsCount;
+
+    /**
+     * Standalone Narrow month strings. For example: "J", "F", etc.
+     */
+    UnicodeString*  fStandaloneNarrowMonths;
+    int32_t         fStandaloneNarrowMonthsCount;
+
+    /**
+     * Weekday strings. For example: "Sunday", "Monday", etc.
+     */
+    UnicodeString*  fWeekdays;
+    int32_t         fWeekdaysCount;
+
+    /**
+     * Short weekday strings. For example: "Sun", "Mon", etc.
+     */
+    UnicodeString*  fShortWeekdays;
+    int32_t         fShortWeekdaysCount;
+
+    /**
+     * Narrow weekday strings. For example: "Sun", "Mon", etc.
+     */
+    UnicodeString*  fNarrowWeekdays;
+    int32_t         fNarrowWeekdaysCount;
+
+    /**
+     * Standalone Weekday strings. For example: "Sunday", "Monday", etc.
+     */
+    UnicodeString*  fStandaloneWeekdays;
+    int32_t         fStandaloneWeekdaysCount;
+
+    /**
+     * Standalone Short weekday strings. For example: "Sun", "Mon", etc.
+     */
+    UnicodeString*  fStandaloneShortWeekdays;
+    int32_t         fStandaloneShortWeekdaysCount;
+
+    /**
+     * Standalone Narrow weekday strings. For example: "Sun", "Mon", etc.
+     */
+    UnicodeString*  fStandaloneNarrowWeekdays;
+    int32_t         fStandaloneNarrowWeekdaysCount;
+
+    /**
+     * Ampm strings. For example: "AM" and "PM".
+     */
+    UnicodeString*  fAmPms;
+    int32_t         fAmPmsCount;
+
+    /**
+     * Quarter strings. For example: "1st quarter", "2nd quarter", etc.
+     */
+    UnicodeString  *fQuarters;
+    int32_t         fQuartersCount;
+
+    /**
+     * Short quarters. For example: "Q1", "Q2", etc.
+     */
+    UnicodeString  *fShortQuarters;
+    int32_t         fShortQuartersCount;
+
+    /**
+     * Standalone quarter strings. For example: "1st quarter", "2nd quarter", etc.
+     */
+    UnicodeString  *fStandaloneQuarters;
+    int32_t         fStandaloneQuartersCount;
+
+    /**
+     * Standalone short quarter strings. For example: "Q1", "Q2", etc.
+     */
+    UnicodeString  *fStandaloneShortQuarters;
+    int32_t         fStandaloneShortQuartersCount;
+
+    /**
+     * The format data of all the timezones in this locale.
+     */
+    UnicodeString   **fZoneStrings;         // Zone string array set by setZoneStrings
+    UnicodeString   **fLocaleZoneStrings;   // Zone string array created by the locale
+    int32_t         fZoneStringsRowCount;
+    int32_t         fZoneStringsColCount;
+
+    const ZoneStringFormat  *fZoneStringFormat;
+    ZoneStringFormat        *fZSFLocal;         // Local ZoneStringFormat instance
+    SafeZoneStringFormatPtr *fZSFCachePtr;      // Cached ZoneStringFormat
+    Locale                  fZSFLocale;         // Locale used for getting ZoneStringFormat
+
+    /**
+     * Pattern string used for localized time zone GMT format.  For example, "GMT{0}"
+     */
+    UnicodeString   fGmtFormat;
+
+    /**
+     * Pattern strings used for formatting zone offset in a localized time zone GMT string.
+     */
+    UnicodeString  *fGmtHourFormats;
+    int32_t         fGmtHourFormatsCount; 
+
+    enum GMTHourType {
+        GMT_NEGATIVE_HMS = 0,
+        GMT_NEGATIVE_HM,
+        GMT_POSITIVE_HMS,
+        GMT_POSITIVE_HM,
+        GMT_HOUR_COUNT
+    };
+
+    /**
+     * Localized date-time pattern characters. For example: use 'u' as 'y'.
+     */
+    UnicodeString   fLocalPatternChars;
+
+private:
+    /** valid/actual locale information 
+     *  these are always ICU locales, so the length should not be a problem
+     */
+    char validLocale[ULOC_FULLNAME_CAPACITY];
+    char actualLocale[ULOC_FULLNAME_CAPACITY];
+
+    DateFormatSymbols(); // default constructor not implemented
+
+    /**
+     * Called by the constructors to actually load data from the resources
+     *
+     * @param locale               The locale to get symbols for.
+     * @param type                 Calendar Type (as from Calendar::getType())
+     * @param status               Input/output parameter, set to success or
+     *                             failure code upon return.
+     * @param useLastResortData    determine if use last resort data
+     */
+    void initializeData(const Locale&, const char *type, UErrorCode& status, UBool useLastResortData = FALSE);
+
+    /**
+     * Copy or alias an array in another object, as appropriate.
+     *
+     * @param dstArray    the copy destination array.
+     * @param dstCount    fill in with the lenth of 'dstArray'.
+     * @param srcArray    the source array to be copied.
+     * @param srcCount    the length of items to be copied from the 'srcArray'.
+     */
+    static void assignArray(UnicodeString*& dstArray,
+                            int32_t& dstCount,
+                            const UnicodeString* srcArray,
+                            int32_t srcCount);
+
+    /**
+     * Return true if the given arrays' contents are equal, or if the arrays are
+     * identical (pointers are equal).
+     *
+     * @param array1   one array to be compared with.
+     * @param array2   another array to be compared with.
+     * @param count    the length of items to be copied.
+     * @return         true if the given arrays' contents are equal, or if the arrays are
+     *                 identical (pointers are equal).
+     */
+    static UBool arrayCompare(const UnicodeString* array1,
+                             const UnicodeString* array2,
+                             int32_t count);
+
+    /**
+     * Create a copy, in fZoneStrings, of the given zone strings array. The
+     * member variables fZoneStringsRowCount and fZoneStringsColCount should be
+     * set already by the caller.
+     */
+    void createZoneStrings(const UnicodeString *const * otherStrings);
+
+    /**
+     * Delete all the storage owned by this object.
+     */
+    void dispose(void);
+
+    /**
+     * Copy all of the other's data to this.
+     * @param other the object to be copied.
+     */
+    void copyData(const DateFormatSymbols& other);
+
+
+    /**
+     * Returns a ZoneStringFormat, used only by SimpleDateFormat for now.
+     */
+    const ZoneStringFormat* getZoneStringFormat(void) const;
+
+    /**
+     * Create a ZoneStringFormat by locale if not yet availble
+     */
+    void initZoneStringFormat(void);
+
+    /**
+     * Create zone strings array by locale if not yet available
+     */
+    void initZoneStringsArray(void);
+
+    /**
+     * Delete just the zone strings.
+     */
+    void disposeZoneStrings(void);
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _DTFMTSYM
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/dtintrv.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/dtintrv.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/dtintrv.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,159 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 2008, International Business Machines Corporation and
-* others. All Rights Reserved.
-*******************************************************************************
-*
-* File DTINTRV.H 
-*
-*******************************************************************************
-*/
-
-#ifndef __DTINTRV_H__
-#define __DTINTRV_H__
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-
-/**
- * \file
- * \brief C++ API: Date Interval data type
- */
-
-
-U_NAMESPACE_BEGIN
-
-
-/**
- * This class represents a date interval.
- * It is a pair of UDate representing from UDate 1 to UDate 2.
- * @draft ICU 4.0
-**/
-class U_COMMON_API DateInterval : public UObject {
-public:
-
-    /** 
-     * Construct a DateInterval given a from date and a to date.
-     * @param fromDate  The from date in date interval.
-     * @param toDate    The to date in date interval.
-     * @draft ICU 4.0
-     */
-    DateInterval(UDate fromDate, UDate toDate);
-
-    /**
-     * destructor
-     * @draft ICU 4.0
-     */
-    virtual ~DateInterval();
- 
-    /** 
-     * Get the from date.
-     * @return  the from date in dateInterval.
-     * @draft ICU 4.0
-     */
-    UDate getFromDate() const;
-
-    /** 
-     * Get the to date.
-     * @return  the to date in dateInterval.
-     * @draft ICU 4.0
-     */
-    UDate getToDate() const;
-
-
-    /**
-     * Return the class ID for this class. This is useful only for comparing to
-     * a return value from getDynamicClassID(). For example:
-     * <pre>
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       erived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @draft ICU 4.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
-     * method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone()
-     * methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @draft ICU 4.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-
-    
-    /**
-     * Copy constructor.
-     * @draft ICU 4.0
-     */
-    DateInterval(const DateInterval& other);
-
-    /**
-     * Default assignment operator
-     * @draft ICU 4.0
-     */
-    DateInterval& operator=(const DateInterval&);
-
-    /**
-     * Equality operator.
-     * @return TRUE if the two DateIntervals are the same
-     * @draft ICU 4.0
-     */
-    virtual UBool operator==(const DateInterval& other) const;
-
-    /**
-     * Non-equality operator
-     * @return TRUE if the two DateIntervals are not the same
-     * @draft ICU 4.0
-     */
-    UBool operator!=(const DateInterval& other) const;
-
-
-    /**
-     * clone this object. 
-     * The caller owns the result and should delete it when done.
-     * @return a cloned DateInterval
-     * @draft ICU 4.0
-     */
-     virtual DateInterval* clone() const;
-
-private:
-    /** 
-     * Default constructor, not implemented.
-     * @draft ICU 4.0
-     */
-    DateInterval();
-
-    UDate fromDate;
-    UDate toDate;
-
-} ;// end class DateInterval
-
-
-inline UDate 
-DateInterval::getFromDate() const { 
-    return fromDate; 
-}
-
-
-inline UDate 
-DateInterval::getToDate() const { 
-    return toDate; 
-}
-
-
-inline UBool 
-DateInterval::operator!=(const DateInterval& other) const { 
-    return ( !operator==(other) );
-}
-
-
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/dtintrv.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/dtintrv.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/dtintrv.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/dtintrv.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,159 @@
+/*
+*******************************************************************************
+* Copyright (C) 2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTINTRV.H 
+*
+*******************************************************************************
+*/
+
+#ifndef __DTINTRV_H__
+#define __DTINTRV_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Date Interval data type
+ */
+
+
+U_NAMESPACE_BEGIN
+
+
+/**
+ * This class represents a date interval.
+ * It is a pair of UDate representing from UDate 1 to UDate 2.
+ * @draft ICU 4.0
+**/
+class U_COMMON_API DateInterval : public UObject {
+public:
+
+    /** 
+     * Construct a DateInterval given a from date and a to date.
+     * @param fromDate  The from date in date interval.
+     * @param toDate    The to date in date interval.
+     * @draft ICU 4.0
+     */
+    DateInterval(UDate fromDate, UDate toDate);
+
+    /**
+     * destructor
+     * @draft ICU 4.0
+     */
+    virtual ~DateInterval();
+ 
+    /** 
+     * Get the from date.
+     * @return  the from date in dateInterval.
+     * @draft ICU 4.0
+     */
+    UDate getFromDate() const;
+
+    /** 
+     * Get the to date.
+     * @return  the to date in dateInterval.
+     * @draft ICU 4.0
+     */
+    UDate getToDate() const;
+
+
+    /**
+     * Return the class ID for this class. This is useful only for comparing to
+     * a return value from getDynamicClassID(). For example:
+     * <pre>
+     * .   Base* polymorphic_pointer = createPolymorphicObject();
+     * .   if (polymorphic_pointer->getDynamicClassID() ==
+     * .       erived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @draft ICU 4.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+     * method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @draft ICU 4.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+    
+    /**
+     * Copy constructor.
+     * @draft ICU 4.0
+     */
+    DateInterval(const DateInterval& other);
+
+    /**
+     * Default assignment operator
+     * @draft ICU 4.0
+     */
+    DateInterval& operator=(const DateInterval&);
+
+    /**
+     * Equality operator.
+     * @return TRUE if the two DateIntervals are the same
+     * @draft ICU 4.0
+     */
+    virtual UBool operator==(const DateInterval& other) const;
+
+    /**
+     * Non-equality operator
+     * @return TRUE if the two DateIntervals are not the same
+     * @draft ICU 4.0
+     */
+    UBool operator!=(const DateInterval& other) const;
+
+
+    /**
+     * clone this object. 
+     * The caller owns the result and should delete it when done.
+     * @return a cloned DateInterval
+     * @draft ICU 4.0
+     */
+     virtual DateInterval* clone() const;
+
+private:
+    /** 
+     * Default constructor, not implemented.
+     * @draft ICU 4.0
+     */
+    DateInterval();
+
+    UDate fromDate;
+    UDate toDate;
+
+} ;// end class DateInterval
+
+
+inline UDate 
+DateInterval::getFromDate() const { 
+    return fromDate; 
+}
+
+
+inline UDate 
+DateInterval::getToDate() const { 
+    return toDate; 
+}
+
+
+inline UBool 
+DateInterval::operator!=(const DateInterval& other) const { 
+    return ( !operator==(other) );
+}
+
+
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/dtitvfmt.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/dtitvfmt.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/dtitvfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,949 +0,0 @@
-/********************************************************************************
-* Copyright (C) 2008, International Business Machines Corporation and others. All Rights Reserved.
-*******************************************************************************
-*
-* File DTITVFMT.H
-*
-*******************************************************************************
-*/
-
-#ifndef __DTITVFMT_H__
-#define __DTITVFMT_H__
-
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Format and parse date interval in a language-independent manner.
- */
- 
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/ucal.h"
-#include "unicode/smpdtfmt.h"
-#include "unicode/dtintrv.h"
-#include "unicode/dtitvinf.h"
-
-U_NAMESPACE_BEGIN
-
-
-
-/**
- * DateIntervalFormat is a class for formatting and parsing date 
- * intervals in a language-independent manner. 
- * Date interval formatting is supported in Gregorian calendar only.
- * And only formatting is supported. Parsing is not supported.
- *
- * <P>
- * Date interval means from one date to another date,
- * for example, from "Jan 11, 2008" to "Jan 18, 2008".
- * We introduced class DateInterval to represent it.
- * DateInterval is a pair of UDate, which is 
- * the standard milliseconds since 24:00 GMT, Jan 1, 1970.
- *
- * <P>
- * DateIntervalFormat formats a DateInterval into
- * text as compactly as possible. 
- * For example, the date interval format from "Jan 11, 2008" to "Jan 18,. 2008"
- * is "Jan 11-18, 2008" for English.
- * And it parses text into DateInterval, 
- * although initially, parsing is not supported. 
- *
- * <P>
- * There is no structural information in date time patterns. 
- * For any punctuations and string literals inside a date time pattern, 
- * we do not know whether it is just a separator, or a prefix, or a suffix. 
- * Without such information, so, it is difficult to generate a sub-pattern 
- * (or super-pattern) by algorithm.
- * So, formatting a DateInterval is pattern-driven. It is very
- * similar to formatting in SimpleDateFormat.
- * We introduce class DateIntervalInfo to save date interval 
- * patterns, similar to date time pattern in SimpleDateFormat.
- *
- * <P>
- * Logically, the interval patterns are mappings
- * from (skeleton, the_largest_different_calendar_field)
- * to (date_interval_pattern).
- *
- * <P>
- * A skeleton 
- * <ol>
- * <li>
- * only keeps the field pattern letter and ignores all other parts 
- * in a pattern, such as space, punctuations, and string literals.
- * </li>
- * <li>
- * hides the order of fields. 
- * </li>
- * <li>
- * might hide a field's pattern letter length.
- * </li>
- * </ol>
- *
- * For those non-digit calendar fields, the pattern letter length is 
- * important, such as MMM, MMMM, and MMMMM; EEE and EEEE, 
- * and the field's pattern letter length is honored.
- *    
- * For the digit calendar fields,  such as M or MM, d or dd, yy or yyyy, 
- * the field pattern length is ignored and the best match, which is defined 
- * in date time patterns, will be returned without honor the field pattern
- * letter length in skeleton.
- *
- * <P>
- * The calendar fields we support for interval formatting are:
- * year, month, date, day-of-week, am-pm, hour, hour-of-day, and minute.
- * Those calendar fields can be defined in the following order:
- * year >  month > date > hour (in day) >  minute 
- *  
- * The largest different calendar fields between 2 calendars is the
- * first different calendar field in above order.
- *
- * For example: the largest different calendar fields between "Jan 10, 2007" 
- * and "Feb 20, 2008" is year.
- *
- * <P>
- * For other calendar fields, the compact interval formatting is not
- * supported. And the interval format will be fall back to fall-back
- * patterns, which is mostly "{date0} - {date1}".
- *   
- * <P>
- * There is a set of pre-defined static skeleton strings.
- * There are pre-defined interval patterns for those pre-defined skeletons
- * in locales' resource files.
- * For example, for a skeleton UDAT_YEAR_ABBR_MONTH_DAY, which is  "yMMMd",
- * in  en_US, if the largest different calendar field between date1 and date2 
- * is "year", the date interval pattern  is "MMM d, yyyy - MMM d, yyyy", 
- * such as "Jan 10, 2007 - Jan 10, 2008".
- * If the largest different calendar field between date1 and date2 is "month",
- * the date interval pattern is "MMM d - MMM d, yyyy",
- * such as "Jan 10 - Feb 10, 2007".
- * If the largest different calendar field between date1 and date2 is "day",
- * the date interval pattern is ""MMM d-d, yyyy", such as "Jan 10-20, 2007".
- *
- * For date skeleton, the interval patterns when year, or month, or date is 
- * different are defined in resource files.
- * For time skeleton, the interval patterns when am/pm, or hour, or minute is
- * different are defined in resource files.
- *
- * <P>
- * If a skeleton is not found in a locale's DateIntervalInfo, which means
- * the interval patterns for the skeleton is not defined in resource file,
- * the interval pattern will falls back to the interval "fallback" pattern 
- * defined in resource file.
- * If the interval "fallback" pattern is not defined, the default fall-back
- * is "{date0} - {data1}".
- *
- * <P>
- * For the combination of date and time, 
- * The rule to generate interval patterns are:
- * <ol>
- * <li>
- *    when the year, month, or day differs, falls back to fall-back
- *    interval pattern, which mostly is the concatenate the two original 
- *    expressions with a separator between, 
- *    For example, interval pattern from "Jan 10, 2007 10:10 am" 
- *    to "Jan 11, 2007 10:10am" is 
- *    "Jan 10, 2007 10:10 am - Jan 11, 2007 10:10am" 
- * </li>
- * <li>
- *    otherwise, present the date followed by the range expression 
- *    for the time.
- *    For example, interval pattern from "Jan 10, 2007 10:10 am" 
- *    to "Jan 10, 2007 11:10am" is "Jan 10, 2007 10:10 am - 11:10am" 
- * </li>
- * </ol>
- *
- *
- * <P>
- * If two dates are the same, the interval pattern is the single date pattern.
- * For example, interval pattern from "Jan 10, 2007" to "Jan 10, 2007" is 
- * "Jan 10, 2007".
- *
- * Or if the presenting fields between 2 dates have the exact same values,
- * the interval pattern is the  single date pattern. 
- * For example, if user only requests year and month,
- * the interval pattern from "Jan 10, 2007" to "Jan 20, 2007" is "Jan 2007".
- *
- * <P>
- * DateIntervalFormat needs the following information for correct 
- * formatting: time zone, calendar type, pattern, date format symbols, 
- * and date interval patterns.
- * It can be instantiated in 2 ways:
- * <ol>
- * <li>
- *    create an instance using default or given locale plus given skeleton.
- *    Users are encouraged to created date interval formatter this way and 
- *    to use the pre-defined skeleton macros, such as
- *    UDAT_YEAR_NUM_MONTH, which consists the calendar fields and
- *    the format style. 
- * </li>
- * <li>
- *    create an instance using default or given locale plus given skeleton
- *    plus a given DateIntervalInfo.
- *    This factory method is for powerful users who want to provide their own 
- *    interval patterns. 
- *    Locale provides the timezone, calendar, and format symbols information.
- *    Local plus skeleton provides full pattern information.
- *    DateIntervalInfo provides the date interval patterns.
- * </li>
- * </ol>
- *
- * <P>
- * For the calendar field pattern letter, such as G, y, M, d, a, h, H, m, s etc.
- * DateIntervalFormat uses the same syntax as that of
- * DateTime format.
- * 
- * <P>
- * Code Sample: general usage
- * <pre>
- * \code
- *   // the date interval object which the DateIntervalFormat formats on
- *   // and parses into
- *   DateInterval*  dtInterval = new DateInterval(1000*3600*24, 1000*3600*24*2);
- *   UErrorCode status = U_ZERO_ERROR;
- *   DateIntervalFormat* dtIntervalFmt = DateIntervalFormat::createInstance(
- *                           UDAT_YEAR_MONTH_DAY, 
- *                           Locale("en", "GB", ""), status);
- *   UnicodeUnicodeString dateIntervalString;
- *   FieldPosition pos = 0;
- *   // formatting
- *   dtIntervalFmt->format(dtInterval, dateIntervalUnicodeString, pos, status);
- *   delete dtIntervalFmt;
- * \endcode
- * </pre>
- */
-
-class U_I18N_API DateIntervalFormat : public Format {
-public:
-
-    /**
-     * Construct a DateIntervalFormat from skeleton and  the default locale.
-     *
-     * This is a convenient override of 
-     * createInstance(const UnicodeString& skeleton, const Locale& locale,
-     *                UErrorCode&)  
-     * with the value of locale as default locale.
-     *
-     * @param skeleton  the skeleton on which interval format based.
-     * @param status    output param set to success/failure code on exit
-     * @return          a date time interval formatter which the caller owns.
-     * @draft ICU 4.0
-     */
-    static DateIntervalFormat* U_EXPORT2 createInstance(
-                                               const UnicodeString& skeleton,
-                                               UErrorCode& status);
-
-    /**
-     * Construct a DateIntervalFormat from skeleton and a given locale.
-     * <P>
-     * In this factory method, 
-     * the date interval pattern information is load from resource files.
-     * Users are encouraged to created date interval formatter this way and 
-     * to use the pre-defined skeleton macros.
-     *
-     * <P>
-     * There are pre-defined skeletons (defined in udate.h) having predefined 
-     * interval patterns in resource files.
-     * Users are encouraged to use those macros.
-     * For example: 
-     * DateIntervalFormat::createInstance(UDAT_MONTH_DAY, status) 
-     *
-     * The given Locale provides the interval patterns.
-     * For example, for en_GB, if skeleton is UDAT_YEAR_ABBR_MONTH_WEEKDAY_DAY,
-     * which is "yMMMEEEd",
-     * the interval patterns defined in resource file to above skeleton are:
-     * "EEE, d MMM, yyyy - EEE, d MMM, yyyy" for year differs,
-     * "EEE, d MMM - EEE, d MMM, yyyy" for month differs,
-     * "EEE, d - EEE, d MMM, yyyy" for day differs,
-     * @param skeleton  the skeleton on which interval format based.
-     * @param locale    the given locale
-     * @param status    output param set to success/failure code on exit
-     * @return          a date time interval formatter which the caller owns.
-     * @draft ICU 4.0
-     */
-
-    static DateIntervalFormat* U_EXPORT2 createInstance(
-                                               const UnicodeString& skeleton,
-                                               const Locale& locale,
-                                               UErrorCode& status);
-
-    /**
-     * Construct a DateIntervalFormat from skeleton
-     *  DateIntervalInfo, and default locale.
-     *
-     * This is a convenient override of
-     * createInstance(const UnicodeString& skeleton, const Locale& locale, 
-     *                const DateIntervalInfo& dtitvinf, UErrorCode&)
-     * with the locale value as default locale.
-     *
-     * @param skeleton  the skeleton on which interval format based.
-     * @param dtitvinf  the DateIntervalInfo object. 
-     * @param status    output param set to success/failure code on exit
-     * @return          a date time interval formatter which the caller owns.
-     * @draft ICU 4.0
-     */
-    static DateIntervalFormat* U_EXPORT2 createInstance(
-                                              const UnicodeString& skeleton,
-                                              const DateIntervalInfo& dtitvinf,
-                                              UErrorCode& status);
-
-    /**
-     * Construct a DateIntervalFormat from skeleton
-     * a DateIntervalInfo, and the given locale.
-     *
-     * <P>
-     * In this factory method, user provides its own date interval pattern
-     * information, instead of using those pre-defined data in resource file. 
-     * This factory method is for powerful users who want to provide their own 
-     * interval patterns. 
-     * <P>
-     * There are pre-defined skeletons (defined in udate.h) having predefined 
-     * interval patterns in resource files.
-     * Users are encouraged to use those macros.
-     * For example: 
-     * DateIntervalFormat::createInstance(UDAT_MONTH_DAY, status) 
-     *
-     * The DateIntervalInfo provides the interval patterns.
-     * and the DateIntervalInfo ownership remains to the caller. 
-     *
-     * User are encouraged to set default interval pattern in DateIntervalInfo
-     * as well, if they want to set other interval patterns ( instead of
-     * reading the interval patterns from resource files).
-     * When the corresponding interval pattern for a largest calendar different
-     * field is not found ( if user not set it ), interval format fallback to
-     * the default interval pattern.
-     * If user does not provide default interval pattern, it fallback to
-     * "{date0} - {date1}" 
-     *
-     * @param skeleton  the skeleton on which interval format based.
-     * @param locale    the given locale
-     * @param dtitvinf  the DateIntervalInfo object.
-     * @param status    output param set to success/failure code on exit
-     * @return          a date time interval formatter which the caller owns.
-     * @draft ICU 4.0
-     */
-    static DateIntervalFormat* U_EXPORT2 createInstance(
-                                              const UnicodeString& skeleton,
-                                              const Locale& locale,
-                                              const DateIntervalInfo& dtitvinf,
-                                              UErrorCode& status);
-
-    /**
-     * Destructor.
-     * @draft ICU 4.0
-     */
-    virtual ~DateIntervalFormat();
-
-    /**
-     * Clone this Format object polymorphically. The caller owns the result and
-     * should delete it when done.
-     * @return    A copy of the object.
-     * @draft ICU 4.0
-     */
-    virtual Format* clone(void) const;
-
-    /**
-     * Return true if the given Format objects are semantically equal. Objects
-     * of different subclasses are considered unequal.
-     * @param other    the object to be compared with.
-     * @return         true if the given Format objects are semantically equal.
-     * @draft ICU 4.0
-     */
-    virtual UBool operator==(const Format& other) const;
-
-    /**
-     * Return true if the given Format objects are not semantically equal. 
-     * Objects of different subclasses are considered unequal.
-     * @param other the object to be compared with.
-     * @return      true if the given Format objects are not semantically equal.
-     * @draft ICU 4.0
-     */
-    UBool operator!=(const Format& other) const;
-
-    /**
-     * Format an object to produce a string. This method handles Formattable
-     * objects with a DateInterval type. 
-     * If a the Formattable object type is not a DateInterval,
-     * then it returns a failing UErrorCode.
-     *
-     * @param obj               The object to format. 
-     *                          Must be a DateInterval.
-     * @param appendTo          Output parameter to receive result.
-     *                          Result is appended to existing contents.
-     * @param fieldPosition     On input: an alignment field, if desired.
-     *                          On output: the offsets of the alignment field.
-     * @param status            Output param filled with success/failure status.
-     * @return                  Reference to 'appendTo' parameter.
-     * @draft ICU 4.0
-     */
-    virtual UnicodeString& format(const Formattable& obj,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& fieldPosition,
-                                  UErrorCode& status) const ;
-                                    
-                                    
-
-    /**
-     * Format a DateInterval to produce a string. 
-     *
-     * @param dtInterval        DateInterval to be formatted.
-     * @param appendTo          Output parameter to receive result.
-     *                          Result is appended to existing contents.
-     * @param fieldPosition     On input: an alignment field, if desired.
-     *                          On output: the offsets of the alignment field.
-     * @param status            Output param filled with success/failure status.
-     * @return                  Reference to 'appendTo' parameter.
-     * @draft ICU 4.0
-     */
-    UnicodeString& format(const DateInterval* dtInterval,
-                          UnicodeString& appendTo,
-                          FieldPosition& fieldPosition,
-                          UErrorCode& status) const ;
-                                    
-                                    
-    /**
-     * Format 2 Calendars to produce a string. 
-     *
-     * Note: "fromCalendar" and "toCalendar" are not const,
-     * since calendar is not const in  SimpleDateFormat::format(Calendar&),
-     *
-     * @param fromCalendar      calendar set to the from date in date interval
-     *                          to be formatted into date interval string
-     * @param toCalendar        calendar set to the to date in date interval
-     *                          to be formatted into date interval string
-     * @param appendTo          Output parameter to receive result.
-     *                          Result is appended to existing contents.
-     * @param fieldPosition     On input: an alignment field, if desired.
-     *                          On output: the offsets of the alignment field.
-     * @param status            Output param filled with success/failure status.
-     *                          Caller needs to make sure it is SUCCESS
-     *                          at the function entrance
-     * @return                  Reference to 'appendTo' parameter.
-     * @draft ICU 4.0
-     */
-    UnicodeString& format(Calendar& fromCalendar,
-                          Calendar& toCalendar,
-                          UnicodeString& appendTo,
-                          FieldPosition& fieldPosition,
-                          UErrorCode& status) const ;
-
-    /**
-     * Date interval parsing is not supported. Please do not use.
-     * <P>
-     * This method should handle parsing of
-     * date time interval strings into Formattable objects with 
-     * DateInterval type, which is a pair of UDate.
-     * <P>
-     * Before calling, set parse_pos.index to the offset you want to start
-     * parsing at in the source. After calling, parse_pos.index is the end of
-     * the text you parsed. If error occurs, index is unchanged.
-     * <P>
-     * When parsing, leading whitespace is discarded (with a successful parse),
-     * while trailing whitespace is left as is.
-     * <P>
-     * See Format::parseObject() for more.
-     *
-     * @param source    The string to be parsed into an object.
-     * @param result    Formattable to be set to the parse result.
-     *                  If parse fails, return contents are undefined.
-     * @param parse_pos The position to start parsing at. Since no parsing
-     *                  is supported, upon return this param is unchanged.
-     * @return          A newly created Formattable* object, or NULL
-     *                  on failure.  The caller owns this and should
-     *                  delete it when done.
-     * @internal ICU 4.0
-     */
-    virtual void parseObject(const UnicodeString& source,
-                             Formattable& result,
-                             ParsePosition& parse_pos) const;
-
-
-    /**
-     * Gets the date time interval patterns.
-     * @return the date time interval patterns associated with
-     * this date interval formatter.
-     * @draft ICU 4.0
-     */
-    const DateIntervalInfo* getDateIntervalInfo(void) const;
-
-
-    /**
-     * Set the date time interval patterns. 
-     * @param newIntervalPatterns   the given interval patterns to copy.
-     * @param status          output param set to success/failure code on exit
-     * @draft ICU 4.0
-     */
-    void setDateIntervalInfo(const DateIntervalInfo& newIntervalPatterns,
-                             UErrorCode& status);
-
-
-    /**
-     * Gets the date formatter
-     * @return the date formatter associated with this date interval formatter.
-     * @draft ICU 4.0
-     */
-    const DateFormat* getDateFormat(void) const;
-
-    /**
-     * Return the class ID for this class. This is useful only for comparing to
-     * a return value from getDynamicClassID(). For example:
-     * <pre>
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       erived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @draft ICU 4.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
-     * method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone()
-     * methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @draft ICU 4.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-
-protected:
-
-    /**
-     * Copy constructor.
-     * @draft ICU 4.0
-     */
-    DateIntervalFormat(const DateIntervalFormat&);
-
-    /**
-     * Assignment operator.
-     * @draft ICU 4.0
-     */
-    DateIntervalFormat& operator=(const DateIntervalFormat&);
-
-private:
-
-    /*
-     * This is for ICU internal use only. Please do not use.
-     * Save the interval pattern information.
-     * Interval pattern consists of 2 single date patterns and the separator.
-     * For example, interval pattern "MMM d - MMM d, yyyy" consists
-     * a single date pattern "MMM d", another single date pattern "MMM d, yyyy",
-     * and a separator "-".
-     * The pattern is divided into 2 parts. For above example,
-     * the first part is "MMM d - ", and the second part is "MMM d, yyyy".
-     * Also, the first date appears in an interval pattern could be 
-     * the earlier date or the later date.
-     * And such information is saved in the interval pattern as well.
-     * @internal ICU 4.0
-     */
-    struct PatternInfo {
-        UnicodeString firstPart;
-        UnicodeString secondPart;
-        /**
-         * Whether the first date in interval pattern is later date or not.
-         * Fallback format set the default ordering.
-         * And for a particular interval pattern, the order can be 
-         * overriden by prefixing the interval pattern with "latestFirst:" or 
-         * "earliestFirst:"
-         * For example, given 2 date, Jan 10, 2007 to Feb 10, 2007.
-         * if the fallback format is "{0} - {1}", 
-         * and the pattern is "d MMM - d MMM yyyy", the interval format is
-         * "10 Jan - 10 Feb, 2007".
-         * If the pattern is "latestFirst:d MMM - d MMM yyyy", 
-         * the interval format is "10 Feb - 10 Jan, 2007"
-         */
-        UBool         laterDateFirst;
-    };
-
-   
-    /**
-     * default constructor 
-     * @internal ICU 4.0
-     */
-    DateIntervalFormat();
-
-    /**
-     * Construct a DateIntervalFormat from DateFormat,
-     * a DateIntervalInfo, and skeleton.
-     * DateFormat provides the timezone, calendar,
-     * full pattern, and date format symbols information.
-     * It should be a SimpleDateFormat object which 
-     * has a pattern in it.
-     * the DateIntervalInfo provides the interval patterns.
-     *
-     * Note: the DateIntervalFormat takes ownership of both 
-     * DateFormat and DateIntervalInfo objects. 
-     * Caller should not delete them.
-     *
-     * @param dtfmt     the SimpleDateFormat object to be adopted.
-     * @param dtitvinf  the DateIntervalInfo object to be adopted.
-     * @param skeleton  the skeleton of the date formatter
-     * @param status    output param set to success/failure code on exit
-     * @internal ICU 4.0
-     */
-    DateIntervalFormat(DateFormat* dtfmt, DateIntervalInfo* dtItvInfo,
-                       const UnicodeString* skeleton, UErrorCode& status);
-
-    
-    /**
-     * Construct a DateIntervalFormat from DateFormat
-     * and a DateIntervalInfo.
-     *
-     * It is a wrapper of the constructor.
-     *
-     * @param dtfmt     the DateFormat object to be adopted.
-     * @param dtitvinf  the DateIntervalInfo object to be adopted.
-     * @param skeleton  the skeleton of this formatter.
-     * @param status    Output param set to success/failure code.
-     * @return          a date time interval formatter which the caller owns.
-     * @internal ICU 4.0
-     */
-    static DateIntervalFormat* U_EXPORT2 create(DateFormat* dtfmt,
-                                                DateIntervalInfo* dtitvinf,
-                                                const UnicodeString* skeleton,
-                                                UErrorCode& status);
-
-
-    /**
-     *  Below are for generating interval patterns locale to the formatter 
-     */
-
-
-    /**
-     * Format 2 Calendars using fall-back interval pattern
-     *
-     * The full pattern used in this fall-back format is the
-     * full pattern of the date formatter.
-     *
-     * @param fromCalendar      calendar set to the from date in date interval
-     *                          to be formatted into date interval string
-     * @param toCalendar        calendar set to the to date in date interval
-     *                          to be formatted into date interval string
-     * @param appendTo          Output parameter to receive result.
-     *                          Result is appended to existing contents.
-     * @param pos               On input: an alignment field, if desired.
-     *                          On output: the offsets of the alignment field.
-     * @param status            output param set to success/failure code on exit
-     * @return                  Reference to 'appendTo' parameter.
-     * @internal ICU 4.0
-     */
-    UnicodeString& fallbackFormat(Calendar& fromCalendar,
-                                  Calendar& toCalendar,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos,
-                                  UErrorCode& status) const;
-
-
-
-    /** 
-     * Initialize interval patterns locale to this formatter
-     * 
-     * This code is a bit complicated since 
-     * 1. the interval patterns saved in resource bundle files are interval
-     *    patterns based on date or time only.
-     *    It does not have interval patterns based on both date and time.
-     *    Interval patterns on both date and time are algorithm generated.
-     *
-     *    For example, it has interval patterns on skeleton "dMy" and "hm",
-     *    but it does not have interval patterns on skeleton "dMyhm".
-     *    
-     *    The rule to generate interval patterns for both date and time skeleton are
-     *    1) when the year, month, or day differs, concatenate the two original 
-     *    expressions with a separator between, 
-     *    For example, interval pattern from "Jan 10, 2007 10:10 am" 
-     *    to "Jan 11, 2007 10:10am" is 
-     *    "Jan 10, 2007 10:10 am - Jan 11, 2007 10:10am" 
-     *
-     *    2) otherwise, present the date followed by the range expression 
-     *    for the time.
-     *    For example, interval pattern from "Jan 10, 2007 10:10 am" 
-     *    to "Jan 10, 2007 11:10am" is 
-     *    "Jan 10, 2007 10:10 am - 11:10am" 
-     *
-     * 2. even a pattern does not request a certain calendar field,
-     *    the interval pattern needs to include such field if such fields are
-     *    different between 2 dates.
-     *    For example, a pattern/skeleton is "hm", but the interval pattern 
-     *    includes year, month, and date when year, month, and date differs.
-     * 
-     *
-     * @param status    output param set to success/failure code on exit
-     * @internal ICU 4.0 
-     */
-    void initializePattern(UErrorCode& status); 
-                              
-
-
-    /**
-     * Set fall back interval pattern given a calendar field,
-     * a skeleton, and a date time pattern generator.
-     * @param field      the largest different calendar field
-     * @param skeleton   a skeleton
-     * @param dtpng      date time pattern generator
-     * @param status     output param set to success/failure code on exit
-     * @internal ICU 4.0 
-     */
-    void setFallbackPattern(UCalendarDateFields field, 
-                            const UnicodeString& skeleton,
-                            DateTimePatternGenerator* dtpng,
-                            UErrorCode& status);
-                            
-
-
-    /** 
-     * get separated date and time skeleton from a combined skeleton.
-     *
-     * The difference between date skeleton and normalizedDateSkeleton are:
-     * 1. both 'y' and 'd' are appeared only once in normalizeDateSkeleton
-     * 2. 'E' and 'EE' are normalized into 'EEE'
-     * 3. 'MM' is normalized into 'M'
-     *
-     ** the difference between time skeleton and normalizedTimeSkeleton are:
-     * 1. both 'H' and 'h' are normalized as 'h' in normalized time skeleton,
-     * 2. 'a' is omitted in normalized time skeleton.
-     * 3. there is only one appearance for 'h', 'm','v', 'z' in normalized time
-     *    skeleton
-     *
-     *
-     *  @param skeleton               given combined skeleton.
-     *  @param date                   Output parameter for date only skeleton.
-     *  @param normalizedDate         Output parameter for normalized date only
-     *
-     *  @param time                   Output parameter for time only skeleton.
-     *  @param normalizedTime         Output parameter for normalized time only
-     *                                skeleton.
-     *
-     * @internal ICU 4.0 
-     */
-    static void  U_EXPORT2 getDateTimeSkeleton(const UnicodeString& skeleton,
-                                    UnicodeString& date,
-                                    UnicodeString& normalizedDate,
-                                    UnicodeString& time,
-                                    UnicodeString& normalizedTime);
-
-
-
-    /**
-     * Generate date or time interval pattern from resource,
-     * and set them into the interval pattern locale to this formatter.
-     *
-     * It needs to handle the following: 
-     * 1. need to adjust field width.
-     *    For example, the interval patterns saved in DateIntervalInfo
-     *    includes "dMMMy", but not "dMMMMy".
-     *    Need to get interval patterns for dMMMMy from dMMMy.
-     *    Another example, the interval patterns saved in DateIntervalInfo
-     *    includes "hmv", but not "hmz".
-     *    Need to get interval patterns for "hmz' from 'hmv'
-     *
-     * 2. there might be no pattern for 'y' differ for skeleton "Md",
-     *    in order to get interval patterns for 'y' differ,
-     *    need to look for it from skeleton 'yMd'
-     *
-     * @param dateSkeleton   normalized date skeleton
-     * @param timeSkeleton   normalized time skeleton
-     * @return               whether the resource is found for the skeleton.
-     *                       TRUE if interval pattern found for the skeleton,
-     *                       FALSE otherwise.
-     * @internal ICU 4.0
-     */
-    UBool setSeparateDateTimePtn(const UnicodeString& dateSkeleton, 
-                                 const UnicodeString& timeSkeleton);
-                                   
-
-
-
-    /**
-     * Generate interval pattern from existing resource
-     *
-     * It not only save the interval patterns,
-     * but also return the extended skeleton and its best match skeleton.
-     *
-     * @param field           largest different calendar field
-     * @param skeleton        skeleton
-     * @param bestSkeleton    the best match skeleton which has interval pattern
-     *                        defined in resource
-     * @param differenceInfo  the difference between skeleton and best skeleton
-     *         0 means the best matched skeleton is the same as input skeleton
-     *         1 means the fields are the same, but field width are different
-     *         2 means the only difference between fields are v/z,
-     *        -1 means there are other fields difference 
-     *
-     * @param extendedSkeleton      extended skeleton
-     * @param extendedBestSkeleton  extended best match skeleton
-     * @return                      whether the interval pattern is found 
-     *                              through extending skeleton or not.
-     *                              TRUE if interval pattern is found by
-     *                              extending skeleton, FALSE otherwise.
-     * @internal ICU 4.0
-     */
-    UBool setIntervalPattern(UCalendarDateFields field, 
-                             const UnicodeString* skeleton, 
-                             const UnicodeString* bestSkeleton, 
-                             int8_t differenceInfo, 
-                             UnicodeString* extendedSkeleton = NULL,
-                             UnicodeString* extendedBestSkeleton = NULL);
-
-    /**
-     * Adjust field width in best match interval pattern to match
-     * the field width in input skeleton.
-     *
-     * TODO (xji) make a general solution
-     * The adjusting rule can be:
-     * 1. always adjust
-     * 2. never adjust
-     * 3. default adjust, which means adjust according to the following rules
-     * 3.1 always adjust string, such as MMM and MMMM
-     * 3.2 never adjust between string and numeric, such as MM and MMM
-     * 3.3 always adjust year
-     * 3.4 do not adjust 'd', 'h', or 'm' if h presents
-     * 3.5 do not adjust 'M' if it is numeric(?)
-     *
-     * Since date interval format is well-formed format,
-     * date and time skeletons are normalized previously,
-     * till this stage, the adjust here is only "adjust strings, such as MMM
-     * and MMMM, EEE and EEEE.
-     *
-     * @param inputSkeleton            the input skeleton
-     * @param bestMatchSkeleton        the best match skeleton
-     * @param bestMatchIntervalpattern the best match interval pattern
-     * @param differenceInfo           the difference between 2 skeletons
-     *                                 1 means only field width differs
-     *                                 2 means v/z exchange
-     * @param adjustedIntervalPattern  adjusted interval pattern
-     * @internal ICU 4.0
-     */
-    static void U_EXPORT2 adjustFieldWidth(
-                            const UnicodeString& inputSkeleton,
-                            const UnicodeString& bestMatchSkeleton,
-                            const UnicodeString& bestMatchIntervalPattern,
-                            int8_t differenceInfo,
-                            UnicodeString& adjustedIntervalPattern);
-
-    /**
-     * Concat a single date pattern with a time interval pattern,
-     * set it into the intervalPatterns, while field is time field.
-     * This is used to handle time interval patterns on skeleton with
-     * both time and date. Present the date followed by 
-     * the range expression for the time.
-     * @param format         date and time format
-     * @param formatLen      format string length
-     * @param datePattern    date pattern
-     * @param field          time calendar field: AM_PM, HOUR, MINUTE
-     * @param status         output param set to success/failure code on exit
-     * @internal ICU 4.0 
-     */
-    void concatSingleDate2TimeInterval(const UChar* format,
-                                       int32_t formatLen,
-                                       const UnicodeString& datePattern,
-                                       UCalendarDateFields field,
-                                       UErrorCode& status); 
-
-    /**
-     * check whether a calendar field present in a skeleton.
-     * @param field      calendar field need to check
-     * @param skeleton   given skeleton on which to check the calendar field
-     * @return           true if field present in a skeleton.
-     * @internal ICU 4.0 
-     */
-    static UBool U_EXPORT2 fieldExistsInSkeleton(UCalendarDateFields field, 
-                                                 const UnicodeString& skeleton);
-
-
-    /**
-     * Split interval patterns into 2 part.
-     * @param intervalPattern  interval pattern
-     * @return the index in interval pattern which split the pattern into 2 part
-     * @internal ICU 4.0
-     */
-    static int32_t  U_EXPORT2 splitPatternInto2Part(const UnicodeString& intervalPattern);
-
-
-    /**
-     * Break interval patterns as 2 part and save them into pattern info.
-     * @param field            calendar field
-     * @param intervalPattern  interval pattern
-     * @internal ICU 4.0
-     */
-    void setIntervalPattern(UCalendarDateFields field,
-                            const UnicodeString& intervalPattern);
-
-
-    /**
-     * Break interval patterns as 2 part and save them into pattern info.
-     * @param field            calendar field
-     * @param intervalPattern  interval pattern
-     * @param laterDateFirst   whether later date appear first in interval pattern
-     * @internal ICU 4.0
-     */
-    void setIntervalPattern(UCalendarDateFields field,
-                            const UnicodeString& intervalPattern,
-                            UBool laterDateFirst);
-
-
-    /**
-     * Set pattern information.
-     *
-     * @param field            calendar field
-     * @param firstPart        the first part in interval pattern
-     * @param secondPart       the second part in interval pattern
-     * @param laterDateFirst   whether the first date in intervalPattern
-     *                         is earlier date or later date
-     * @internal ICU 4.0
-     */
-    void setPatternInfo(UCalendarDateFields field,
-                        const UnicodeString* firstPart,
-                        const UnicodeString* secondpart,
-                        UBool laterDateFirst);
-
-
-    // from calendar field to pattern letter
-    static const UChar fgCalendarFieldToPatternLetter[];
-
-
-    /**
-     * The interval patterns for this locale.
-     */
-    DateIntervalInfo*     fInfo;
-
-    /**
-     * The DateFormat object used to format single pattern
-     */
-    SimpleDateFormat*     fDateFormat;
-
-    /**
-     * The 2 calendars with the from and to date.
-     * could re-use the calendar in fDateFormat,
-     * but keeping 2 calendars make it clear and clean.
-     */
-    Calendar* fFromCalendar;
-    Calendar* fToCalendar;
-
-    /**
-     * Following are interval information relavent (locale) to this formatter.
-     */
-    UnicodeString fSkeleton;
-    PatternInfo fIntervalPatterns[DateIntervalInfo::kIPI_MAX_INDEX];
-};
- 
-
-
- 
-
-
-inline UBool 
-DateIntervalFormat::operator!=(const Format& other) const  {
-    return !operator==(other); 
-}
- 
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _DTITVFMT_H__
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/dtitvfmt.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/dtitvfmt.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/dtitvfmt.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/dtitvfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,949 @@
+/********************************************************************************
+* Copyright (C) 2008, International Business Machines Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTITVFMT.H
+*
+*******************************************************************************
+*/
+
+#ifndef __DTITVFMT_H__
+#define __DTITVFMT_H__
+
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Format and parse date interval in a language-independent manner.
+ */
+ 
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/ucal.h"
+#include "unicode/smpdtfmt.h"
+#include "unicode/dtintrv.h"
+#include "unicode/dtitvinf.h"
+
+U_NAMESPACE_BEGIN
+
+
+
+/**
+ * DateIntervalFormat is a class for formatting and parsing date 
+ * intervals in a language-independent manner. 
+ * Date interval formatting is supported in Gregorian calendar only.
+ * And only formatting is supported. Parsing is not supported.
+ *
+ * <P>
+ * Date interval means from one date to another date,
+ * for example, from "Jan 11, 2008" to "Jan 18, 2008".
+ * We introduced class DateInterval to represent it.
+ * DateInterval is a pair of UDate, which is 
+ * the standard milliseconds since 24:00 GMT, Jan 1, 1970.
+ *
+ * <P>
+ * DateIntervalFormat formats a DateInterval into
+ * text as compactly as possible. 
+ * For example, the date interval format from "Jan 11, 2008" to "Jan 18,. 2008"
+ * is "Jan 11-18, 2008" for English.
+ * And it parses text into DateInterval, 
+ * although initially, parsing is not supported. 
+ *
+ * <P>
+ * There is no structural information in date time patterns. 
+ * For any punctuations and string literals inside a date time pattern, 
+ * we do not know whether it is just a separator, or a prefix, or a suffix. 
+ * Without such information, so, it is difficult to generate a sub-pattern 
+ * (or super-pattern) by algorithm.
+ * So, formatting a DateInterval is pattern-driven. It is very
+ * similar to formatting in SimpleDateFormat.
+ * We introduce class DateIntervalInfo to save date interval 
+ * patterns, similar to date time pattern in SimpleDateFormat.
+ *
+ * <P>
+ * Logically, the interval patterns are mappings
+ * from (skeleton, the_largest_different_calendar_field)
+ * to (date_interval_pattern).
+ *
+ * <P>
+ * A skeleton 
+ * <ol>
+ * <li>
+ * only keeps the field pattern letter and ignores all other parts 
+ * in a pattern, such as space, punctuations, and string literals.
+ * </li>
+ * <li>
+ * hides the order of fields. 
+ * </li>
+ * <li>
+ * might hide a field's pattern letter length.
+ * </li>
+ * </ol>
+ *
+ * For those non-digit calendar fields, the pattern letter length is 
+ * important, such as MMM, MMMM, and MMMMM; EEE and EEEE, 
+ * and the field's pattern letter length is honored.
+ *    
+ * For the digit calendar fields,  such as M or MM, d or dd, yy or yyyy, 
+ * the field pattern length is ignored and the best match, which is defined 
+ * in date time patterns, will be returned without honor the field pattern
+ * letter length in skeleton.
+ *
+ * <P>
+ * The calendar fields we support for interval formatting are:
+ * year, month, date, day-of-week, am-pm, hour, hour-of-day, and minute.
+ * Those calendar fields can be defined in the following order:
+ * year >  month > date > hour (in day) >  minute 
+ *  
+ * The largest different calendar fields between 2 calendars is the
+ * first different calendar field in above order.
+ *
+ * For example: the largest different calendar fields between "Jan 10, 2007" 
+ * and "Feb 20, 2008" is year.
+ *
+ * <P>
+ * For other calendar fields, the compact interval formatting is not
+ * supported. And the interval format will be fall back to fall-back
+ * patterns, which is mostly "{date0} - {date1}".
+ *   
+ * <P>
+ * There is a set of pre-defined static skeleton strings.
+ * There are pre-defined interval patterns for those pre-defined skeletons
+ * in locales' resource files.
+ * For example, for a skeleton UDAT_YEAR_ABBR_MONTH_DAY, which is  "yMMMd",
+ * in  en_US, if the largest different calendar field between date1 and date2 
+ * is "year", the date interval pattern  is "MMM d, yyyy - MMM d, yyyy", 
+ * such as "Jan 10, 2007 - Jan 10, 2008".
+ * If the largest different calendar field between date1 and date2 is "month",
+ * the date interval pattern is "MMM d - MMM d, yyyy",
+ * such as "Jan 10 - Feb 10, 2007".
+ * If the largest different calendar field between date1 and date2 is "day",
+ * the date interval pattern is ""MMM d-d, yyyy", such as "Jan 10-20, 2007".
+ *
+ * For date skeleton, the interval patterns when year, or month, or date is 
+ * different are defined in resource files.
+ * For time skeleton, the interval patterns when am/pm, or hour, or minute is
+ * different are defined in resource files.
+ *
+ * <P>
+ * If a skeleton is not found in a locale's DateIntervalInfo, which means
+ * the interval patterns for the skeleton is not defined in resource file,
+ * the interval pattern will falls back to the interval "fallback" pattern 
+ * defined in resource file.
+ * If the interval "fallback" pattern is not defined, the default fall-back
+ * is "{date0} - {data1}".
+ *
+ * <P>
+ * For the combination of date and time, 
+ * The rule to generate interval patterns are:
+ * <ol>
+ * <li>
+ *    when the year, month, or day differs, falls back to fall-back
+ *    interval pattern, which mostly is the concatenate the two original 
+ *    expressions with a separator between, 
+ *    For example, interval pattern from "Jan 10, 2007 10:10 am" 
+ *    to "Jan 11, 2007 10:10am" is 
+ *    "Jan 10, 2007 10:10 am - Jan 11, 2007 10:10am" 
+ * </li>
+ * <li>
+ *    otherwise, present the date followed by the range expression 
+ *    for the time.
+ *    For example, interval pattern from "Jan 10, 2007 10:10 am" 
+ *    to "Jan 10, 2007 11:10am" is "Jan 10, 2007 10:10 am - 11:10am" 
+ * </li>
+ * </ol>
+ *
+ *
+ * <P>
+ * If two dates are the same, the interval pattern is the single date pattern.
+ * For example, interval pattern from "Jan 10, 2007" to "Jan 10, 2007" is 
+ * "Jan 10, 2007".
+ *
+ * Or if the presenting fields between 2 dates have the exact same values,
+ * the interval pattern is the  single date pattern. 
+ * For example, if user only requests year and month,
+ * the interval pattern from "Jan 10, 2007" to "Jan 20, 2007" is "Jan 2007".
+ *
+ * <P>
+ * DateIntervalFormat needs the following information for correct 
+ * formatting: time zone, calendar type, pattern, date format symbols, 
+ * and date interval patterns.
+ * It can be instantiated in 2 ways:
+ * <ol>
+ * <li>
+ *    create an instance using default or given locale plus given skeleton.
+ *    Users are encouraged to created date interval formatter this way and 
+ *    to use the pre-defined skeleton macros, such as
+ *    UDAT_YEAR_NUM_MONTH, which consists the calendar fields and
+ *    the format style. 
+ * </li>
+ * <li>
+ *    create an instance using default or given locale plus given skeleton
+ *    plus a given DateIntervalInfo.
+ *    This factory method is for powerful users who want to provide their own 
+ *    interval patterns. 
+ *    Locale provides the timezone, calendar, and format symbols information.
+ *    Local plus skeleton provides full pattern information.
+ *    DateIntervalInfo provides the date interval patterns.
+ * </li>
+ * </ol>
+ *
+ * <P>
+ * For the calendar field pattern letter, such as G, y, M, d, a, h, H, m, s etc.
+ * DateIntervalFormat uses the same syntax as that of
+ * DateTime format.
+ * 
+ * <P>
+ * Code Sample: general usage
+ * <pre>
+ * \code
+ *   // the date interval object which the DateIntervalFormat formats on
+ *   // and parses into
+ *   DateInterval*  dtInterval = new DateInterval(1000*3600*24, 1000*3600*24*2);
+ *   UErrorCode status = U_ZERO_ERROR;
+ *   DateIntervalFormat* dtIntervalFmt = DateIntervalFormat::createInstance(
+ *                           UDAT_YEAR_MONTH_DAY, 
+ *                           Locale("en", "GB", ""), status);
+ *   UnicodeUnicodeString dateIntervalString;
+ *   FieldPosition pos = 0;
+ *   // formatting
+ *   dtIntervalFmt->format(dtInterval, dateIntervalUnicodeString, pos, status);
+ *   delete dtIntervalFmt;
+ * \endcode
+ * </pre>
+ */
+
+class U_I18N_API DateIntervalFormat : public Format {
+public:
+
+    /**
+     * Construct a DateIntervalFormat from skeleton and  the default locale.
+     *
+     * This is a convenient override of 
+     * createInstance(const UnicodeString& skeleton, const Locale& locale,
+     *                UErrorCode&)  
+     * with the value of locale as default locale.
+     *
+     * @param skeleton  the skeleton on which interval format based.
+     * @param status    output param set to success/failure code on exit
+     * @return          a date time interval formatter which the caller owns.
+     * @draft ICU 4.0
+     */
+    static DateIntervalFormat* U_EXPORT2 createInstance(
+                                               const UnicodeString& skeleton,
+                                               UErrorCode& status);
+
+    /**
+     * Construct a DateIntervalFormat from skeleton and a given locale.
+     * <P>
+     * In this factory method, 
+     * the date interval pattern information is load from resource files.
+     * Users are encouraged to created date interval formatter this way and 
+     * to use the pre-defined skeleton macros.
+     *
+     * <P>
+     * There are pre-defined skeletons (defined in udate.h) having predefined 
+     * interval patterns in resource files.
+     * Users are encouraged to use those macros.
+     * For example: 
+     * DateIntervalFormat::createInstance(UDAT_MONTH_DAY, status) 
+     *
+     * The given Locale provides the interval patterns.
+     * For example, for en_GB, if skeleton is UDAT_YEAR_ABBR_MONTH_WEEKDAY_DAY,
+     * which is "yMMMEEEd",
+     * the interval patterns defined in resource file to above skeleton are:
+     * "EEE, d MMM, yyyy - EEE, d MMM, yyyy" for year differs,
+     * "EEE, d MMM - EEE, d MMM, yyyy" for month differs,
+     * "EEE, d - EEE, d MMM, yyyy" for day differs,
+     * @param skeleton  the skeleton on which interval format based.
+     * @param locale    the given locale
+     * @param status    output param set to success/failure code on exit
+     * @return          a date time interval formatter which the caller owns.
+     * @draft ICU 4.0
+     */
+
+    static DateIntervalFormat* U_EXPORT2 createInstance(
+                                               const UnicodeString& skeleton,
+                                               const Locale& locale,
+                                               UErrorCode& status);
+
+    /**
+     * Construct a DateIntervalFormat from skeleton
+     *  DateIntervalInfo, and default locale.
+     *
+     * This is a convenient override of
+     * createInstance(const UnicodeString& skeleton, const Locale& locale, 
+     *                const DateIntervalInfo& dtitvinf, UErrorCode&)
+     * with the locale value as default locale.
+     *
+     * @param skeleton  the skeleton on which interval format based.
+     * @param dtitvinf  the DateIntervalInfo object. 
+     * @param status    output param set to success/failure code on exit
+     * @return          a date time interval formatter which the caller owns.
+     * @draft ICU 4.0
+     */
+    static DateIntervalFormat* U_EXPORT2 createInstance(
+                                              const UnicodeString& skeleton,
+                                              const DateIntervalInfo& dtitvinf,
+                                              UErrorCode& status);
+
+    /**
+     * Construct a DateIntervalFormat from skeleton
+     * a DateIntervalInfo, and the given locale.
+     *
+     * <P>
+     * In this factory method, user provides its own date interval pattern
+     * information, instead of using those pre-defined data in resource file. 
+     * This factory method is for powerful users who want to provide their own 
+     * interval patterns. 
+     * <P>
+     * There are pre-defined skeletons (defined in udate.h) having predefined 
+     * interval patterns in resource files.
+     * Users are encouraged to use those macros.
+     * For example: 
+     * DateIntervalFormat::createInstance(UDAT_MONTH_DAY, status) 
+     *
+     * The DateIntervalInfo provides the interval patterns.
+     * and the DateIntervalInfo ownership remains to the caller. 
+     *
+     * User are encouraged to set default interval pattern in DateIntervalInfo
+     * as well, if they want to set other interval patterns ( instead of
+     * reading the interval patterns from resource files).
+     * When the corresponding interval pattern for a largest calendar different
+     * field is not found ( if user not set it ), interval format fallback to
+     * the default interval pattern.
+     * If user does not provide default interval pattern, it fallback to
+     * "{date0} - {date1}" 
+     *
+     * @param skeleton  the skeleton on which interval format based.
+     * @param locale    the given locale
+     * @param dtitvinf  the DateIntervalInfo object.
+     * @param status    output param set to success/failure code on exit
+     * @return          a date time interval formatter which the caller owns.
+     * @draft ICU 4.0
+     */
+    static DateIntervalFormat* U_EXPORT2 createInstance(
+                                              const UnicodeString& skeleton,
+                                              const Locale& locale,
+                                              const DateIntervalInfo& dtitvinf,
+                                              UErrorCode& status);
+
+    /**
+     * Destructor.
+     * @draft ICU 4.0
+     */
+    virtual ~DateIntervalFormat();
+
+    /**
+     * Clone this Format object polymorphically. The caller owns the result and
+     * should delete it when done.
+     * @return    A copy of the object.
+     * @draft ICU 4.0
+     */
+    virtual Format* clone(void) const;
+
+    /**
+     * Return true if the given Format objects are semantically equal. Objects
+     * of different subclasses are considered unequal.
+     * @param other    the object to be compared with.
+     * @return         true if the given Format objects are semantically equal.
+     * @draft ICU 4.0
+     */
+    virtual UBool operator==(const Format& other) const;
+
+    /**
+     * Return true if the given Format objects are not semantically equal. 
+     * Objects of different subclasses are considered unequal.
+     * @param other the object to be compared with.
+     * @return      true if the given Format objects are not semantically equal.
+     * @draft ICU 4.0
+     */
+    UBool operator!=(const Format& other) const;
+
+    /**
+     * Format an object to produce a string. This method handles Formattable
+     * objects with a DateInterval type. 
+     * If a the Formattable object type is not a DateInterval,
+     * then it returns a failing UErrorCode.
+     *
+     * @param obj               The object to format. 
+     *                          Must be a DateInterval.
+     * @param appendTo          Output parameter to receive result.
+     *                          Result is appended to existing contents.
+     * @param fieldPosition     On input: an alignment field, if desired.
+     *                          On output: the offsets of the alignment field.
+     * @param status            Output param filled with success/failure status.
+     * @return                  Reference to 'appendTo' parameter.
+     * @draft ICU 4.0
+     */
+    virtual UnicodeString& format(const Formattable& obj,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& fieldPosition,
+                                  UErrorCode& status) const ;
+                                    
+                                    
+
+    /**
+     * Format a DateInterval to produce a string. 
+     *
+     * @param dtInterval        DateInterval to be formatted.
+     * @param appendTo          Output parameter to receive result.
+     *                          Result is appended to existing contents.
+     * @param fieldPosition     On input: an alignment field, if desired.
+     *                          On output: the offsets of the alignment field.
+     * @param status            Output param filled with success/failure status.
+     * @return                  Reference to 'appendTo' parameter.
+     * @draft ICU 4.0
+     */
+    UnicodeString& format(const DateInterval* dtInterval,
+                          UnicodeString& appendTo,
+                          FieldPosition& fieldPosition,
+                          UErrorCode& status) const ;
+                                    
+                                    
+    /**
+     * Format 2 Calendars to produce a string. 
+     *
+     * Note: "fromCalendar" and "toCalendar" are not const,
+     * since calendar is not const in  SimpleDateFormat::format(Calendar&),
+     *
+     * @param fromCalendar      calendar set to the from date in date interval
+     *                          to be formatted into date interval string
+     * @param toCalendar        calendar set to the to date in date interval
+     *                          to be formatted into date interval string
+     * @param appendTo          Output parameter to receive result.
+     *                          Result is appended to existing contents.
+     * @param fieldPosition     On input: an alignment field, if desired.
+     *                          On output: the offsets of the alignment field.
+     * @param status            Output param filled with success/failure status.
+     *                          Caller needs to make sure it is SUCCESS
+     *                          at the function entrance
+     * @return                  Reference to 'appendTo' parameter.
+     * @draft ICU 4.0
+     */
+    UnicodeString& format(Calendar& fromCalendar,
+                          Calendar& toCalendar,
+                          UnicodeString& appendTo,
+                          FieldPosition& fieldPosition,
+                          UErrorCode& status) const ;
+
+    /**
+     * Date interval parsing is not supported. Please do not use.
+     * <P>
+     * This method should handle parsing of
+     * date time interval strings into Formattable objects with 
+     * DateInterval type, which is a pair of UDate.
+     * <P>
+     * Before calling, set parse_pos.index to the offset you want to start
+     * parsing at in the source. After calling, parse_pos.index is the end of
+     * the text you parsed. If error occurs, index is unchanged.
+     * <P>
+     * When parsing, leading whitespace is discarded (with a successful parse),
+     * while trailing whitespace is left as is.
+     * <P>
+     * See Format::parseObject() for more.
+     *
+     * @param source    The string to be parsed into an object.
+     * @param result    Formattable to be set to the parse result.
+     *                  If parse fails, return contents are undefined.
+     * @param parse_pos The position to start parsing at. Since no parsing
+     *                  is supported, upon return this param is unchanged.
+     * @return          A newly created Formattable* object, or NULL
+     *                  on failure.  The caller owns this and should
+     *                  delete it when done.
+     * @internal ICU 4.0
+     */
+    virtual void parseObject(const UnicodeString& source,
+                             Formattable& result,
+                             ParsePosition& parse_pos) const;
+
+
+    /**
+     * Gets the date time interval patterns.
+     * @return the date time interval patterns associated with
+     * this date interval formatter.
+     * @draft ICU 4.0
+     */
+    const DateIntervalInfo* getDateIntervalInfo(void) const;
+
+
+    /**
+     * Set the date time interval patterns. 
+     * @param newIntervalPatterns   the given interval patterns to copy.
+     * @param status          output param set to success/failure code on exit
+     * @draft ICU 4.0
+     */
+    void setDateIntervalInfo(const DateIntervalInfo& newIntervalPatterns,
+                             UErrorCode& status);
+
+
+    /**
+     * Gets the date formatter
+     * @return the date formatter associated with this date interval formatter.
+     * @draft ICU 4.0
+     */
+    const DateFormat* getDateFormat(void) const;
+
+    /**
+     * Return the class ID for this class. This is useful only for comparing to
+     * a return value from getDynamicClassID(). For example:
+     * <pre>
+     * .   Base* polymorphic_pointer = createPolymorphicObject();
+     * .   if (polymorphic_pointer->getDynamicClassID() ==
+     * .       erived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @draft ICU 4.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+     * method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @draft ICU 4.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+protected:
+
+    /**
+     * Copy constructor.
+     * @draft ICU 4.0
+     */
+    DateIntervalFormat(const DateIntervalFormat&);
+
+    /**
+     * Assignment operator.
+     * @draft ICU 4.0
+     */
+    DateIntervalFormat& operator=(const DateIntervalFormat&);
+
+private:
+
+    /*
+     * This is for ICU internal use only. Please do not use.
+     * Save the interval pattern information.
+     * Interval pattern consists of 2 single date patterns and the separator.
+     * For example, interval pattern "MMM d - MMM d, yyyy" consists
+     * a single date pattern "MMM d", another single date pattern "MMM d, yyyy",
+     * and a separator "-".
+     * The pattern is divided into 2 parts. For above example,
+     * the first part is "MMM d - ", and the second part is "MMM d, yyyy".
+     * Also, the first date appears in an interval pattern could be 
+     * the earlier date or the later date.
+     * And such information is saved in the interval pattern as well.
+     * @internal ICU 4.0
+     */
+    struct PatternInfo {
+        UnicodeString firstPart;
+        UnicodeString secondPart;
+        /**
+         * Whether the first date in interval pattern is later date or not.
+         * Fallback format set the default ordering.
+         * And for a particular interval pattern, the order can be 
+         * overriden by prefixing the interval pattern with "latestFirst:" or 
+         * "earliestFirst:"
+         * For example, given 2 date, Jan 10, 2007 to Feb 10, 2007.
+         * if the fallback format is "{0} - {1}", 
+         * and the pattern is "d MMM - d MMM yyyy", the interval format is
+         * "10 Jan - 10 Feb, 2007".
+         * If the pattern is "latestFirst:d MMM - d MMM yyyy", 
+         * the interval format is "10 Feb - 10 Jan, 2007"
+         */
+        UBool         laterDateFirst;
+    };
+
+   
+    /**
+     * default constructor 
+     * @internal ICU 4.0
+     */
+    DateIntervalFormat();
+
+    /**
+     * Construct a DateIntervalFormat from DateFormat,
+     * a DateIntervalInfo, and skeleton.
+     * DateFormat provides the timezone, calendar,
+     * full pattern, and date format symbols information.
+     * It should be a SimpleDateFormat object which 
+     * has a pattern in it.
+     * the DateIntervalInfo provides the interval patterns.
+     *
+     * Note: the DateIntervalFormat takes ownership of both 
+     * DateFormat and DateIntervalInfo objects. 
+     * Caller should not delete them.
+     *
+     * @param dtfmt     the SimpleDateFormat object to be adopted.
+     * @param dtitvinf  the DateIntervalInfo object to be adopted.
+     * @param skeleton  the skeleton of the date formatter
+     * @param status    output param set to success/failure code on exit
+     * @internal ICU 4.0
+     */
+    DateIntervalFormat(DateFormat* dtfmt, DateIntervalInfo* dtItvInfo,
+                       const UnicodeString* skeleton, UErrorCode& status);
+
+    
+    /**
+     * Construct a DateIntervalFormat from DateFormat
+     * and a DateIntervalInfo.
+     *
+     * It is a wrapper of the constructor.
+     *
+     * @param dtfmt     the DateFormat object to be adopted.
+     * @param dtitvinf  the DateIntervalInfo object to be adopted.
+     * @param skeleton  the skeleton of this formatter.
+     * @param status    Output param set to success/failure code.
+     * @return          a date time interval formatter which the caller owns.
+     * @internal ICU 4.0
+     */
+    static DateIntervalFormat* U_EXPORT2 create(DateFormat* dtfmt,
+                                                DateIntervalInfo* dtitvinf,
+                                                const UnicodeString* skeleton,
+                                                UErrorCode& status);
+
+
+    /**
+     *  Below are for generating interval patterns locale to the formatter 
+     */
+
+
+    /**
+     * Format 2 Calendars using fall-back interval pattern
+     *
+     * The full pattern used in this fall-back format is the
+     * full pattern of the date formatter.
+     *
+     * @param fromCalendar      calendar set to the from date in date interval
+     *                          to be formatted into date interval string
+     * @param toCalendar        calendar set to the to date in date interval
+     *                          to be formatted into date interval string
+     * @param appendTo          Output parameter to receive result.
+     *                          Result is appended to existing contents.
+     * @param pos               On input: an alignment field, if desired.
+     *                          On output: the offsets of the alignment field.
+     * @param status            output param set to success/failure code on exit
+     * @return                  Reference to 'appendTo' parameter.
+     * @internal ICU 4.0
+     */
+    UnicodeString& fallbackFormat(Calendar& fromCalendar,
+                                  Calendar& toCalendar,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos,
+                                  UErrorCode& status) const;
+
+
+
+    /** 
+     * Initialize interval patterns locale to this formatter
+     * 
+     * This code is a bit complicated since 
+     * 1. the interval patterns saved in resource bundle files are interval
+     *    patterns based on date or time only.
+     *    It does not have interval patterns based on both date and time.
+     *    Interval patterns on both date and time are algorithm generated.
+     *
+     *    For example, it has interval patterns on skeleton "dMy" and "hm",
+     *    but it does not have interval patterns on skeleton "dMyhm".
+     *    
+     *    The rule to generate interval patterns for both date and time skeleton are
+     *    1) when the year, month, or day differs, concatenate the two original 
+     *    expressions with a separator between, 
+     *    For example, interval pattern from "Jan 10, 2007 10:10 am" 
+     *    to "Jan 11, 2007 10:10am" is 
+     *    "Jan 10, 2007 10:10 am - Jan 11, 2007 10:10am" 
+     *
+     *    2) otherwise, present the date followed by the range expression 
+     *    for the time.
+     *    For example, interval pattern from "Jan 10, 2007 10:10 am" 
+     *    to "Jan 10, 2007 11:10am" is 
+     *    "Jan 10, 2007 10:10 am - 11:10am" 
+     *
+     * 2. even a pattern does not request a certain calendar field,
+     *    the interval pattern needs to include such field if such fields are
+     *    different between 2 dates.
+     *    For example, a pattern/skeleton is "hm", but the interval pattern 
+     *    includes year, month, and date when year, month, and date differs.
+     * 
+     *
+     * @param status    output param set to success/failure code on exit
+     * @internal ICU 4.0 
+     */
+    void initializePattern(UErrorCode& status); 
+                              
+
+
+    /**
+     * Set fall back interval pattern given a calendar field,
+     * a skeleton, and a date time pattern generator.
+     * @param field      the largest different calendar field
+     * @param skeleton   a skeleton
+     * @param dtpng      date time pattern generator
+     * @param status     output param set to success/failure code on exit
+     * @internal ICU 4.0 
+     */
+    void setFallbackPattern(UCalendarDateFields field, 
+                            const UnicodeString& skeleton,
+                            DateTimePatternGenerator* dtpng,
+                            UErrorCode& status);
+                            
+
+
+    /** 
+     * get separated date and time skeleton from a combined skeleton.
+     *
+     * The difference between date skeleton and normalizedDateSkeleton are:
+     * 1. both 'y' and 'd' are appeared only once in normalizeDateSkeleton
+     * 2. 'E' and 'EE' are normalized into 'EEE'
+     * 3. 'MM' is normalized into 'M'
+     *
+     ** the difference between time skeleton and normalizedTimeSkeleton are:
+     * 1. both 'H' and 'h' are normalized as 'h' in normalized time skeleton,
+     * 2. 'a' is omitted in normalized time skeleton.
+     * 3. there is only one appearance for 'h', 'm','v', 'z' in normalized time
+     *    skeleton
+     *
+     *
+     *  @param skeleton               given combined skeleton.
+     *  @param date                   Output parameter for date only skeleton.
+     *  @param normalizedDate         Output parameter for normalized date only
+     *
+     *  @param time                   Output parameter for time only skeleton.
+     *  @param normalizedTime         Output parameter for normalized time only
+     *                                skeleton.
+     *
+     * @internal ICU 4.0 
+     */
+    static void  U_EXPORT2 getDateTimeSkeleton(const UnicodeString& skeleton,
+                                    UnicodeString& date,
+                                    UnicodeString& normalizedDate,
+                                    UnicodeString& time,
+                                    UnicodeString& normalizedTime);
+
+
+
+    /**
+     * Generate date or time interval pattern from resource,
+     * and set them into the interval pattern locale to this formatter.
+     *
+     * It needs to handle the following: 
+     * 1. need to adjust field width.
+     *    For example, the interval patterns saved in DateIntervalInfo
+     *    includes "dMMMy", but not "dMMMMy".
+     *    Need to get interval patterns for dMMMMy from dMMMy.
+     *    Another example, the interval patterns saved in DateIntervalInfo
+     *    includes "hmv", but not "hmz".
+     *    Need to get interval patterns for "hmz' from 'hmv'
+     *
+     * 2. there might be no pattern for 'y' differ for skeleton "Md",
+     *    in order to get interval patterns for 'y' differ,
+     *    need to look for it from skeleton 'yMd'
+     *
+     * @param dateSkeleton   normalized date skeleton
+     * @param timeSkeleton   normalized time skeleton
+     * @return               whether the resource is found for the skeleton.
+     *                       TRUE if interval pattern found for the skeleton,
+     *                       FALSE otherwise.
+     * @internal ICU 4.0
+     */
+    UBool setSeparateDateTimePtn(const UnicodeString& dateSkeleton, 
+                                 const UnicodeString& timeSkeleton);
+                                   
+
+
+
+    /**
+     * Generate interval pattern from existing resource
+     *
+     * It not only save the interval patterns,
+     * but also return the extended skeleton and its best match skeleton.
+     *
+     * @param field           largest different calendar field
+     * @param skeleton        skeleton
+     * @param bestSkeleton    the best match skeleton which has interval pattern
+     *                        defined in resource
+     * @param differenceInfo  the difference between skeleton and best skeleton
+     *         0 means the best matched skeleton is the same as input skeleton
+     *         1 means the fields are the same, but field width are different
+     *         2 means the only difference between fields are v/z,
+     *        -1 means there are other fields difference 
+     *
+     * @param extendedSkeleton      extended skeleton
+     * @param extendedBestSkeleton  extended best match skeleton
+     * @return                      whether the interval pattern is found 
+     *                              through extending skeleton or not.
+     *                              TRUE if interval pattern is found by
+     *                              extending skeleton, FALSE otherwise.
+     * @internal ICU 4.0
+     */
+    UBool setIntervalPattern(UCalendarDateFields field, 
+                             const UnicodeString* skeleton, 
+                             const UnicodeString* bestSkeleton, 
+                             int8_t differenceInfo, 
+                             UnicodeString* extendedSkeleton = NULL,
+                             UnicodeString* extendedBestSkeleton = NULL);
+
+    /**
+     * Adjust field width in best match interval pattern to match
+     * the field width in input skeleton.
+     *
+     * TODO (xji) make a general solution
+     * The adjusting rule can be:
+     * 1. always adjust
+     * 2. never adjust
+     * 3. default adjust, which means adjust according to the following rules
+     * 3.1 always adjust string, such as MMM and MMMM
+     * 3.2 never adjust between string and numeric, such as MM and MMM
+     * 3.3 always adjust year
+     * 3.4 do not adjust 'd', 'h', or 'm' if h presents
+     * 3.5 do not adjust 'M' if it is numeric(?)
+     *
+     * Since date interval format is well-formed format,
+     * date and time skeletons are normalized previously,
+     * till this stage, the adjust here is only "adjust strings, such as MMM
+     * and MMMM, EEE and EEEE.
+     *
+     * @param inputSkeleton            the input skeleton
+     * @param bestMatchSkeleton        the best match skeleton
+     * @param bestMatchIntervalpattern the best match interval pattern
+     * @param differenceInfo           the difference between 2 skeletons
+     *                                 1 means only field width differs
+     *                                 2 means v/z exchange
+     * @param adjustedIntervalPattern  adjusted interval pattern
+     * @internal ICU 4.0
+     */
+    static void U_EXPORT2 adjustFieldWidth(
+                            const UnicodeString& inputSkeleton,
+                            const UnicodeString& bestMatchSkeleton,
+                            const UnicodeString& bestMatchIntervalPattern,
+                            int8_t differenceInfo,
+                            UnicodeString& adjustedIntervalPattern);
+
+    /**
+     * Concat a single date pattern with a time interval pattern,
+     * set it into the intervalPatterns, while field is time field.
+     * This is used to handle time interval patterns on skeleton with
+     * both time and date. Present the date followed by 
+     * the range expression for the time.
+     * @param format         date and time format
+     * @param formatLen      format string length
+     * @param datePattern    date pattern
+     * @param field          time calendar field: AM_PM, HOUR, MINUTE
+     * @param status         output param set to success/failure code on exit
+     * @internal ICU 4.0 
+     */
+    void concatSingleDate2TimeInterval(const UChar* format,
+                                       int32_t formatLen,
+                                       const UnicodeString& datePattern,
+                                       UCalendarDateFields field,
+                                       UErrorCode& status); 
+
+    /**
+     * check whether a calendar field present in a skeleton.
+     * @param field      calendar field need to check
+     * @param skeleton   given skeleton on which to check the calendar field
+     * @return           true if field present in a skeleton.
+     * @internal ICU 4.0 
+     */
+    static UBool U_EXPORT2 fieldExistsInSkeleton(UCalendarDateFields field, 
+                                                 const UnicodeString& skeleton);
+
+
+    /**
+     * Split interval patterns into 2 part.
+     * @param intervalPattern  interval pattern
+     * @return the index in interval pattern which split the pattern into 2 part
+     * @internal ICU 4.0
+     */
+    static int32_t  U_EXPORT2 splitPatternInto2Part(const UnicodeString& intervalPattern);
+
+
+    /**
+     * Break interval patterns as 2 part and save them into pattern info.
+     * @param field            calendar field
+     * @param intervalPattern  interval pattern
+     * @internal ICU 4.0
+     */
+    void setIntervalPattern(UCalendarDateFields field,
+                            const UnicodeString& intervalPattern);
+
+
+    /**
+     * Break interval patterns as 2 part and save them into pattern info.
+     * @param field            calendar field
+     * @param intervalPattern  interval pattern
+     * @param laterDateFirst   whether later date appear first in interval pattern
+     * @internal ICU 4.0
+     */
+    void setIntervalPattern(UCalendarDateFields field,
+                            const UnicodeString& intervalPattern,
+                            UBool laterDateFirst);
+
+
+    /**
+     * Set pattern information.
+     *
+     * @param field            calendar field
+     * @param firstPart        the first part in interval pattern
+     * @param secondPart       the second part in interval pattern
+     * @param laterDateFirst   whether the first date in intervalPattern
+     *                         is earlier date or later date
+     * @internal ICU 4.0
+     */
+    void setPatternInfo(UCalendarDateFields field,
+                        const UnicodeString* firstPart,
+                        const UnicodeString* secondpart,
+                        UBool laterDateFirst);
+
+
+    // from calendar field to pattern letter
+    static const UChar fgCalendarFieldToPatternLetter[];
+
+
+    /**
+     * The interval patterns for this locale.
+     */
+    DateIntervalInfo*     fInfo;
+
+    /**
+     * The DateFormat object used to format single pattern
+     */
+    SimpleDateFormat*     fDateFormat;
+
+    /**
+     * The 2 calendars with the from and to date.
+     * could re-use the calendar in fDateFormat,
+     * but keeping 2 calendars make it clear and clean.
+     */
+    Calendar* fFromCalendar;
+    Calendar* fToCalendar;
+
+    /**
+     * Following are interval information relavent (locale) to this formatter.
+     */
+    UnicodeString fSkeleton;
+    PatternInfo fIntervalPatterns[DateIntervalInfo::kIPI_MAX_INDEX];
+};
+ 
+
+
+ 
+
+
+inline UBool 
+DateIntervalFormat::operator!=(const Format& other) const  {
+    return !operator==(other); 
+}
+ 
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _DTITVFMT_H__
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/dtitvinf.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/dtitvinf.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/dtitvinf.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,528 +0,0 @@
-/*
- *******************************************************************************
- * Copyright (C) 2008, International Business Machines Corporation and
- * others. All Rights Reserved.
- *******************************************************************************
- *
- * File DTITVINF.H
- *
- *******************************************************************************
- */
-
-#ifndef __DTITVINF_H__
-#define __DTITVINF_H__
-
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief C++ API: Date/Time interval patterns for formatting date/time interval
- */
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/udat.h"
-#include "unicode/locid.h"
-#include "unicode/ucal.h"
-#include "unicode/dtptngen.h"
-//#include "dtitv_impl.h"
-
-/**
- * @internal ICU 4.0
- */
- 
-union UHashTok;
-
-
-U_NAMESPACE_BEGIN
-
-U_CDECL_BEGIN 
-
-/**
- * @internal ICU 4.0
- */
-static UBool U_CALLCONV hashTableValueComparator(UHashTok val1, UHashTok val2) ;
-
-U_CDECL_END 
-
-
-/**
- * DateIntervalInfo is a public class for encapsulating localizable
- * date time interval patterns. It is used by DateIntervalFormat.
- *
- * <P>
- * For most users, ordinary use of DateIntervalFormat does not need to create
- * DateIntervalInfo object directly.
- * DateIntervalFormat will take care of it when creating a date interval
- * formatter when user pass in skeleton and locale.
- *
- * <P>
- * For power users, who want to create their own date interval patterns,
- * or want to re-set date interval patterns, they could do so by
- * directly creating DateIntervalInfo and manupulating it.
- *
- * <P>
- * Logically, the interval patterns are mappings
- * from (skeleton, the_largest_different_calendar_field)
- * to (date_interval_pattern).
- *
- * <P>
- * A skeleton 
- * <ol>
- * <li>
- * only keeps the field pattern letter and ignores all other parts 
- * in a pattern, such as space, punctuations, and string literals.
- * <li>
- * hides the order of fields. 
- * <li>
- * might hide a field's pattern letter length.
- *
- * For those non-digit calendar fields, the pattern letter length is 
- * important, such as MMM, MMMM, and MMMMM; EEE and EEEE, 
- * and the field's pattern letter length is honored.
- *    
- * For the digit calendar fields,  such as M or MM, d or dd, yy or yyyy, 
- * the field pattern length is ignored and the best match, which is defined 
- * in date time patterns, will be returned without honor the field pattern
- * letter length in skeleton.
- * </ol>
- *
- * <P>
- * The calendar fields we support for interval formatting are:
- * year, month, date, day-of-week, am-pm, hour, hour-of-day, and minute.
- * Those calendar fields can be defined in the following order:
- * year >  month > date > am-pm > hour >  minute 
- *  
- * The largest different calendar fields between 2 calendars is the
- * first different calendar field in above order.
- *
- * For example: the largest different calendar fields between "Jan 10, 2007" 
- * and "Feb 20, 2008" is year.
- *   
- * <P>
- * There is a set of pre-defined static skeleton strings.
- * There are pre-defined interval patterns for those pre-defined skeletons
- * in locales' resource files.
- * For example, for a skeleton UDAT_YEAR_ABBR_MONTH_DAY, which is  "yMMMd",
- * in  en_US, if the largest different calendar field between date1 and date2 
- * is "year", the date interval pattern  is "MMM d, yyyy - MMM d, yyyy", 
- * such as "Jan 10, 2007 - Jan 10, 2008".
- * If the largest different calendar field between date1 and date2 is "month",
- * the date interval pattern is "MMM d - MMM d, yyyy",
- * such as "Jan 10 - Feb 10, 2007".
- * If the largest different calendar field between date1 and date2 is "day",
- * the date interval pattern is ""MMM d-d, yyyy", such as "Jan 10-20, 2007".
- *
- * For date skeleton, the interval patterns when year, or month, or date is 
- * different are defined in resource files.
- * For time skeleton, the interval patterns when am/pm, or hour, or minute is
- * different are defined in resource files.
- *
- *
- * <P>
- * There are 2 dates in interval pattern. For most locales, the first date
- * in an interval pattern is the earlier date. There might be a locale in which
- * the first date in an interval pattern is the later date.
- * We use fallback format for the default order for the locale.
- * For example, if the fallback format is "{0} - {1}", it means
- * the first date in the interval pattern for this locale is earlier date.
- * If the fallback format is "{1} - {0}", it means the first date is the 
- * later date.
- * For a particular interval pattern, the default order can be overriden
- * by prefixing "latestFirst:" or "earliestFirst:" to the interval pattern.
- * For example, if the fallback format is "{0}-{1}",
- * but for skeleton "yMMMd", the interval pattern when day is different is 
- * "latestFirst:d-d MMM yy", it means by default, the first date in interval
- * pattern is the earlier date. But for skeleton "yMMMd", when day is different,
- * the first date in "d-d MMM yy" is the later date.
- * 
- * <P>
- * The recommended way to create a DateIntervalFormat object is to pass in 
- * the locale. 
- * By using a Locale parameter, the DateIntervalFormat object is 
- * initialized with the pre-defined interval patterns for a given or 
- * default locale.
- * <P>
- * Users can also create DateIntervalFormat object 
- * by supplying their own interval patterns.
- * It provides flexibility for power users.
- *
- * <P>
- * After a DateIntervalInfo object is created, clients may modify
- * the interval patterns using setIntervalPattern function as so desired.
- * Currently, users can only set interval patterns when the following 
- * calendar fields are different: ERA, YEAR, MONTH, DATE,  DAY_OF_MONTH, 
- * DAY_OF_WEEK, AM_PM,  HOUR, HOUR_OF_DAY, and MINUTE.
- * Interval patterns when other calendar fields are different is not supported.
- * <P>
- * DateIntervalInfo objects are cloneable. 
- * When clients obtain a DateIntervalInfo object, 
- * they can feel free to modify it as necessary.
- * <P>
- * DateIntervalInfo are not expected to be subclassed. 
- * Data for a calendar is loaded out of resource bundles. 
- * To ICU 4.0, date interval patterns are only supported in Gregorian calendar. 
- * @draft ICU 4.0
-**/
-
-class U_I18N_API DateIntervalInfo : public UObject {
-public:
-    /**
-     * Default constructor.
-     * It does not initialize any interval patterns except
-     * that it initialize default fall-back pattern as "{0} - {1}",
-     * which can be reset by setFallbackIntervalPattern().
-     * It should be followed by setFallbackIntervalPattern() and 
-     * setIntervalPattern(), 
-     * and is recommended to be used only for power users who
-     * wants to create their own interval patterns and use them to create
-     * date interval formatter.
-     * @param status   output param set to success/failure code on exit
-     * @internal ICU 4.0
-     */
-    DateIntervalInfo(UErrorCode& status);
-
-
-    /** 
-     * Construct DateIntervalInfo for the given locale,
-     * @param locale  the interval patterns are loaded from the Gregorian 
-     *                calendar data in this locale.
-     * @param status  output param set to success/failure code on exit
-     * @draft ICU 4.0
-     */
-    DateIntervalInfo(const Locale& locale, UErrorCode& status);
-
-
-    /**
-     * Copy constructor.
-     * @draft ICU 4.0
-     */
-    DateIntervalInfo(const DateIntervalInfo&);
-
-    /**
-     * Assignment operator
-     * @draft ICU 4.0
-     */
-    DateIntervalInfo& operator=(const DateIntervalInfo&);
-
-    /**
-     * Clone this object polymorphically.
-     * The caller owns the result and should delete it when done.
-     * @return   a copy of the object
-     * @draft    ICU4.0
-     */
-    virtual DateIntervalInfo* clone(void) const;
-
-    /**
-     * Destructor.
-     * It is virtual to be safe, but it is not designed to be subclassed.
-     * @draft ICU 4.0
-     */
-    virtual ~DateIntervalInfo();
-
-
-    /**
-     * Return true if another object is semantically equal to this one.
-     *
-     * @param other    the DateIntervalInfo object to be compared with.
-     * @return         true if other is semantically equal to this.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator==(const DateIntervalInfo& other) const;
-
-    /**
-     * Return true if another object is semantically unequal to this one.
-     *
-     * @param other    the DateIntervalInfo object to be compared with.
-     * @return         true if other is semantically unequal to this.
-     * @stable ICU 4.0
-     */
-    UBool operator!=(const DateIntervalInfo& other) const;
-
-
-
-    /** 
-     * Provides a way for client to build interval patterns.
-     * User could construct DateIntervalInfo by providing a list of skeletons
-     * and their patterns.
-     * <P>
-     * For example:
-     * <pre>
-     * UErrorCode status = U_ZERO_ERROR;
-     * DateIntervalInfo dIntervalInfo = new DateIntervalInfo();
-     * dIntervalInfo->setFallbackIntervalPattern("{0} ~ {1}");
-     * dIntervalInfo->setIntervalPattern("yMd", UCAL_YEAR, "'from' yyyy-M-d 'to' yyyy-M-d", status); 
-     * dIntervalInfo->setIntervalPattern("yMMMd", UCAL_MONTH, "'from' yyyy MMM d 'to' MMM d", status);
-     * dIntervalInfo->setIntervalPattern("yMMMd", UCAL_DAY, "yyyy MMM d-d", status, status);
-     * </pre>
-     *
-     * Restriction: 
-     * Currently, users can only set interval patterns when the following 
-     * calendar fields are different: ERA, YEAR, MONTH, DATE,  DAY_OF_MONTH, 
-     * DAY_OF_WEEK, AM_PM,  HOUR, HOUR_OF_DAY, and MINUTE.
-     * Interval patterns when other calendar fields are different are 
-     * not supported.
-     *
-     * @param skeleton         the skeleton on which interval pattern based
-     * @param lrgDiffCalUnit   the largest different calendar unit.
-     * @param intervalPattern  the interval pattern on the largest different
-     *                         calendar unit.
-     *                         For example, if lrgDiffCalUnit is 
-     *                         "year", the interval pattern for en_US when year
-     *                         is different could be "'from' yyyy 'to' yyyy".
-     * @param status           output param set to success/failure code on exit
-     * @draft ICU 4.0
-     */
-    void setIntervalPattern(const UnicodeString& skeleton, 
-                            UCalendarDateFields lrgDiffCalUnit, 
-                            const UnicodeString& intervalPattern,
-                            UErrorCode& status);
-
-    /**
-     * Get the interval pattern given skeleton and 
-     * the largest different calendar field.
-     * @param skeleton   the skeleton
-     * @param field      the largest different calendar field
-     * @param result     output param to receive the pattern
-     * @param status     output param set to success/failure code on exit
-     * @return a reference to 'result'
-     * @draft ICU 4.0 
-     */
-    UnicodeString& getIntervalPattern(const UnicodeString& skeleton,
-                                      UCalendarDateFields field,
-                                      UnicodeString& result,
-                                      UErrorCode& status) const; 
-
-    /**
-     * Get the fallback interval pattern.
-     * @param  result   output param to receive the pattern
-     * @return a reference to 'result'
-     * @draft ICU 4.0 
-     */
-    UnicodeString& getFallbackIntervalPattern(UnicodeString& result) const;
-
-
-    /**
-     * Re-set the fallback interval pattern.
-     *
-     * In construction, default fallback pattern is set as "{0} - {1}".
-     * And constructor taking locale as parameter will set the
-     * fallback pattern as what defined in the locale resource file.
-     *
-     * This method provides a way for user to replace the fallback pattern.
-     *
-     * @param fallbackPattern  fall-back interval pattern.
-     * @param status           output param set to success/failure code on exit
-     * @draft ICU 4.0 
-     */
-    void setFallbackIntervalPattern(const UnicodeString& fallbackPattern,
-                                    UErrorCode& status);
-
-
-    /** Get default order -- whether the first date in pattern is later date
-                             or not.
-     * return default date ordering in interval pattern. TRUE if the first date
-     *        in pattern is later date, FALSE otherwise.
-     * @draft ICU 4.0 
-     */
-    UBool getDefaultOrder() const;
-
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 4.0
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 4.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-
-private:
-    /**
-     * DateIntervalFormat will need access to
-     * getBestSkeleton(), parseSkeleton(), enum IntervalPatternIndex,
-     * and calendarFieldToPatternIndex().
-     *
-     * Instead of making above public,
-     * make DateIntervalFormat a friend of DateIntervalInfo.
-     */
-    friend class DateIntervalFormat;
-
-    friend UBool U_CALLCONV hashTableValueComparator(UHashTok val1, UHashTok val2) ;
-
-    /**
-     * Following is for saving the interval patterns.
-     * We only support interval patterns on
-     * ERA, YEAR, MONTH, DAY, AM_PM, HOUR, and MINUTE
-     */
-    enum IntervalPatternIndex
-    {
-        kIPI_ERA,
-        kIPI_YEAR,
-        kIPI_MONTH,
-        kIPI_DATE,
-        kIPI_AM_PM,
-        kIPI_HOUR,
-        kIPI_MINUTE,
-        kIPI_MAX_INDEX
-    };
-
-    /** 
-     * Initialize the DateIntervalInfo from locale
-     * @param locale   the given locale.
-     * @param status   output param set to success/failure code on exit
-     * @internal ICU 4.0 
-     */
-    void initializeData(const Locale& locale, UErrorCode& status);
-
-
-    /* Set Interval pattern.
-     *
-     * It sets interval pattern into the hash map.
-     *
-     * @param skeleton         skeleton on which the interval pattern based
-     * @param lrgDiffCalUnit   the largest different calendar unit.
-     * @param intervalPattern  the interval pattern on the largest different
-     *                         calendar unit.
-     * @param status           output param set to success/failure code on exit
-     * @internal ICU 4.0
-     */
-    void setIntervalPatternInternally(const UnicodeString& skeleton,
-                                      UCalendarDateFields lrgDiffCalUnit,
-                                      const UnicodeString& intervalPattern,
-                                      UErrorCode& status); 
-
-
-    /**given an input skeleton, get the best match skeleton 
-     * which has pre-defined interval pattern in resource file.
-     * Also return the difference between the input skeleton
-     * and the best match skeleton.
-     *
-     * TODO (xji): set field weight or
-     *             isolate the funtionality in DateTimePatternGenerator
-     * @param  skeleton               input skeleton
-     * @param  bestMatchDistanceInfo  the difference between input skeleton
-     *                                and best match skeleton.
-     *         0, if there is exact match for input skeleton
-     *         1, if there is only field width difference between 
-     *            the best match and the input skeleton
-     *         2, the only field difference is 'v' and 'z'
-     *        -1, if there is calendar field difference between
-     *            the best match and the input skeleton
-     * @return                        best match skeleton
-     * @internal ICU 4.0
-     */
-    const UnicodeString* getBestSkeleton(const UnicodeString& skeleton,
-                                         int8_t& bestMatchDistanceInfo) const;
-
-
-    /**
-     * Parse skeleton, save each field's width.
-     * It is used for looking for best match skeleton,
-     * and adjust pattern field width.
-     * @param skeleton            skeleton to be parsed
-     * @param skeletonFieldWidth  parsed skeleton field width
-     * @internal ICU 4.0
-     */
-    static void U_EXPORT2 parseSkeleton(const UnicodeString& skeleton, 
-                                        int32_t* skeletonFieldWidth);
-
-
-    /**
-     * Check whether one field width is numeric while the other is string.
-     *
-     * TODO (xji): make it general
-     *
-     * @param fieldWidth          one field width
-     * @param anotherFieldWidth   another field width
-     * @param patternLetter       pattern letter char
-     * @return true if one field width is numeric and the other is string,
-     *         false otherwise.
-     * @internal ICU 4.0
-     */
-    static UBool U_EXPORT2 stringNumeric(int32_t fieldWidth,
-                                         int32_t anotherFieldWidth,
-                                         char patternLetter);
-
-
-    /** 
-     * Convert calendar field to the interval pattern index in 
-     * hash table.
-     *
-     * Since we only support the following calendar fields: 
-     * ERA, YEAR, MONTH, DATE,  DAY_OF_MONTH, DAY_OF_WEEK, 
-     * AM_PM,  HOUR, HOUR_OF_DAY, and MINUTE,
-     * We reserve only 4 interval patterns for a skeleton.
-     *
-     * @param field    calendar field
-     * @param status   output param set to success/failure code on exit
-     * @return  interval pattern index in hash table
-     * @internal ICU 4.0
-     */
-    static IntervalPatternIndex U_EXPORT2 calendarFieldToIntervalIndex(
-                                                      UCalendarDateFields field,
-                                                      UErrorCode& status);
-
-
-    /**
-     * delete hash table (of type fIntervalPatterns).
-     *
-     * @param hTable  hash table to be deleted
-     * @internal ICU 4.0
-     */
-    void deleteHash(Hashtable* hTable);
-
-
-    /**
-     * initialize hash table (of type fIntervalPatterns).
-     *
-     * @param status   output param set to success/failure code on exit
-     * @return         hash table initialized
-     * @internal ICU 4.0
-     */
-    Hashtable* initHash(UErrorCode& status);
-
-
-
-    /**
-     * copy hash table (of type fIntervalPatterns).
-     *
-     * @param source   the source to copy from
-     * @param target   the target to copy to
-     * @param status   output param set to success/failure code on exit
-     * @internal ICU 4.0
-     */
-    void copyHash(const Hashtable* source, Hashtable* target, UErrorCode& status);
-
-
-    // data members
-    // fallback interval pattern 
-    UnicodeString fFallbackIntervalPattern;
-    // default order
-    UBool fFirstDateInPtnIsLaterDate;
-
-    // HashMap<UnicodeString, UnicodeString[kIPI_MAX_INDEX]>
-    // HashMap( skeleton, pattern[largest_different_field] )
-    Hashtable* fIntervalPatterns;
-
-};// end class DateIntervalInfo
-
-
-inline UBool
-DateIntervalInfo::operator!=(const DateIntervalInfo& other) const {
-    return !operator==(other);
-}
-
-
-U_NAMESPACE_END
-
-#endif
-
-#endif
-

Copied: MacRuby/trunk/icu-1060/unicode/dtitvinf.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/dtitvinf.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/dtitvinf.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/dtitvinf.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,528 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ *
+ * File DTITVINF.H
+ *
+ *******************************************************************************
+ */
+
+#ifndef __DTITVINF_H__
+#define __DTITVINF_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: Date/Time interval patterns for formatting date/time interval
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/udat.h"
+#include "unicode/locid.h"
+#include "unicode/ucal.h"
+#include "unicode/dtptngen.h"
+//#include "dtitv_impl.h"
+
+/**
+ * @internal ICU 4.0
+ */
+ 
+union UHashTok;
+
+
+U_NAMESPACE_BEGIN
+
+U_CDECL_BEGIN 
+
+/**
+ * @internal ICU 4.0
+ */
+static UBool U_CALLCONV hashTableValueComparator(UHashTok val1, UHashTok val2) ;
+
+U_CDECL_END 
+
+
+/**
+ * DateIntervalInfo is a public class for encapsulating localizable
+ * date time interval patterns. It is used by DateIntervalFormat.
+ *
+ * <P>
+ * For most users, ordinary use of DateIntervalFormat does not need to create
+ * DateIntervalInfo object directly.
+ * DateIntervalFormat will take care of it when creating a date interval
+ * formatter when user pass in skeleton and locale.
+ *
+ * <P>
+ * For power users, who want to create their own date interval patterns,
+ * or want to re-set date interval patterns, they could do so by
+ * directly creating DateIntervalInfo and manupulating it.
+ *
+ * <P>
+ * Logically, the interval patterns are mappings
+ * from (skeleton, the_largest_different_calendar_field)
+ * to (date_interval_pattern).
+ *
+ * <P>
+ * A skeleton 
+ * <ol>
+ * <li>
+ * only keeps the field pattern letter and ignores all other parts 
+ * in a pattern, such as space, punctuations, and string literals.
+ * <li>
+ * hides the order of fields. 
+ * <li>
+ * might hide a field's pattern letter length.
+ *
+ * For those non-digit calendar fields, the pattern letter length is 
+ * important, such as MMM, MMMM, and MMMMM; EEE and EEEE, 
+ * and the field's pattern letter length is honored.
+ *    
+ * For the digit calendar fields,  such as M or MM, d or dd, yy or yyyy, 
+ * the field pattern length is ignored and the best match, which is defined 
+ * in date time patterns, will be returned without honor the field pattern
+ * letter length in skeleton.
+ * </ol>
+ *
+ * <P>
+ * The calendar fields we support for interval formatting are:
+ * year, month, date, day-of-week, am-pm, hour, hour-of-day, and minute.
+ * Those calendar fields can be defined in the following order:
+ * year >  month > date > am-pm > hour >  minute 
+ *  
+ * The largest different calendar fields between 2 calendars is the
+ * first different calendar field in above order.
+ *
+ * For example: the largest different calendar fields between "Jan 10, 2007" 
+ * and "Feb 20, 2008" is year.
+ *   
+ * <P>
+ * There is a set of pre-defined static skeleton strings.
+ * There are pre-defined interval patterns for those pre-defined skeletons
+ * in locales' resource files.
+ * For example, for a skeleton UDAT_YEAR_ABBR_MONTH_DAY, which is  "yMMMd",
+ * in  en_US, if the largest different calendar field between date1 and date2 
+ * is "year", the date interval pattern  is "MMM d, yyyy - MMM d, yyyy", 
+ * such as "Jan 10, 2007 - Jan 10, 2008".
+ * If the largest different calendar field between date1 and date2 is "month",
+ * the date interval pattern is "MMM d - MMM d, yyyy",
+ * such as "Jan 10 - Feb 10, 2007".
+ * If the largest different calendar field between date1 and date2 is "day",
+ * the date interval pattern is ""MMM d-d, yyyy", such as "Jan 10-20, 2007".
+ *
+ * For date skeleton, the interval patterns when year, or month, or date is 
+ * different are defined in resource files.
+ * For time skeleton, the interval patterns when am/pm, or hour, or minute is
+ * different are defined in resource files.
+ *
+ *
+ * <P>
+ * There are 2 dates in interval pattern. For most locales, the first date
+ * in an interval pattern is the earlier date. There might be a locale in which
+ * the first date in an interval pattern is the later date.
+ * We use fallback format for the default order for the locale.
+ * For example, if the fallback format is "{0} - {1}", it means
+ * the first date in the interval pattern for this locale is earlier date.
+ * If the fallback format is "{1} - {0}", it means the first date is the 
+ * later date.
+ * For a particular interval pattern, the default order can be overriden
+ * by prefixing "latestFirst:" or "earliestFirst:" to the interval pattern.
+ * For example, if the fallback format is "{0}-{1}",
+ * but for skeleton "yMMMd", the interval pattern when day is different is 
+ * "latestFirst:d-d MMM yy", it means by default, the first date in interval
+ * pattern is the earlier date. But for skeleton "yMMMd", when day is different,
+ * the first date in "d-d MMM yy" is the later date.
+ * 
+ * <P>
+ * The recommended way to create a DateIntervalFormat object is to pass in 
+ * the locale. 
+ * By using a Locale parameter, the DateIntervalFormat object is 
+ * initialized with the pre-defined interval patterns for a given or 
+ * default locale.
+ * <P>
+ * Users can also create DateIntervalFormat object 
+ * by supplying their own interval patterns.
+ * It provides flexibility for power users.
+ *
+ * <P>
+ * After a DateIntervalInfo object is created, clients may modify
+ * the interval patterns using setIntervalPattern function as so desired.
+ * Currently, users can only set interval patterns when the following 
+ * calendar fields are different: ERA, YEAR, MONTH, DATE,  DAY_OF_MONTH, 
+ * DAY_OF_WEEK, AM_PM,  HOUR, HOUR_OF_DAY, and MINUTE.
+ * Interval patterns when other calendar fields are different is not supported.
+ * <P>
+ * DateIntervalInfo objects are cloneable. 
+ * When clients obtain a DateIntervalInfo object, 
+ * they can feel free to modify it as necessary.
+ * <P>
+ * DateIntervalInfo are not expected to be subclassed. 
+ * Data for a calendar is loaded out of resource bundles. 
+ * To ICU 4.0, date interval patterns are only supported in Gregorian calendar. 
+ * @draft ICU 4.0
+**/
+
+class U_I18N_API DateIntervalInfo : public UObject {
+public:
+    /**
+     * Default constructor.
+     * It does not initialize any interval patterns except
+     * that it initialize default fall-back pattern as "{0} - {1}",
+     * which can be reset by setFallbackIntervalPattern().
+     * It should be followed by setFallbackIntervalPattern() and 
+     * setIntervalPattern(), 
+     * and is recommended to be used only for power users who
+     * wants to create their own interval patterns and use them to create
+     * date interval formatter.
+     * @param status   output param set to success/failure code on exit
+     * @internal ICU 4.0
+     */
+    DateIntervalInfo(UErrorCode& status);
+
+
+    /** 
+     * Construct DateIntervalInfo for the given locale,
+     * @param locale  the interval patterns are loaded from the Gregorian 
+     *                calendar data in this locale.
+     * @param status  output param set to success/failure code on exit
+     * @draft ICU 4.0
+     */
+    DateIntervalInfo(const Locale& locale, UErrorCode& status);
+
+
+    /**
+     * Copy constructor.
+     * @draft ICU 4.0
+     */
+    DateIntervalInfo(const DateIntervalInfo&);
+
+    /**
+     * Assignment operator
+     * @draft ICU 4.0
+     */
+    DateIntervalInfo& operator=(const DateIntervalInfo&);
+
+    /**
+     * Clone this object polymorphically.
+     * The caller owns the result and should delete it when done.
+     * @return   a copy of the object
+     * @draft    ICU4.0
+     */
+    virtual DateIntervalInfo* clone(void) const;
+
+    /**
+     * Destructor.
+     * It is virtual to be safe, but it is not designed to be subclassed.
+     * @draft ICU 4.0
+     */
+    virtual ~DateIntervalInfo();
+
+
+    /**
+     * Return true if another object is semantically equal to this one.
+     *
+     * @param other    the DateIntervalInfo object to be compared with.
+     * @return         true if other is semantically equal to this.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator==(const DateIntervalInfo& other) const;
+
+    /**
+     * Return true if another object is semantically unequal to this one.
+     *
+     * @param other    the DateIntervalInfo object to be compared with.
+     * @return         true if other is semantically unequal to this.
+     * @stable ICU 4.0
+     */
+    UBool operator!=(const DateIntervalInfo& other) const;
+
+
+
+    /** 
+     * Provides a way for client to build interval patterns.
+     * User could construct DateIntervalInfo by providing a list of skeletons
+     * and their patterns.
+     * <P>
+     * For example:
+     * <pre>
+     * UErrorCode status = U_ZERO_ERROR;
+     * DateIntervalInfo dIntervalInfo = new DateIntervalInfo();
+     * dIntervalInfo->setFallbackIntervalPattern("{0} ~ {1}");
+     * dIntervalInfo->setIntervalPattern("yMd", UCAL_YEAR, "'from' yyyy-M-d 'to' yyyy-M-d", status); 
+     * dIntervalInfo->setIntervalPattern("yMMMd", UCAL_MONTH, "'from' yyyy MMM d 'to' MMM d", status);
+     * dIntervalInfo->setIntervalPattern("yMMMd", UCAL_DAY, "yyyy MMM d-d", status, status);
+     * </pre>
+     *
+     * Restriction: 
+     * Currently, users can only set interval patterns when the following 
+     * calendar fields are different: ERA, YEAR, MONTH, DATE,  DAY_OF_MONTH, 
+     * DAY_OF_WEEK, AM_PM,  HOUR, HOUR_OF_DAY, and MINUTE.
+     * Interval patterns when other calendar fields are different are 
+     * not supported.
+     *
+     * @param skeleton         the skeleton on which interval pattern based
+     * @param lrgDiffCalUnit   the largest different calendar unit.
+     * @param intervalPattern  the interval pattern on the largest different
+     *                         calendar unit.
+     *                         For example, if lrgDiffCalUnit is 
+     *                         "year", the interval pattern for en_US when year
+     *                         is different could be "'from' yyyy 'to' yyyy".
+     * @param status           output param set to success/failure code on exit
+     * @draft ICU 4.0
+     */
+    void setIntervalPattern(const UnicodeString& skeleton, 
+                            UCalendarDateFields lrgDiffCalUnit, 
+                            const UnicodeString& intervalPattern,
+                            UErrorCode& status);
+
+    /**
+     * Get the interval pattern given skeleton and 
+     * the largest different calendar field.
+     * @param skeleton   the skeleton
+     * @param field      the largest different calendar field
+     * @param result     output param to receive the pattern
+     * @param status     output param set to success/failure code on exit
+     * @return a reference to 'result'
+     * @draft ICU 4.0 
+     */
+    UnicodeString& getIntervalPattern(const UnicodeString& skeleton,
+                                      UCalendarDateFields field,
+                                      UnicodeString& result,
+                                      UErrorCode& status) const; 
+
+    /**
+     * Get the fallback interval pattern.
+     * @param  result   output param to receive the pattern
+     * @return a reference to 'result'
+     * @draft ICU 4.0 
+     */
+    UnicodeString& getFallbackIntervalPattern(UnicodeString& result) const;
+
+
+    /**
+     * Re-set the fallback interval pattern.
+     *
+     * In construction, default fallback pattern is set as "{0} - {1}".
+     * And constructor taking locale as parameter will set the
+     * fallback pattern as what defined in the locale resource file.
+     *
+     * This method provides a way for user to replace the fallback pattern.
+     *
+     * @param fallbackPattern  fall-back interval pattern.
+     * @param status           output param set to success/failure code on exit
+     * @draft ICU 4.0 
+     */
+    void setFallbackIntervalPattern(const UnicodeString& fallbackPattern,
+                                    UErrorCode& status);
+
+
+    /** Get default order -- whether the first date in pattern is later date
+                             or not.
+     * return default date ordering in interval pattern. TRUE if the first date
+     *        in pattern is later date, FALSE otherwise.
+     * @draft ICU 4.0 
+     */
+    UBool getDefaultOrder() const;
+
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 4.0
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 4.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+
+private:
+    /**
+     * DateIntervalFormat will need access to
+     * getBestSkeleton(), parseSkeleton(), enum IntervalPatternIndex,
+     * and calendarFieldToPatternIndex().
+     *
+     * Instead of making above public,
+     * make DateIntervalFormat a friend of DateIntervalInfo.
+     */
+    friend class DateIntervalFormat;
+
+    friend UBool U_CALLCONV hashTableValueComparator(UHashTok val1, UHashTok val2) ;
+
+    /**
+     * Following is for saving the interval patterns.
+     * We only support interval patterns on
+     * ERA, YEAR, MONTH, DAY, AM_PM, HOUR, and MINUTE
+     */
+    enum IntervalPatternIndex
+    {
+        kIPI_ERA,
+        kIPI_YEAR,
+        kIPI_MONTH,
+        kIPI_DATE,
+        kIPI_AM_PM,
+        kIPI_HOUR,
+        kIPI_MINUTE,
+        kIPI_MAX_INDEX
+    };
+
+    /** 
+     * Initialize the DateIntervalInfo from locale
+     * @param locale   the given locale.
+     * @param status   output param set to success/failure code on exit
+     * @internal ICU 4.0 
+     */
+    void initializeData(const Locale& locale, UErrorCode& status);
+
+
+    /* Set Interval pattern.
+     *
+     * It sets interval pattern into the hash map.
+     *
+     * @param skeleton         skeleton on which the interval pattern based
+     * @param lrgDiffCalUnit   the largest different calendar unit.
+     * @param intervalPattern  the interval pattern on the largest different
+     *                         calendar unit.
+     * @param status           output param set to success/failure code on exit
+     * @internal ICU 4.0
+     */
+    void setIntervalPatternInternally(const UnicodeString& skeleton,
+                                      UCalendarDateFields lrgDiffCalUnit,
+                                      const UnicodeString& intervalPattern,
+                                      UErrorCode& status); 
+
+
+    /**given an input skeleton, get the best match skeleton 
+     * which has pre-defined interval pattern in resource file.
+     * Also return the difference between the input skeleton
+     * and the best match skeleton.
+     *
+     * TODO (xji): set field weight or
+     *             isolate the funtionality in DateTimePatternGenerator
+     * @param  skeleton               input skeleton
+     * @param  bestMatchDistanceInfo  the difference between input skeleton
+     *                                and best match skeleton.
+     *         0, if there is exact match for input skeleton
+     *         1, if there is only field width difference between 
+     *            the best match and the input skeleton
+     *         2, the only field difference is 'v' and 'z'
+     *        -1, if there is calendar field difference between
+     *            the best match and the input skeleton
+     * @return                        best match skeleton
+     * @internal ICU 4.0
+     */
+    const UnicodeString* getBestSkeleton(const UnicodeString& skeleton,
+                                         int8_t& bestMatchDistanceInfo) const;
+
+
+    /**
+     * Parse skeleton, save each field's width.
+     * It is used for looking for best match skeleton,
+     * and adjust pattern field width.
+     * @param skeleton            skeleton to be parsed
+     * @param skeletonFieldWidth  parsed skeleton field width
+     * @internal ICU 4.0
+     */
+    static void U_EXPORT2 parseSkeleton(const UnicodeString& skeleton, 
+                                        int32_t* skeletonFieldWidth);
+
+
+    /**
+     * Check whether one field width is numeric while the other is string.
+     *
+     * TODO (xji): make it general
+     *
+     * @param fieldWidth          one field width
+     * @param anotherFieldWidth   another field width
+     * @param patternLetter       pattern letter char
+     * @return true if one field width is numeric and the other is string,
+     *         false otherwise.
+     * @internal ICU 4.0
+     */
+    static UBool U_EXPORT2 stringNumeric(int32_t fieldWidth,
+                                         int32_t anotherFieldWidth,
+                                         char patternLetter);
+
+
+    /** 
+     * Convert calendar field to the interval pattern index in 
+     * hash table.
+     *
+     * Since we only support the following calendar fields: 
+     * ERA, YEAR, MONTH, DATE,  DAY_OF_MONTH, DAY_OF_WEEK, 
+     * AM_PM,  HOUR, HOUR_OF_DAY, and MINUTE,
+     * We reserve only 4 interval patterns for a skeleton.
+     *
+     * @param field    calendar field
+     * @param status   output param set to success/failure code on exit
+     * @return  interval pattern index in hash table
+     * @internal ICU 4.0
+     */
+    static IntervalPatternIndex U_EXPORT2 calendarFieldToIntervalIndex(
+                                                      UCalendarDateFields field,
+                                                      UErrorCode& status);
+
+
+    /**
+     * delete hash table (of type fIntervalPatterns).
+     *
+     * @param hTable  hash table to be deleted
+     * @internal ICU 4.0
+     */
+    void deleteHash(Hashtable* hTable);
+
+
+    /**
+     * initialize hash table (of type fIntervalPatterns).
+     *
+     * @param status   output param set to success/failure code on exit
+     * @return         hash table initialized
+     * @internal ICU 4.0
+     */
+    Hashtable* initHash(UErrorCode& status);
+
+
+
+    /**
+     * copy hash table (of type fIntervalPatterns).
+     *
+     * @param source   the source to copy from
+     * @param target   the target to copy to
+     * @param status   output param set to success/failure code on exit
+     * @internal ICU 4.0
+     */
+    void copyHash(const Hashtable* source, Hashtable* target, UErrorCode& status);
+
+
+    // data members
+    // fallback interval pattern 
+    UnicodeString fFallbackIntervalPattern;
+    // default order
+    UBool fFirstDateInPtnIsLaterDate;
+
+    // HashMap<UnicodeString, UnicodeString[kIPI_MAX_INDEX]>
+    // HashMap( skeleton, pattern[largest_different_field] )
+    Hashtable* fIntervalPatterns;
+
+};// end class DateIntervalInfo
+
+
+inline UBool
+DateIntervalInfo::operator!=(const DateIntervalInfo& other) const {
+    return !operator==(other);
+}
+
+
+U_NAMESPACE_END
+
+#endif
+
+#endif
+

Deleted: MacRuby/trunk/icu-1060/unicode/dtptngen.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/dtptngen.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/dtptngen.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,423 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 2007-2009, International Business Machines Corporation and
-* others. All Rights Reserved.
-*******************************************************************************
-*
-* File DTPTNGEN.H
-*
-*******************************************************************************
-*/
-
-#ifndef __DTPTNGEN_H__
-#define __DTPTNGEN_H__
-
-#include "unicode/datefmt.h"
-#include "unicode/locid.h"
-#include "unicode/udat.h"
-#include "unicode/udatpg.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * \file
- * \brief C++ API: Date/Time Pattern Generator
- */
-
-
-class Hashtable;
-class FormatParser;
-class DateTimeMatcher;
-class DistanceInfo;
-class PatternMap;
-class PtnSkeleton;
-
-/**
- * This class provides flexible generation of date format patterns, like "yy-MM-dd". 
- * The user can build up the generator by adding successive patterns. Once that 
- * is done, a query can be made using a "skeleton", which is a pattern which just
- * includes the desired fields and lengths. The generator will return the "best fit" 
- * pattern corresponding to that skeleton.
- * <p>The main method people will use is getBestPattern(String skeleton),
- * since normally this class is pre-built with data from a particular locale. 
- * However, generators can be built directly from other data as well.
- * <p><i>Issue: may be useful to also have a function that returns the list of 
- * fields in a pattern, in order, since we have that internally.
- * That would be useful for getting the UI order of field elements.</i>
- * @stable ICU 3.8
-**/
-class U_I18N_API DateTimePatternGenerator : public UObject {
-public:
-    /**
-     * Construct a flexible generator according to default locale.
-     * @param status  Output param set to success/failure code on exit,
-     *               which must not indicate a failure before the function call.
-     * @stable ICU 3.8
-     */
-    static DateTimePatternGenerator* U_EXPORT2 createInstance(UErrorCode& status);
-
-    /**
-     * Construct a flexible generator according to data for a given locale.
-     * @param uLocale
-     * @param status  Output param set to success/failure code on exit,
-     *               which must not indicate a failure before the function call.
-     * @stable ICU 3.8
-     */
-    static DateTimePatternGenerator* U_EXPORT2 createInstance(const Locale& uLocale, UErrorCode& status);
-
-    /**
-     * Create an empty generator, to be constructed with addPattern(...) etc.
-     * @param status  Output param set to success/failure code on exit,
-     *               which must not indicate a failure before the function call.
-     * @stable ICU 3.8
-     */
-     static DateTimePatternGenerator* U_EXPORT2 createEmptyInstance(UErrorCode& status);
-     
-    /**
-     * Destructor.
-     * @stable ICU 3.8
-     */
-    virtual ~DateTimePatternGenerator();
-
-    /**
-     * Clone DateTimePatternGenerator object. Clients are responsible for 
-     * deleting the DateTimePatternGenerator object cloned.
-     * @stable ICU 3.8
-     */
-    DateTimePatternGenerator* clone() const;
-
-     /**
-      * Return true if another object is semantically equal to this one.
-      *
-      * @param other    the DateTimePatternGenerator object to be compared with.
-      * @return         true if other is semantically equal to this.
-      * @stable ICU 3.8
-      */
-    UBool operator==(const DateTimePatternGenerator& other) const;
-    
-    /**
-     * Return true if another object is semantically unequal to this one.
-     *
-     * @param other    the DateTimePatternGenerator object to be compared with.
-     * @return         true if other is semantically unequal to this.
-     * @stable ICU 3.8
-     */
-    UBool operator!=(const DateTimePatternGenerator& other) const;
-
-    /**
-     * Utility to return a unique skeleton from a given pattern. For example,
-     * both "MMM-dd" and "dd/MMM" produce the skeleton "MMMdd".
-     *
-     * @param pattern   Input pattern, such as "dd/MMM"
-     * @param status  Output param set to success/failure code on exit,
-     *                  which must not indicate a failure before the function call.
-     * @return skeleton such as "MMMdd"
-     * @stable ICU 3.8
-     */
-    UnicodeString getSkeleton(const UnicodeString& pattern, UErrorCode& status);
-
-    /**
-     * Utility to return a unique base skeleton from a given pattern. This is
-     * the same as the skeleton, except that differences in length are minimized
-     * so as to only preserve the difference between string and numeric form. So
-     * for example, both "MMM-dd" and "d/MMM" produce the skeleton "MMMd"
-     * (notice the single d).
-     *
-     * @param pattern  Input pattern, such as "dd/MMM"
-     * @param status  Output param set to success/failure code on exit,
-     *               which must not indicate a failure before the function call.
-     * @return base skeleton, such as "Md"
-     * @stable ICU 3.8
-     */
-    UnicodeString getBaseSkeleton(const UnicodeString& pattern, UErrorCode& status);
-
-    /**
-     * Adds a pattern to the generator. If the pattern has the same skeleton as
-     * an existing pattern, and the override parameter is set, then the previous
-     * value is overriden. Otherwise, the previous value is retained. In either
-     * case, the conflicting status is set and previous vale is stored in 
-     * conflicting pattern.
-     * <p>
-     * Note that single-field patterns (like "MMM") are automatically added, and
-     * don't need to be added explicitly!
-     *
-     * @param pattern   Input pattern, such as "dd/MMM"
-     * @param override  When existing values are to be overridden use true, 
-     *                   otherwise use false.
-     * @param conflictingPattern  Previous pattern with the same skeleton.
-     * @param status  Output param set to success/failure code on exit,
-     *               which must not indicate a failure before the function call.
-     * @return conflicting status.  The value could be UDATPG_NO_CONFLICT, 
-     *                             UDATPG_BASE_CONFLICT or UDATPG_CONFLICT.
-     * @stable ICU 3.8
-     */
-    UDateTimePatternConflict addPattern(const UnicodeString& pattern, 
-                                        UBool override, 
-                                        UnicodeString& conflictingPattern,
-                                        UErrorCode& status);
-
-    /**
-     * An AppendItem format is a pattern used to append a field if there is no
-     * good match. For example, suppose that the input skeleton is "GyyyyMMMd",
-     * and there is no matching pattern internally, but there is a pattern
-     * matching "yyyyMMMd", say "d-MM-yyyy". Then that pattern is used, plus the
-     * G. The way these two are conjoined is by using the AppendItemFormat for G
-     * (era). So if that value is, say "{0}, {1}" then the final resulting
-     * pattern is "d-MM-yyyy, G".
-     * <p>
-     * There are actually three available variables: {0} is the pattern so far,
-     * {1} is the element we are adding, and {2} is the name of the element.
-     * <p>
-     * This reflects the way that the CLDR data is organized.
-     *
-     * @param field  such as UDATPG_ERA_FIELD.
-     * @param value  pattern, such as "{0}, {1}"
-     * @stable ICU 3.8
-     */
-    void setAppendItemFormat(UDateTimePatternField field, const UnicodeString& value);
-
-    /**
-     * Getter corresponding to setAppendItemFormat. Values below 0 or at or
-     * above UDATPG_FIELD_COUNT are illegal arguments.
-     *
-     * @param  field  such as UDATPG_ERA_FIELD.
-     * @return append pattern for field
-     * @stable ICU 3.8
-     */
-    const UnicodeString& getAppendItemFormat(UDateTimePatternField field) const;
-
-    /**
-     * Sets the names of field, eg "era" in English for ERA. These are only
-     * used if the corresponding AppendItemFormat is used, and if it contains a
-     * {2} variable.
-     * <p>
-     * This reflects the way that the CLDR data is organized.
-     *
-     * @param field   such as UDATPG_ERA_FIELD.
-     * @param value   name of the field
-     * @stable ICU 3.8
-     */
-    void setAppendItemName(UDateTimePatternField field, const UnicodeString& value);
-
-    /**
-     * Getter corresponding to setAppendItemNames. Values below 0 or at or above
-     * UDATPG_FIELD_COUNT are illegal arguments.
-     *
-     * @param field  such as UDATPG_ERA_FIELD.
-     * @return name for field
-     * @stable ICU 3.8
-     */
-    const UnicodeString& getAppendItemName(UDateTimePatternField field) const;
-
-    /**
-     * The date time format is a message format pattern used to compose date and
-     * time patterns. The default value is "{0} {1}", where {0} will be replaced
-     * by the date pattern and {1} will be replaced by the time pattern.
-     * <p>
-     * This is used when the input skeleton contains both date and time fields,
-     * but there is not a close match among the added patterns. For example,
-     * suppose that this object was created by adding "dd-MMM" and "hh:mm", and
-     * its datetimeFormat is the default "{0} {1}". Then if the input skeleton
-     * is "MMMdhmm", there is not an exact match, so the input skeleton is
-     * broken up into two components "MMMd" and "hmm". There are close matches
-     * for those two skeletons, so the result is put together with this pattern,
-     * resulting in "d-MMM h:mm".
-     *
-     * @param dateTimeFormat
-     *            message format pattern, here {0} will be replaced by the date
-     *            pattern and {1} will be replaced by the time pattern.
-     * @stable ICU 3.8
-     */
-    void setDateTimeFormat(const UnicodeString& dateTimeFormat);
-
-    /**
-     * Getter corresponding to setDateTimeFormat.
-     * @return DateTimeFormat.
-     * @stable ICU 3.8
-     */
-    const UnicodeString& getDateTimeFormat() const;
-
-    /**
-     * Return the best pattern matching the input skeleton. It is guaranteed to
-     * have all of the fields in the skeleton.
-     *
-     * @param skeleton
-     *            The skeleton is a pattern containing only the variable fields.
-     *            For example, "MMMdd" and "mmhh" are skeletons.
-     * @param status  Output param set to success/failure code on exit,
-     *               which must not indicate a failure before the function call.
-     * @return bestPattern
-     *            The best pattern found from the given skeleton.
-     * @stable ICU 3.8
-     */
-     UnicodeString getBestPattern(const UnicodeString& skeleton, UErrorCode& status);
-
-
-    /**
-     * Adjusts the field types (width and subtype) of a pattern to match what is
-     * in a skeleton. That is, if you supply a pattern like "d-M H:m", and a
-     * skeleton of "MMMMddhhmm", then the input pattern is adjusted to be
-     * "dd-MMMM hh:mm". This is used internally to get the best match for the
-     * input skeleton, but can also be used externally.
-     *
-     * @param pattern Input pattern
-     * @param skeleton
-     *            The skeleton is a pattern containing only the variable fields.
-     *            For example, "MMMdd" and "mmhh" are skeletons.
-     * @param status  Output param set to success/failure code on exit,
-     *               which must not indicate a failure before the function call.
-     * @return pattern adjusted to match the skeleton fields widths and subtypes.
-     * @stable ICU 3.8
-     */
-     UnicodeString replaceFieldTypes(const UnicodeString& pattern, 
-                                     const UnicodeString& skeleton, 
-                                     UErrorCode& status);
-
-    /**
-     * Return a list of all the skeletons (in canonical form) from this class.
-     *
-     * Call getPatternForSkeleton() to get the corresponding pattern.
-     *
-     * @param status  Output param set to success/failure code on exit,
-     *               which must not indicate a failure before the function call.
-     * @return StringEnumeration with the skeletons.
-     *         The caller must delete the object.
-     * @stable ICU 3.8
-     */
-     StringEnumeration* getSkeletons(UErrorCode& status) const;
-
-     /**
-      * Get the pattern corresponding to a given skeleton.
-      * @param skeleton 
-      * @return pattern corresponding to a given skeleton.
-      * @stable ICU 3.8
-      */
-     const UnicodeString& getPatternForSkeleton(const UnicodeString& skeleton) const;
-     
-    /**
-     * Return a list of all the base skeletons (in canonical form) from this class.
-     *
-     * @param status  Output param set to success/failure code on exit,
-     *               which must not indicate a failure before the function call.
-     * @return a StringEnumeration with the base skeletons.
-     *         The caller must delete the object.
-     * @stable ICU 3.8
-     */
-     StringEnumeration* getBaseSkeletons(UErrorCode& status) const;
-     
-     /**
-      * Return a list of redundant patterns are those which if removed, make no 
-      * difference in the resulting getBestPattern values. This method returns a 
-      * list of them, to help check the consistency of the patterns used to build 
-      * this generator.
-      * 
-      * @param status  Output param set to success/failure code on exit,
-      *               which must not indicate a failure before the function call.
-      * @return a StringEnumeration with the redundant pattern.
-      *         The caller must delete the object.
-      * @internal ICU 3.8
-      */
-     StringEnumeration* getRedundants(UErrorCode& status);
-      
-    /**
-     * The decimal value is used in formatting fractions of seconds. If the
-     * skeleton contains fractional seconds, then this is used with the
-     * fractional seconds. For example, suppose that the input pattern is
-     * "hhmmssSSSS", and the best matching pattern internally is "H:mm:ss", and
-     * the decimal string is ",". Then the resulting pattern is modified to be
-     * "H:mm:ss,SSSS"
-     *
-     * @param decimal 
-     * @stable ICU 3.8
-     */
-    void setDecimal(const UnicodeString& decimal);
-
-    /**
-     * Getter corresponding to setDecimal.
-     * @return UnicodeString corresponding to the decimal point
-     * @stable ICU 3.8
-     */
-    const UnicodeString& getDecimal() const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 3.8
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 3.8
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-private:
-    /**
-     * Constructor.
-     * @stable ICU 3.8
-     */
-    DateTimePatternGenerator(UErrorCode & status);
-
-    /**
-     * Constructor.
-     * @stable ICU 3.8
-     */
-    DateTimePatternGenerator(const Locale& locale, UErrorCode & status);
-
-    /**
-     * Copy constructor.
-     * @param other DateTimePatternGenerator to copy
-     * @stable ICU 3.8
-     */
-    DateTimePatternGenerator(const DateTimePatternGenerator& other);
-
-    /**
-     * Default assignment operator.
-     * @param other DateTimePatternGenerator to copy
-     * @stable ICU 3.8
-     */
-    DateTimePatternGenerator& operator=(const DateTimePatternGenerator& other);
-
-    Locale pLocale;  // pattern locale
-    FormatParser *fp;
-    DateTimeMatcher* dtMatcher;
-    DistanceInfo *distanceInfo;
-    PatternMap *patternMap;
-    UnicodeString appendItemFormats[UDATPG_FIELD_COUNT];
-    UnicodeString appendItemNames[UDATPG_FIELD_COUNT];
-    UnicodeString dateTimeFormat;
-    UnicodeString decimal;
-    DateTimeMatcher *skipMatcher;
-    Hashtable *fAvailableFormatKeyHash;
-    UnicodeString hackPattern;
-    UnicodeString emptyString;
-    UChar fDefaultHourFormatChar;
-
-    void initData(const Locale &locale, UErrorCode &status);
-    void addCanonicalItems();
-    void addICUPatterns(const Locale& locale, UErrorCode& status);
-    void hackTimes(const UnicodeString& hackPattern, UErrorCode& status);
-    void addCLDRData(const Locale& locale);
-    UDateTimePatternConflict addPatternWithSkeleton(const UnicodeString& pattern, const UnicodeString * skeletonToUse, UBool override, UnicodeString& conflictingPattern, UErrorCode& status);
-    void initHashtable(UErrorCode& status);
-    void setDateTimeFromCalendar(const Locale& locale, UErrorCode& status);
-    void setDecimalSymbols(const Locale& locale, UErrorCode& status);
-    UDateTimePatternField getAppendFormatNumber(const char* field) const;
-    UDateTimePatternField getAppendNameNumber(const char* field) const;
-    void getAppendName(UDateTimePatternField field, UnicodeString& value);
-    int32_t getCanonicalIndex(const UnicodeString& field);
-    const UnicodeString* getBestRaw(DateTimeMatcher& source, int32_t includeMask, DistanceInfo* missingFields, const PtnSkeleton** specifiedSkeletonPtr = 0);
-    UnicodeString adjustFieldTypes(const UnicodeString& pattern, const PtnSkeleton* specifiedSkeleton, UBool fixFractionalSeconds);
-    UnicodeString getBestAppending(int32_t missingFields);
-    int32_t getTopBitNumber(int32_t foundMask);
-    void setAvailableFormat(const UnicodeString &key, UErrorCode& status);
-    UBool isAvailableFormatSet(const UnicodeString &key) const;
-    void copyHashtable(Hashtable *other, UErrorCode &status);
-    UBool isCanonicalItem(const UnicodeString& item) const;
-} ;// end class DateTimePatternGenerator
-
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/dtptngen.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/dtptngen.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/dtptngen.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/dtptngen.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,423 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2009, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTPTNGEN.H
+*
+*******************************************************************************
+*/
+
+#ifndef __DTPTNGEN_H__
+#define __DTPTNGEN_H__
+
+#include "unicode/datefmt.h"
+#include "unicode/locid.h"
+#include "unicode/udat.h"
+#include "unicode/udatpg.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \file
+ * \brief C++ API: Date/Time Pattern Generator
+ */
+
+
+class Hashtable;
+class FormatParser;
+class DateTimeMatcher;
+class DistanceInfo;
+class PatternMap;
+class PtnSkeleton;
+
+/**
+ * This class provides flexible generation of date format patterns, like "yy-MM-dd". 
+ * The user can build up the generator by adding successive patterns. Once that 
+ * is done, a query can be made using a "skeleton", which is a pattern which just
+ * includes the desired fields and lengths. The generator will return the "best fit" 
+ * pattern corresponding to that skeleton.
+ * <p>The main method people will use is getBestPattern(String skeleton),
+ * since normally this class is pre-built with data from a particular locale. 
+ * However, generators can be built directly from other data as well.
+ * <p><i>Issue: may be useful to also have a function that returns the list of 
+ * fields in a pattern, in order, since we have that internally.
+ * That would be useful for getting the UI order of field elements.</i>
+ * @stable ICU 3.8
+**/
+class U_I18N_API DateTimePatternGenerator : public UObject {
+public:
+    /**
+     * Construct a flexible generator according to default locale.
+     * @param status  Output param set to success/failure code on exit,
+     *               which must not indicate a failure before the function call.
+     * @stable ICU 3.8
+     */
+    static DateTimePatternGenerator* U_EXPORT2 createInstance(UErrorCode& status);
+
+    /**
+     * Construct a flexible generator according to data for a given locale.
+     * @param uLocale
+     * @param status  Output param set to success/failure code on exit,
+     *               which must not indicate a failure before the function call.
+     * @stable ICU 3.8
+     */
+    static DateTimePatternGenerator* U_EXPORT2 createInstance(const Locale& uLocale, UErrorCode& status);
+
+    /**
+     * Create an empty generator, to be constructed with addPattern(...) etc.
+     * @param status  Output param set to success/failure code on exit,
+     *               which must not indicate a failure before the function call.
+     * @stable ICU 3.8
+     */
+     static DateTimePatternGenerator* U_EXPORT2 createEmptyInstance(UErrorCode& status);
+     
+    /**
+     * Destructor.
+     * @stable ICU 3.8
+     */
+    virtual ~DateTimePatternGenerator();
+
+    /**
+     * Clone DateTimePatternGenerator object. Clients are responsible for 
+     * deleting the DateTimePatternGenerator object cloned.
+     * @stable ICU 3.8
+     */
+    DateTimePatternGenerator* clone() const;
+
+     /**
+      * Return true if another object is semantically equal to this one.
+      *
+      * @param other    the DateTimePatternGenerator object to be compared with.
+      * @return         true if other is semantically equal to this.
+      * @stable ICU 3.8
+      */
+    UBool operator==(const DateTimePatternGenerator& other) const;
+    
+    /**
+     * Return true if another object is semantically unequal to this one.
+     *
+     * @param other    the DateTimePatternGenerator object to be compared with.
+     * @return         true if other is semantically unequal to this.
+     * @stable ICU 3.8
+     */
+    UBool operator!=(const DateTimePatternGenerator& other) const;
+
+    /**
+     * Utility to return a unique skeleton from a given pattern. For example,
+     * both "MMM-dd" and "dd/MMM" produce the skeleton "MMMdd".
+     *
+     * @param pattern   Input pattern, such as "dd/MMM"
+     * @param status  Output param set to success/failure code on exit,
+     *                  which must not indicate a failure before the function call.
+     * @return skeleton such as "MMMdd"
+     * @stable ICU 3.8
+     */
+    UnicodeString getSkeleton(const UnicodeString& pattern, UErrorCode& status);
+
+    /**
+     * Utility to return a unique base skeleton from a given pattern. This is
+     * the same as the skeleton, except that differences in length are minimized
+     * so as to only preserve the difference between string and numeric form. So
+     * for example, both "MMM-dd" and "d/MMM" produce the skeleton "MMMd"
+     * (notice the single d).
+     *
+     * @param pattern  Input pattern, such as "dd/MMM"
+     * @param status  Output param set to success/failure code on exit,
+     *               which must not indicate a failure before the function call.
+     * @return base skeleton, such as "Md"
+     * @stable ICU 3.8
+     */
+    UnicodeString getBaseSkeleton(const UnicodeString& pattern, UErrorCode& status);
+
+    /**
+     * Adds a pattern to the generator. If the pattern has the same skeleton as
+     * an existing pattern, and the override parameter is set, then the previous
+     * value is overriden. Otherwise, the previous value is retained. In either
+     * case, the conflicting status is set and previous vale is stored in 
+     * conflicting pattern.
+     * <p>
+     * Note that single-field patterns (like "MMM") are automatically added, and
+     * don't need to be added explicitly!
+     *
+     * @param pattern   Input pattern, such as "dd/MMM"
+     * @param override  When existing values are to be overridden use true, 
+     *                   otherwise use false.
+     * @param conflictingPattern  Previous pattern with the same skeleton.
+     * @param status  Output param set to success/failure code on exit,
+     *               which must not indicate a failure before the function call.
+     * @return conflicting status.  The value could be UDATPG_NO_CONFLICT, 
+     *                             UDATPG_BASE_CONFLICT or UDATPG_CONFLICT.
+     * @stable ICU 3.8
+     */
+    UDateTimePatternConflict addPattern(const UnicodeString& pattern, 
+                                        UBool override, 
+                                        UnicodeString& conflictingPattern,
+                                        UErrorCode& status);
+
+    /**
+     * An AppendItem format is a pattern used to append a field if there is no
+     * good match. For example, suppose that the input skeleton is "GyyyyMMMd",
+     * and there is no matching pattern internally, but there is a pattern
+     * matching "yyyyMMMd", say "d-MM-yyyy". Then that pattern is used, plus the
+     * G. The way these two are conjoined is by using the AppendItemFormat for G
+     * (era). So if that value is, say "{0}, {1}" then the final resulting
+     * pattern is "d-MM-yyyy, G".
+     * <p>
+     * There are actually three available variables: {0} is the pattern so far,
+     * {1} is the element we are adding, and {2} is the name of the element.
+     * <p>
+     * This reflects the way that the CLDR data is organized.
+     *
+     * @param field  such as UDATPG_ERA_FIELD.
+     * @param value  pattern, such as "{0}, {1}"
+     * @stable ICU 3.8
+     */
+    void setAppendItemFormat(UDateTimePatternField field, const UnicodeString& value);
+
+    /**
+     * Getter corresponding to setAppendItemFormat. Values below 0 or at or
+     * above UDATPG_FIELD_COUNT are illegal arguments.
+     *
+     * @param  field  such as UDATPG_ERA_FIELD.
+     * @return append pattern for field
+     * @stable ICU 3.8
+     */
+    const UnicodeString& getAppendItemFormat(UDateTimePatternField field) const;
+
+    /**
+     * Sets the names of field, eg "era" in English for ERA. These are only
+     * used if the corresponding AppendItemFormat is used, and if it contains a
+     * {2} variable.
+     * <p>
+     * This reflects the way that the CLDR data is organized.
+     *
+     * @param field   such as UDATPG_ERA_FIELD.
+     * @param value   name of the field
+     * @stable ICU 3.8
+     */
+    void setAppendItemName(UDateTimePatternField field, const UnicodeString& value);
+
+    /**
+     * Getter corresponding to setAppendItemNames. Values below 0 or at or above
+     * UDATPG_FIELD_COUNT are illegal arguments.
+     *
+     * @param field  such as UDATPG_ERA_FIELD.
+     * @return name for field
+     * @stable ICU 3.8
+     */
+    const UnicodeString& getAppendItemName(UDateTimePatternField field) const;
+
+    /**
+     * The date time format is a message format pattern used to compose date and
+     * time patterns. The default value is "{0} {1}", where {0} will be replaced
+     * by the date pattern and {1} will be replaced by the time pattern.
+     * <p>
+     * This is used when the input skeleton contains both date and time fields,
+     * but there is not a close match among the added patterns. For example,
+     * suppose that this object was created by adding "dd-MMM" and "hh:mm", and
+     * its datetimeFormat is the default "{0} {1}". Then if the input skeleton
+     * is "MMMdhmm", there is not an exact match, so the input skeleton is
+     * broken up into two components "MMMd" and "hmm". There are close matches
+     * for those two skeletons, so the result is put together with this pattern,
+     * resulting in "d-MMM h:mm".
+     *
+     * @param dateTimeFormat
+     *            message format pattern, here {0} will be replaced by the date
+     *            pattern and {1} will be replaced by the time pattern.
+     * @stable ICU 3.8
+     */
+    void setDateTimeFormat(const UnicodeString& dateTimeFormat);
+
+    /**
+     * Getter corresponding to setDateTimeFormat.
+     * @return DateTimeFormat.
+     * @stable ICU 3.8
+     */
+    const UnicodeString& getDateTimeFormat() const;
+
+    /**
+     * Return the best pattern matching the input skeleton. It is guaranteed to
+     * have all of the fields in the skeleton.
+     *
+     * @param skeleton
+     *            The skeleton is a pattern containing only the variable fields.
+     *            For example, "MMMdd" and "mmhh" are skeletons.
+     * @param status  Output param set to success/failure code on exit,
+     *               which must not indicate a failure before the function call.
+     * @return bestPattern
+     *            The best pattern found from the given skeleton.
+     * @stable ICU 3.8
+     */
+     UnicodeString getBestPattern(const UnicodeString& skeleton, UErrorCode& status);
+
+
+    /**
+     * Adjusts the field types (width and subtype) of a pattern to match what is
+     * in a skeleton. That is, if you supply a pattern like "d-M H:m", and a
+     * skeleton of "MMMMddhhmm", then the input pattern is adjusted to be
+     * "dd-MMMM hh:mm". This is used internally to get the best match for the
+     * input skeleton, but can also be used externally.
+     *
+     * @param pattern Input pattern
+     * @param skeleton
+     *            The skeleton is a pattern containing only the variable fields.
+     *            For example, "MMMdd" and "mmhh" are skeletons.
+     * @param status  Output param set to success/failure code on exit,
+     *               which must not indicate a failure before the function call.
+     * @return pattern adjusted to match the skeleton fields widths and subtypes.
+     * @stable ICU 3.8
+     */
+     UnicodeString replaceFieldTypes(const UnicodeString& pattern, 
+                                     const UnicodeString& skeleton, 
+                                     UErrorCode& status);
+
+    /**
+     * Return a list of all the skeletons (in canonical form) from this class.
+     *
+     * Call getPatternForSkeleton() to get the corresponding pattern.
+     *
+     * @param status  Output param set to success/failure code on exit,
+     *               which must not indicate a failure before the function call.
+     * @return StringEnumeration with the skeletons.
+     *         The caller must delete the object.
+     * @stable ICU 3.8
+     */
+     StringEnumeration* getSkeletons(UErrorCode& status) const;
+
+     /**
+      * Get the pattern corresponding to a given skeleton.
+      * @param skeleton 
+      * @return pattern corresponding to a given skeleton.
+      * @stable ICU 3.8
+      */
+     const UnicodeString& getPatternForSkeleton(const UnicodeString& skeleton) const;
+     
+    /**
+     * Return a list of all the base skeletons (in canonical form) from this class.
+     *
+     * @param status  Output param set to success/failure code on exit,
+     *               which must not indicate a failure before the function call.
+     * @return a StringEnumeration with the base skeletons.
+     *         The caller must delete the object.
+     * @stable ICU 3.8
+     */
+     StringEnumeration* getBaseSkeletons(UErrorCode& status) const;
+     
+     /**
+      * Return a list of redundant patterns are those which if removed, make no 
+      * difference in the resulting getBestPattern values. This method returns a 
+      * list of them, to help check the consistency of the patterns used to build 
+      * this generator.
+      * 
+      * @param status  Output param set to success/failure code on exit,
+      *               which must not indicate a failure before the function call.
+      * @return a StringEnumeration with the redundant pattern.
+      *         The caller must delete the object.
+      * @internal ICU 3.8
+      */
+     StringEnumeration* getRedundants(UErrorCode& status);
+      
+    /**
+     * The decimal value is used in formatting fractions of seconds. If the
+     * skeleton contains fractional seconds, then this is used with the
+     * fractional seconds. For example, suppose that the input pattern is
+     * "hhmmssSSSS", and the best matching pattern internally is "H:mm:ss", and
+     * the decimal string is ",". Then the resulting pattern is modified to be
+     * "H:mm:ss,SSSS"
+     *
+     * @param decimal 
+     * @stable ICU 3.8
+     */
+    void setDecimal(const UnicodeString& decimal);
+
+    /**
+     * Getter corresponding to setDecimal.
+     * @return UnicodeString corresponding to the decimal point
+     * @stable ICU 3.8
+     */
+    const UnicodeString& getDecimal() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 3.8
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 3.8
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+private:
+    /**
+     * Constructor.
+     * @stable ICU 3.8
+     */
+    DateTimePatternGenerator(UErrorCode & status);
+
+    /**
+     * Constructor.
+     * @stable ICU 3.8
+     */
+    DateTimePatternGenerator(const Locale& locale, UErrorCode & status);
+
+    /**
+     * Copy constructor.
+     * @param other DateTimePatternGenerator to copy
+     * @stable ICU 3.8
+     */
+    DateTimePatternGenerator(const DateTimePatternGenerator& other);
+
+    /**
+     * Default assignment operator.
+     * @param other DateTimePatternGenerator to copy
+     * @stable ICU 3.8
+     */
+    DateTimePatternGenerator& operator=(const DateTimePatternGenerator& other);
+
+    Locale pLocale;  // pattern locale
+    FormatParser *fp;
+    DateTimeMatcher* dtMatcher;
+    DistanceInfo *distanceInfo;
+    PatternMap *patternMap;
+    UnicodeString appendItemFormats[UDATPG_FIELD_COUNT];
+    UnicodeString appendItemNames[UDATPG_FIELD_COUNT];
+    UnicodeString dateTimeFormat;
+    UnicodeString decimal;
+    DateTimeMatcher *skipMatcher;
+    Hashtable *fAvailableFormatKeyHash;
+    UnicodeString hackPattern;
+    UnicodeString emptyString;
+    UChar fDefaultHourFormatChar;
+
+    void initData(const Locale &locale, UErrorCode &status);
+    void addCanonicalItems();
+    void addICUPatterns(const Locale& locale, UErrorCode& status);
+    void hackTimes(const UnicodeString& hackPattern, UErrorCode& status);
+    void addCLDRData(const Locale& locale);
+    UDateTimePatternConflict addPatternWithSkeleton(const UnicodeString& pattern, const UnicodeString * skeletonToUse, UBool override, UnicodeString& conflictingPattern, UErrorCode& status);
+    void initHashtable(UErrorCode& status);
+    void setDateTimeFromCalendar(const Locale& locale, UErrorCode& status);
+    void setDecimalSymbols(const Locale& locale, UErrorCode& status);
+    UDateTimePatternField getAppendFormatNumber(const char* field) const;
+    UDateTimePatternField getAppendNameNumber(const char* field) const;
+    void getAppendName(UDateTimePatternField field, UnicodeString& value);
+    int32_t getCanonicalIndex(const UnicodeString& field);
+    const UnicodeString* getBestRaw(DateTimeMatcher& source, int32_t includeMask, DistanceInfo* missingFields, const PtnSkeleton** specifiedSkeletonPtr = 0);
+    UnicodeString adjustFieldTypes(const UnicodeString& pattern, const PtnSkeleton* specifiedSkeleton, UBool fixFractionalSeconds);
+    UnicodeString getBestAppending(int32_t missingFields);
+    int32_t getTopBitNumber(int32_t foundMask);
+    void setAvailableFormat(const UnicodeString &key, UErrorCode& status);
+    UBool isAvailableFormatSet(const UnicodeString &key) const;
+    void copyHashtable(Hashtable *other, UErrorCode &status);
+    UBool isCanonicalItem(const UnicodeString& item) const;
+} ;// end class DateTimePatternGenerator
+
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/dtrule.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/dtrule.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/dtrule.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,250 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 2007-2008, International Business Machines Corporation and         *
-* others. All Rights Reserved.                                                *
-*******************************************************************************
-*/
-#ifndef DTRULE_H
-#define DTRULE_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Rule for specifying date and time in an year
- */
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/uobject.h"
-
-U_NAMESPACE_BEGIN
-/**
- * <code>DateTimeRule</code> is a class representing a time in a year by
- * a rule specified by month, day of month, day of week and
- * time in the day.
- * 
- * @stable ICU 4.0
- */
-class U_I18N_API DateTimeRule : public UObject {
-public:
-
-    /**
-     * Date rule type constants.
-     * @stable ICU 4.0
-     */
-    enum DateRuleType {
-        DOM = 0,        /**< The exact day of month,
-                             for example, March 11. */
-        DOW,            /**< The Nth occurence of the day of week,
-                             for example, 2nd Sunday in March. */
-        DOW_GEQ_DOM,    /**< The first occurence of the day of week on or after the day of monnth,
-                             for example, first Sunday on or after March 8. */
-        DOW_LEQ_DOM     /**< The last occurence of the day of week on or before the day of month,
-                             for example, first Sunday on or before March 14. */
-    };
-
-    /**
-     * Time rule type constants.
-     * @stable ICU 4.0
-     */
-    enum TimeRuleType {
-        WALL_TIME = 0,  /**< The local wall clock time */
-        STANDARD_TIME,  /**< The local standard time */
-        UTC_TIME        /**< The UTC time */
-    };
-
-    /**
-     * Constructs a <code>DateTimeRule</code> by the day of month and
-     * the time rule.  The date rule type for an instance created by
-     * this constructor is <code>DOM</code>.
-     * 
-     * @param month         The rule month, for example, <code>Calendar::JANUARY</code>
-     * @param dayOfMonth    The day of month, 1-based.
-     * @param millisInDay   The milliseconds in the rule date.
-     * @param timeType      The time type, <code>WALL_TIME</code> or <code>STANDARD_TIME</code>
-     *                      or <code>UTC_TIME</code>.
-     * @stable ICU 4.0
-     */
-    DateTimeRule(int32_t month, int32_t dayOfMonth,
-        int32_t millisInDay, TimeRuleType timeType);
-
-    /**
-     * Constructs a <code>DateTimeRule</code> by the day of week and its oridinal
-     * number and the time rule.  The date rule type for an instance created
-     * by this constructor is <code>DOW</code>.
-     * 
-     * @param month         The rule month, for example, <code>Calendar::JANUARY</code>.
-     * @param weekInMonth   The ordinal number of the day of week.  Negative number
-     *                      may be used for specifying a rule date counted from the
-     *                      end of the rule month.
-     * @param dayOfWeek     The day of week, for example, <code>Calendar::SUNDAY</code>.
-     * @param millisInDay   The milliseconds in the rule date.
-     * @param timeType      The time type, <code>WALL_TIME</code> or <code>STANDARD_TIME</code>
-     *                      or <code>UTC_TIME</code>.
-     * @stable ICU 4.0
-     */
-    DateTimeRule(int32_t month, int32_t weekInMonth, int32_t dayOfWeek,
-        int32_t millisInDay, TimeRuleType timeType);
-
-    /**
-     * Constructs a <code>DateTimeRule</code> by the first/last day of week
-     * on or after/before the day of month and the time rule.  The date rule
-     * type for an instance created by this constructor is either
-     * <code>DOM_GEQ_DOM</code> or <code>DOM_LEQ_DOM</code>.
-     * 
-     * @param month         The rule month, for example, <code>Calendar::JANUARY</code>
-     * @param dayOfMonth    The day of month, 1-based.
-     * @param dayOfWeek     The day of week, for example, <code>Calendar::SUNDAY</code>.
-     * @param after         true if the rule date is on or after the day of month.
-     * @param millisInDay   The milliseconds in the rule date.
-     * @param timeType      The time type, <code>WALL_TIME</code> or <code>STANDARD_TIME</code>
-     *                      or <code>UTC_TIME</code>.
-     * @stable ICU 4.0
-     */
-    DateTimeRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek, UBool after,
-        int32_t millisInDay, TimeRuleType timeType);
-
-    /**
-     * Copy constructor.
-     * @param source    The DateTimeRule object to be copied.
-     * @stable ICU 4.0
-     */
-    DateTimeRule(const DateTimeRule& source);
-
-    /**
-     * Destructor.
-     * @stable ICU 4.0
-     */
-    ~DateTimeRule();
-
-    /**
-     * Clone this DateTimeRule object polymorphically. The caller owns the result and
-     * should delete it when done.
-     * @return    A copy of the object.
-     * @stable ICU 4.0
-     */
-    DateTimeRule* clone(void) const;
-
-    /**
-     * Assignment operator.
-     * @param right The object to be copied.
-     * @stable ICU 4.0
-     */
-    DateTimeRule& operator=(const DateTimeRule& right);
-
-    /**
-     * Return true if the given DateTimeRule objects are semantically equal. Objects
-     * of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given DateTimeRule objects are semantically equal.
-     * @stable ICU 4.0
-     */
-    UBool operator==(const DateTimeRule& that) const;
-
-    /**
-     * Return true if the given DateTimeRule objects are semantically unequal. Objects
-     * of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given DateTimeRule objects are semantically unequal.
-     * @stable ICU 4.0
-     */
-    UBool operator!=(const DateTimeRule& that) const;
-
-    /**
-     * Gets the date rule type, such as <code>DOM</code>
-     * @return The date rule type.
-     * @stable ICU 4.0
-     */
-    DateRuleType getDateRuleType(void) const;
-
-    /**
-     * Gets the time rule type
-     * @return The time rule type, either <code>WALL_TIME</code> or <code>STANDARD_TIME</code>
-     *         or <code>UTC_TIME</code>.
-     * @stable ICU 4.0
-     */
-    TimeRuleType getTimeRuleType(void) const;
-
-    /**
-     * Gets the rule month.
-     * @return The rule month.
-     * @stable ICU 4.0
-     */
-    int32_t getRuleMonth(void) const;
-
-    /**
-     * Gets the rule day of month.  When the date rule type
-     * is <code>DOW</code>, the value is always 0.
-     * @return The rule day of month
-     * @stable ICU 4.0
-     */
-    int32_t getRuleDayOfMonth(void) const;
-
-    /**
-     * Gets the rule day of week.  When the date rule type
-     * is <code>DOM</code>, the value is always 0.
-     * @return The rule day of week.
-     * @stable ICU 4.0
-     */
-    int32_t getRuleDayOfWeek(void) const;
-
-    /**
-     * Gets the ordinal number of the occurence of the day of week
-     * in the month.  When the date rule type is not <code>DOW</code>,
-     * the value is always 0.
-     * @return The rule day of week ordinal number in the month.
-     * @stable ICU 4.0
-     */
-    int32_t getRuleWeekInMonth(void) const;
-
-    /**
-     * Gets the rule time in the rule day.
-     * @return The time in the rule day in milliseconds.
-     * @stable ICU 4.0
-     */
-    int32_t getRuleMillisInDay(void) const;
-
-private:
-    int32_t fMonth;
-    int32_t fDayOfMonth;
-    int32_t fDayOfWeek;
-    int32_t fWeekInMonth;
-    int32_t fMillisInDay;
-    DateRuleType fDateRuleType;
-    TimeRuleType fTimeRuleType;
-
-public:
-    /**
-     * Return the class ID for this class. This is useful only for comparing to
-     * a return value from getDynamicClassID(). For example:
-     * <pre>
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       erived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 4.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
-     * method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone()
-     * methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 4.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // DTRULE_H
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/dtrule.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/dtrule.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/dtrule.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/dtrule.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,250 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2008, International Business Machines Corporation and         *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*/
+#ifndef DTRULE_H
+#define DTRULE_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Rule for specifying date and time in an year
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+/**
+ * <code>DateTimeRule</code> is a class representing a time in a year by
+ * a rule specified by month, day of month, day of week and
+ * time in the day.
+ * 
+ * @stable ICU 4.0
+ */
+class U_I18N_API DateTimeRule : public UObject {
+public:
+
+    /**
+     * Date rule type constants.
+     * @stable ICU 4.0
+     */
+    enum DateRuleType {
+        DOM = 0,        /**< The exact day of month,
+                             for example, March 11. */
+        DOW,            /**< The Nth occurence of the day of week,
+                             for example, 2nd Sunday in March. */
+        DOW_GEQ_DOM,    /**< The first occurence of the day of week on or after the day of monnth,
+                             for example, first Sunday on or after March 8. */
+        DOW_LEQ_DOM     /**< The last occurence of the day of week on or before the day of month,
+                             for example, first Sunday on or before March 14. */
+    };
+
+    /**
+     * Time rule type constants.
+     * @stable ICU 4.0
+     */
+    enum TimeRuleType {
+        WALL_TIME = 0,  /**< The local wall clock time */
+        STANDARD_TIME,  /**< The local standard time */
+        UTC_TIME        /**< The UTC time */
+    };
+
+    /**
+     * Constructs a <code>DateTimeRule</code> by the day of month and
+     * the time rule.  The date rule type for an instance created by
+     * this constructor is <code>DOM</code>.
+     * 
+     * @param month         The rule month, for example, <code>Calendar::JANUARY</code>
+     * @param dayOfMonth    The day of month, 1-based.
+     * @param millisInDay   The milliseconds in the rule date.
+     * @param timeType      The time type, <code>WALL_TIME</code> or <code>STANDARD_TIME</code>
+     *                      or <code>UTC_TIME</code>.
+     * @stable ICU 4.0
+     */
+    DateTimeRule(int32_t month, int32_t dayOfMonth,
+        int32_t millisInDay, TimeRuleType timeType);
+
+    /**
+     * Constructs a <code>DateTimeRule</code> by the day of week and its oridinal
+     * number and the time rule.  The date rule type for an instance created
+     * by this constructor is <code>DOW</code>.
+     * 
+     * @param month         The rule month, for example, <code>Calendar::JANUARY</code>.
+     * @param weekInMonth   The ordinal number of the day of week.  Negative number
+     *                      may be used for specifying a rule date counted from the
+     *                      end of the rule month.
+     * @param dayOfWeek     The day of week, for example, <code>Calendar::SUNDAY</code>.
+     * @param millisInDay   The milliseconds in the rule date.
+     * @param timeType      The time type, <code>WALL_TIME</code> or <code>STANDARD_TIME</code>
+     *                      or <code>UTC_TIME</code>.
+     * @stable ICU 4.0
+     */
+    DateTimeRule(int32_t month, int32_t weekInMonth, int32_t dayOfWeek,
+        int32_t millisInDay, TimeRuleType timeType);
+
+    /**
+     * Constructs a <code>DateTimeRule</code> by the first/last day of week
+     * on or after/before the day of month and the time rule.  The date rule
+     * type for an instance created by this constructor is either
+     * <code>DOM_GEQ_DOM</code> or <code>DOM_LEQ_DOM</code>.
+     * 
+     * @param month         The rule month, for example, <code>Calendar::JANUARY</code>
+     * @param dayOfMonth    The day of month, 1-based.
+     * @param dayOfWeek     The day of week, for example, <code>Calendar::SUNDAY</code>.
+     * @param after         true if the rule date is on or after the day of month.
+     * @param millisInDay   The milliseconds in the rule date.
+     * @param timeType      The time type, <code>WALL_TIME</code> or <code>STANDARD_TIME</code>
+     *                      or <code>UTC_TIME</code>.
+     * @stable ICU 4.0
+     */
+    DateTimeRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek, UBool after,
+        int32_t millisInDay, TimeRuleType timeType);
+
+    /**
+     * Copy constructor.
+     * @param source    The DateTimeRule object to be copied.
+     * @stable ICU 4.0
+     */
+    DateTimeRule(const DateTimeRule& source);
+
+    /**
+     * Destructor.
+     * @stable ICU 4.0
+     */
+    ~DateTimeRule();
+
+    /**
+     * Clone this DateTimeRule object polymorphically. The caller owns the result and
+     * should delete it when done.
+     * @return    A copy of the object.
+     * @stable ICU 4.0
+     */
+    DateTimeRule* clone(void) const;
+
+    /**
+     * Assignment operator.
+     * @param right The object to be copied.
+     * @stable ICU 4.0
+     */
+    DateTimeRule& operator=(const DateTimeRule& right);
+
+    /**
+     * Return true if the given DateTimeRule objects are semantically equal. Objects
+     * of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given DateTimeRule objects are semantically equal.
+     * @stable ICU 4.0
+     */
+    UBool operator==(const DateTimeRule& that) const;
+
+    /**
+     * Return true if the given DateTimeRule objects are semantically unequal. Objects
+     * of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given DateTimeRule objects are semantically unequal.
+     * @stable ICU 4.0
+     */
+    UBool operator!=(const DateTimeRule& that) const;
+
+    /**
+     * Gets the date rule type, such as <code>DOM</code>
+     * @return The date rule type.
+     * @stable ICU 4.0
+     */
+    DateRuleType getDateRuleType(void) const;
+
+    /**
+     * Gets the time rule type
+     * @return The time rule type, either <code>WALL_TIME</code> or <code>STANDARD_TIME</code>
+     *         or <code>UTC_TIME</code>.
+     * @stable ICU 4.0
+     */
+    TimeRuleType getTimeRuleType(void) const;
+
+    /**
+     * Gets the rule month.
+     * @return The rule month.
+     * @stable ICU 4.0
+     */
+    int32_t getRuleMonth(void) const;
+
+    /**
+     * Gets the rule day of month.  When the date rule type
+     * is <code>DOW</code>, the value is always 0.
+     * @return The rule day of month
+     * @stable ICU 4.0
+     */
+    int32_t getRuleDayOfMonth(void) const;
+
+    /**
+     * Gets the rule day of week.  When the date rule type
+     * is <code>DOM</code>, the value is always 0.
+     * @return The rule day of week.
+     * @stable ICU 4.0
+     */
+    int32_t getRuleDayOfWeek(void) const;
+
+    /**
+     * Gets the ordinal number of the occurence of the day of week
+     * in the month.  When the date rule type is not <code>DOW</code>,
+     * the value is always 0.
+     * @return The rule day of week ordinal number in the month.
+     * @stable ICU 4.0
+     */
+    int32_t getRuleWeekInMonth(void) const;
+
+    /**
+     * Gets the rule time in the rule day.
+     * @return The time in the rule day in milliseconds.
+     * @stable ICU 4.0
+     */
+    int32_t getRuleMillisInDay(void) const;
+
+private:
+    int32_t fMonth;
+    int32_t fDayOfMonth;
+    int32_t fDayOfWeek;
+    int32_t fWeekInMonth;
+    int32_t fMillisInDay;
+    DateRuleType fDateRuleType;
+    TimeRuleType fTimeRuleType;
+
+public:
+    /**
+     * Return the class ID for this class. This is useful only for comparing to
+     * a return value from getDynamicClassID(). For example:
+     * <pre>
+     * .   Base* polymorphic_pointer = createPolymorphicObject();
+     * .   if (polymorphic_pointer->getDynamicClassID() ==
+     * .       erived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 4.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+     * method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 4.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // DTRULE_H
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/fieldpos.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/fieldpos.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/fieldpos.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,291 +0,0 @@
-/*
- ********************************************************************************
- *   Copyright (C) 1997-2006, International Business Machines
- *   Corporation and others.  All Rights Reserved.
- ********************************************************************************
- *
- * File FIELDPOS.H
- *
- * Modification History:
- *
- *   Date        Name        Description
- *   02/25/97    aliu        Converted from java.
- *   03/17/97    clhuang     Updated per Format implementation.
- *    07/17/98    stephen        Added default/copy ctors, and operators =, ==, !=
- ********************************************************************************
- */
-
-// *****************************************************************************
-// This file was generated from the java source file FieldPosition.java
-// *****************************************************************************
- 
-#ifndef FIELDPOS_H
-#define FIELDPOS_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: FieldPosition identifies the fields in a formatted output.
- */
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/uobject.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * <code>FieldPosition</code> is a simple class used by <code>Format</code>
- * and its subclasses to identify fields in formatted output. Fields are
- * identified by constants, whose names typically end with <code>_FIELD</code>,
- * defined in the various subclasses of <code>Format</code>. See
- * <code>ERA_FIELD</code> and its friends in <code>DateFormat</code> for
- * an example.
- *
- * <p>
- * <code>FieldPosition</code> keeps track of the position of the
- * field within the formatted output with two indices: the index
- * of the first character of the field and the index of the last
- * character of the field.
- *
- * <p>
- * One version of the <code>format</code> method in the various
- * <code>Format</code> classes requires a <code>FieldPosition</code>
- * object as an argument. You use this <code>format</code> method
- * to perform partial formatting or to get information about the
- * formatted output (such as the position of a field).
- *
- * The FieldPosition class is not suitable for subclassing.
- *
- * <p>
- * Below is an example of using <code>FieldPosition</code> to aid
- * alignment of an array of formatted floating-point numbers on
- * their decimal points:
- * <pre>
- * \code
- *       double doubleNum[] = {123456789.0, -12345678.9, 1234567.89, -123456.789,
- *                  12345.6789, -1234.56789, 123.456789, -12.3456789, 1.23456789};
- *       int dNumSize = (int)(sizeof(doubleNum)/sizeof(double));
- *       
- *       UErrorCode status = U_ZERO_ERROR;
- *       DecimalFormat* fmt = (DecimalFormat*) NumberFormat::createInstance(status);
- *       fmt->setDecimalSeparatorAlwaysShown(true);
- *       
- *       const int tempLen = 20;
- *       char temp[tempLen];
- *       
- *       for (int i=0; i<dNumSize; i++) {
- *           FieldPosition pos(NumberFormat::INTEGER_FIELD);
- *           UnicodeString buf;
- *           char fmtText[tempLen];
- *           ToCharString(fmt->format(doubleNum[i], buf, pos), fmtText);
- *           for (int j=0; j<tempLen; j++) temp[j] = ' '; // clear with spaces
- *           temp[__min(tempLen, tempLen-pos.getEndIndex())] = '\0';
- *           cout << temp << fmtText   << endl;
- *       }
- *       delete fmt;
- * \endcode
- * </pre>
- * <p>
- * The code will generate the following output:
- * <pre>
- * \code
- *           123,456,789.000
- *           -12,345,678.900
- *             1,234,567.880
- *              -123,456.789
- *                12,345.678
- *                -1,234.567
- *                   123.456
- *                   -12.345
- *                     1.234
- *  \endcode
- * </pre>
- */
-class U_I18N_API FieldPosition : public UObject {
-public:
-    /**
-     * DONT_CARE may be specified as the field to indicate that the
-     * caller doesn't need to specify a field.  Do not subclass.
-     */
-    enum { DONT_CARE = -1 };
-
-    /**
-     * Creates a FieldPosition object with a non-specified field.
-     * @stable ICU 2.0
-     */
-    FieldPosition() 
-        : UObject(), fField(DONT_CARE), fBeginIndex(0), fEndIndex(0) {}
-
-    /**
-     * Creates a FieldPosition object for the given field.  Fields are
-     * identified by constants, whose names typically end with _FIELD,
-     * in the various subclasses of Format.
-     *
-     * @see NumberFormat#INTEGER_FIELD
-     * @see NumberFormat#FRACTION_FIELD
-     * @see DateFormat#YEAR_FIELD
-     * @see DateFormat#MONTH_FIELD
-     * @stable ICU 2.0
-     */
-    FieldPosition(int32_t field) 
-        : UObject(), fField(field), fBeginIndex(0), fEndIndex(0) {}
-
-    /**
-     * Copy constructor
-     * @param copy the object to be copied from.
-     * @stable ICU 2.0
-     */
-    FieldPosition(const FieldPosition& copy) 
-        : UObject(copy), fField(copy.fField), fBeginIndex(copy.fBeginIndex), fEndIndex(copy.fEndIndex) {}
-
-    /**
-     * Destructor
-     * @stable ICU 2.0
-     */
-    virtual ~FieldPosition();
-
-    /**
-     * Assignment operator
-     * @param copy the object to be copied from.
-     * @stable ICU 2.0
-     */
-    FieldPosition&      operator=(const FieldPosition& copy);
-
-    /** 
-     * Equality operator.
-     * @param that    the object to be compared with.
-     * @return        TRUE if the two field positions are equal, FALSE otherwise.
-     * @stable ICU 2.0
-     */
-    UBool              operator==(const FieldPosition& that) const;
-
-    /** 
-     * Equality operator.
-     * @param that    the object to be compared with.
-     * @return        TRUE if the two field positions are not equal, FALSE otherwise.
-     * @stable ICU 2.0
-     */
-    UBool              operator!=(const FieldPosition& that) const;
-
-    /**
-     * Clone this object.
-     * Clones can be used concurrently in multiple threads.
-     * If an error occurs, then NULL is returned.
-     * The caller must delete the clone.
-     *
-     * @return a clone of this object
-     *
-     * @see getDynamicClassID
-     * @stable ICU 2.8
-     */
-    FieldPosition *clone() const;
-
-    /**
-     * Retrieve the field identifier.
-     * @return    the field identifier.
-     * @stable ICU 2.0
-     */
-    int32_t getField(void) const { return fField; }
-
-    /**
-     * Retrieve the index of the first character in the requested field.
-     * @return    the index of the first character in the requested field.
-     * @stable ICU 2.0
-     */
-    int32_t getBeginIndex(void) const { return fBeginIndex; }
-
-    /**
-     * Retrieve the index of the character following the last character in the
-     * requested field.
-     * @return    the index of the character following the last character in the
-     *            requested field.
-     * @stable ICU 2.0
-     */
-    int32_t getEndIndex(void) const { return fEndIndex; }
- 
-    /**
-     * Set the field.
-     * @param f    the new value of the field.
-     * @stable ICU 2.0
-     */
-    void setField(int32_t f) { fField = f; }
-
-    /**
-     * Set the begin index.  For use by subclasses of Format.
-     * @param bi    the new value of the begin index
-     * @stable ICU 2.0
-     */
-    void setBeginIndex(int32_t bi) { fBeginIndex = bi; }
-
-    /**
-     * Set the end index.  For use by subclasses of Format.
-     * @param ei    the new value of the end index
-     * @stable ICU 2.0
-     */
-    void setEndIndex(int32_t ei) { fEndIndex = ei; }
-    
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.2
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.2
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-private:
-    /**
-     * Input: Desired field to determine start and end offsets for.
-     * The meaning depends on the subclass of Format.
-     */
-    int32_t fField;
-
-    /**
-     * Output: Start offset of field in text.
-     * If the field does not occur in the text, 0 is returned.
-     */
-    int32_t fBeginIndex;
-
-    /**
-     * Output: End offset of field in text.
-     * If the field does not occur in the text, 0 is returned.
-     */
-    int32_t fEndIndex;
-};
-
-inline FieldPosition&
-FieldPosition::operator=(const FieldPosition& copy)
-{
-    fField         = copy.fField;
-    fEndIndex     = copy.fEndIndex;
-    fBeginIndex = copy.fBeginIndex;
-    return *this;
-}
-
-inline UBool
-FieldPosition::operator==(const FieldPosition& copy) const
-{
-    return (fField == copy.fField &&
-        fEndIndex == copy.fEndIndex &&
-        fBeginIndex == copy.fBeginIndex);
-}
-
-inline UBool
-FieldPosition::operator!=(const FieldPosition& copy) const
-{
-    return !operator==(copy);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _FIELDPOS
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/fieldpos.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/fieldpos.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/fieldpos.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/fieldpos.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,291 @@
+/*
+ ********************************************************************************
+ *   Copyright (C) 1997-2006, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ ********************************************************************************
+ *
+ * File FIELDPOS.H
+ *
+ * Modification History:
+ *
+ *   Date        Name        Description
+ *   02/25/97    aliu        Converted from java.
+ *   03/17/97    clhuang     Updated per Format implementation.
+ *    07/17/98    stephen        Added default/copy ctors, and operators =, ==, !=
+ ********************************************************************************
+ */
+
+// *****************************************************************************
+// This file was generated from the java source file FieldPosition.java
+// *****************************************************************************
+ 
+#ifndef FIELDPOS_H
+#define FIELDPOS_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: FieldPosition identifies the fields in a formatted output.
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * <code>FieldPosition</code> is a simple class used by <code>Format</code>
+ * and its subclasses to identify fields in formatted output. Fields are
+ * identified by constants, whose names typically end with <code>_FIELD</code>,
+ * defined in the various subclasses of <code>Format</code>. See
+ * <code>ERA_FIELD</code> and its friends in <code>DateFormat</code> for
+ * an example.
+ *
+ * <p>
+ * <code>FieldPosition</code> keeps track of the position of the
+ * field within the formatted output with two indices: the index
+ * of the first character of the field and the index of the last
+ * character of the field.
+ *
+ * <p>
+ * One version of the <code>format</code> method in the various
+ * <code>Format</code> classes requires a <code>FieldPosition</code>
+ * object as an argument. You use this <code>format</code> method
+ * to perform partial formatting or to get information about the
+ * formatted output (such as the position of a field).
+ *
+ * The FieldPosition class is not suitable for subclassing.
+ *
+ * <p>
+ * Below is an example of using <code>FieldPosition</code> to aid
+ * alignment of an array of formatted floating-point numbers on
+ * their decimal points:
+ * <pre>
+ * \code
+ *       double doubleNum[] = {123456789.0, -12345678.9, 1234567.89, -123456.789,
+ *                  12345.6789, -1234.56789, 123.456789, -12.3456789, 1.23456789};
+ *       int dNumSize = (int)(sizeof(doubleNum)/sizeof(double));
+ *       
+ *       UErrorCode status = U_ZERO_ERROR;
+ *       DecimalFormat* fmt = (DecimalFormat*) NumberFormat::createInstance(status);
+ *       fmt->setDecimalSeparatorAlwaysShown(true);
+ *       
+ *       const int tempLen = 20;
+ *       char temp[tempLen];
+ *       
+ *       for (int i=0; i<dNumSize; i++) {
+ *           FieldPosition pos(NumberFormat::INTEGER_FIELD);
+ *           UnicodeString buf;
+ *           char fmtText[tempLen];
+ *           ToCharString(fmt->format(doubleNum[i], buf, pos), fmtText);
+ *           for (int j=0; j<tempLen; j++) temp[j] = ' '; // clear with spaces
+ *           temp[__min(tempLen, tempLen-pos.getEndIndex())] = '\0';
+ *           cout << temp << fmtText   << endl;
+ *       }
+ *       delete fmt;
+ * \endcode
+ * </pre>
+ * <p>
+ * The code will generate the following output:
+ * <pre>
+ * \code
+ *           123,456,789.000
+ *           -12,345,678.900
+ *             1,234,567.880
+ *              -123,456.789
+ *                12,345.678
+ *                -1,234.567
+ *                   123.456
+ *                   -12.345
+ *                     1.234
+ *  \endcode
+ * </pre>
+ */
+class U_I18N_API FieldPosition : public UObject {
+public:
+    /**
+     * DONT_CARE may be specified as the field to indicate that the
+     * caller doesn't need to specify a field.  Do not subclass.
+     */
+    enum { DONT_CARE = -1 };
+
+    /**
+     * Creates a FieldPosition object with a non-specified field.
+     * @stable ICU 2.0
+     */
+    FieldPosition() 
+        : UObject(), fField(DONT_CARE), fBeginIndex(0), fEndIndex(0) {}
+
+    /**
+     * Creates a FieldPosition object for the given field.  Fields are
+     * identified by constants, whose names typically end with _FIELD,
+     * in the various subclasses of Format.
+     *
+     * @see NumberFormat#INTEGER_FIELD
+     * @see NumberFormat#FRACTION_FIELD
+     * @see DateFormat#YEAR_FIELD
+     * @see DateFormat#MONTH_FIELD
+     * @stable ICU 2.0
+     */
+    FieldPosition(int32_t field) 
+        : UObject(), fField(field), fBeginIndex(0), fEndIndex(0) {}
+
+    /**
+     * Copy constructor
+     * @param copy the object to be copied from.
+     * @stable ICU 2.0
+     */
+    FieldPosition(const FieldPosition& copy) 
+        : UObject(copy), fField(copy.fField), fBeginIndex(copy.fBeginIndex), fEndIndex(copy.fEndIndex) {}
+
+    /**
+     * Destructor
+     * @stable ICU 2.0
+     */
+    virtual ~FieldPosition();
+
+    /**
+     * Assignment operator
+     * @param copy the object to be copied from.
+     * @stable ICU 2.0
+     */
+    FieldPosition&      operator=(const FieldPosition& copy);
+
+    /** 
+     * Equality operator.
+     * @param that    the object to be compared with.
+     * @return        TRUE if the two field positions are equal, FALSE otherwise.
+     * @stable ICU 2.0
+     */
+    UBool              operator==(const FieldPosition& that) const;
+
+    /** 
+     * Equality operator.
+     * @param that    the object to be compared with.
+     * @return        TRUE if the two field positions are not equal, FALSE otherwise.
+     * @stable ICU 2.0
+     */
+    UBool              operator!=(const FieldPosition& that) const;
+
+    /**
+     * Clone this object.
+     * Clones can be used concurrently in multiple threads.
+     * If an error occurs, then NULL is returned.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @stable ICU 2.8
+     */
+    FieldPosition *clone() const;
+
+    /**
+     * Retrieve the field identifier.
+     * @return    the field identifier.
+     * @stable ICU 2.0
+     */
+    int32_t getField(void) const { return fField; }
+
+    /**
+     * Retrieve the index of the first character in the requested field.
+     * @return    the index of the first character in the requested field.
+     * @stable ICU 2.0
+     */
+    int32_t getBeginIndex(void) const { return fBeginIndex; }
+
+    /**
+     * Retrieve the index of the character following the last character in the
+     * requested field.
+     * @return    the index of the character following the last character in the
+     *            requested field.
+     * @stable ICU 2.0
+     */
+    int32_t getEndIndex(void) const { return fEndIndex; }
+ 
+    /**
+     * Set the field.
+     * @param f    the new value of the field.
+     * @stable ICU 2.0
+     */
+    void setField(int32_t f) { fField = f; }
+
+    /**
+     * Set the begin index.  For use by subclasses of Format.
+     * @param bi    the new value of the begin index
+     * @stable ICU 2.0
+     */
+    void setBeginIndex(int32_t bi) { fBeginIndex = bi; }
+
+    /**
+     * Set the end index.  For use by subclasses of Format.
+     * @param ei    the new value of the end index
+     * @stable ICU 2.0
+     */
+    void setEndIndex(int32_t ei) { fEndIndex = ei; }
+    
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+    /**
+     * Input: Desired field to determine start and end offsets for.
+     * The meaning depends on the subclass of Format.
+     */
+    int32_t fField;
+
+    /**
+     * Output: Start offset of field in text.
+     * If the field does not occur in the text, 0 is returned.
+     */
+    int32_t fBeginIndex;
+
+    /**
+     * Output: End offset of field in text.
+     * If the field does not occur in the text, 0 is returned.
+     */
+    int32_t fEndIndex;
+};
+
+inline FieldPosition&
+FieldPosition::operator=(const FieldPosition& copy)
+{
+    fField         = copy.fField;
+    fEndIndex     = copy.fEndIndex;
+    fBeginIndex = copy.fBeginIndex;
+    return *this;
+}
+
+inline UBool
+FieldPosition::operator==(const FieldPosition& copy) const
+{
+    return (fField == copy.fField &&
+        fEndIndex == copy.fEndIndex &&
+        fBeginIndex == copy.fBeginIndex);
+}
+
+inline UBool
+FieldPosition::operator!=(const FieldPosition& copy) const
+{
+    return !operator==(copy);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _FIELDPOS
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/fmtable.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/fmtable.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/fmtable.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,591 +0,0 @@
-/*
-********************************************************************************
-*   Copyright (C) 1997-2006, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-********************************************************************************
-*
-* File FMTABLE.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   02/29/97    aliu        Creation.
-********************************************************************************
-*/
-#ifndef FMTABLE_H
-#define FMTABLE_H
-
-#include "unicode/utypes.h"
-#include "unicode/unistr.h"
-/**
- * \file 
- * \brief C++ API: Formattable is a thin wrapper for primitive numeric types.
- */
-
-#if !UCONFIG_NO_FORMATTING
-
-U_NAMESPACE_BEGIN
-
-/**
- * Formattable objects can be passed to the Format class or
- * its subclasses for formatting.  Formattable is a thin wrapper
- * class which interconverts between the primitive numeric types
- * (double, long, etc.) as well as UDate and UnicodeString.
- *
- * <p>Internally, a Formattable object is a union of primitive types.
- * As such, it can only store one flavor of data at a time.  To
- * determine what flavor of data it contains, use the getType method.
- *
- * <p>As of ICU 3.0, Formattable may also wrap a UObject pointer,
- * which it owns.  This allows an instance of any ICU class to be
- * encapsulated in a Formattable.  For legacy reasons and for
- * efficiency, primitive numeric types are still stored directly
- * within a Formattable.
- *
- * <p>The Formattable class is not suitable for subclassing.
- */
-class U_I18N_API Formattable : public UObject {
-public:
-    /**
-     * This enum is only used to let callers distinguish between
-     * the Formattable(UDate) constructor and the Formattable(double)
-     * constructor; the compiler cannot distinguish the signatures,
-     * since UDate is currently typedefed to be either double or long.
-     * If UDate is changed later to be a bonafide class
-     * or struct, then we no longer need this enum.
-     * @stable ICU 2.4
-     */
-    enum ISDATE { kIsDate };
-
-    /**
-     * Default constructor
-     * @stable ICU 2.4
-     */
-    Formattable(); // Type kLong, value 0
-
-    /**
-     * Creates a Formattable object with a UDate instance.
-     * @param d the UDate instance.
-     * @param flag the flag to indicate this is a date. Always set it to kIsDate
-     * @stable ICU 2.0  
-     */
-    Formattable(UDate d, ISDATE flag);
-
-    /**
-     * Creates a Formattable object with a double number.
-     * @param d the double number.
-     * @stable ICU 2.0
-     */
-    Formattable(double d);
-
-    /**
-     * Creates a Formattable object with a long number.
-     * @param l the long number.
-     * @stable ICU 2.0
-     */
-    Formattable(int32_t l);
-
-    /**
-     * Creates a Formattable object with an int64_t number
-     * @param ll the int64_t number.
-     * @stable ICU 2.8
-     */
-    Formattable(int64_t ll);
-
-#if !UCONFIG_NO_CONVERSION
-    /**
-     * Creates a Formattable object with a char string pointer.
-     * Assumes that the char string is null terminated.
-     * @param strToCopy the char string.
-     * @stable ICU 2.0
-     */
-    Formattable(const char* strToCopy);
-#endif
-
-    /**
-     * Creates a Formattable object with a UnicodeString object to copy from.
-     * @param strToCopy the UnicodeString string.
-     * @stable ICU 2.0
-     */
-    Formattable(const UnicodeString& strToCopy);
-
-    /**
-     * Creates a Formattable object with a UnicodeString object to adopt from.
-     * @param strToAdopt the UnicodeString string.
-     * @stable ICU 2.0
-     */
-    Formattable(UnicodeString* strToAdopt);
-
-    /**
-     * Creates a Formattable object with an array of Formattable objects.
-     * @param arrayToCopy the Formattable object array.
-     * @param count the array count.
-     * @stable ICU 2.0
-     */
-    Formattable(const Formattable* arrayToCopy, int32_t count);
-
-    /**
-     * Creates a Formattable object that adopts the given UObject.
-     * @param objectToAdopt the UObject to set this object to
-     * @stable ICU 3.0
-     */
-    Formattable(UObject* objectToAdopt);
-
-    /**
-     * Copy constructor.
-     * @stable ICU 2.0
-     */
-    Formattable(const Formattable&);
-
-    /**
-     * Assignment operator.
-     * @param rhs   The Formattable object to copy into this object.
-     * @stable ICU 2.0
-     */
-    Formattable&    operator=(const Formattable &rhs);
-
-    /**
-     * Equality comparison.
-     * @param other    the object to be compared with.
-     * @return        TRUE if other are equal to this, FALSE otherwise.
-     * @stable ICU 2.0
-     */
-    UBool          operator==(const Formattable &other) const;
-    
-    /** 
-     * Equality operator.
-     * @param other    the object to be compared with.
-     * @return        TRUE if other are unequal to this, FALSE otherwise.
-     * @stable ICU 2.0
-     */
-    UBool          operator!=(const Formattable& other) const
-      { return !operator==(other); }
-
-    /** 
-     * Destructor.
-     * @stable ICU 2.0
-     */
-    virtual         ~Formattable();
-
-    /**
-     * Clone this object.
-     * Clones can be used concurrently in multiple threads.
-     * If an error occurs, then NULL is returned.
-     * The caller must delete the clone.
-     *
-     * @return a clone of this object
-     *
-     * @see getDynamicClassID
-     * @stable ICU 2.8
-     */
-    Formattable *clone() const;
-
-    /** 
-     * Selector for flavor of data type contained within a
-     * Formattable object.  Formattable is a union of several
-     * different types, and at any time contains exactly one type.
-     * @stable ICU 2.4
-     */
-    enum Type {
-        /**
-         * Selector indicating a UDate value.  Use getDate to retrieve
-         * the value.
-         * @stable ICU 2.4
-         */
-        kDate,
-
-        /**
-         * Selector indicating a double value.  Use getDouble to
-         * retrieve the value.
-         * @stable ICU 2.4
-         */
-        kDouble,
-
-        /**
-         * Selector indicating a 32-bit integer value.  Use getLong to
-         * retrieve the value.
-         * @stable ICU 2.4
-         */
-        kLong,
-
-        /**
-         * Selector indicating a UnicodeString value.  Use getString
-         * to retrieve the value.
-         * @stable ICU 2.4
-         */
-        kString,
-
-        /**
-         * Selector indicating an array of Formattables.  Use getArray
-         * to retrieve the value.
-         * @stable ICU 2.4
-         */
-        kArray,
-
-        /**
-         * Selector indicating a 64-bit integer value.  Use getInt64
-         * to retrieve the value.
-         * @stable ICU 2.8
-         */
-        kInt64,
-
-        /**
-         * Selector indicating a UObject value.  Use getObject to
-         * retrieve the value.
-         * @stable ICU 3.0
-         */
-        kObject
-   };
-
-    /**
-     * Gets the data type of this Formattable object.
-     * @return    the data type of this Formattable object.
-     * @stable ICU 2.0
-     */
-    Type            getType(void) const;
-    
-    /**
-     * Returns TRUE if the data type of this Formattable object
-     * is kDouble, kLong, or kInt64.
-     * @return TRUE if this is a pure numeric object
-     * @stable ICU 3.0
-     */
-    UBool           isNumeric() const;
-    
-    /**
-     * Gets the double value of this object. If this object is not of type
-     * kDouble then the result is undefined.
-     * @return    the double value of this object.
-     * @stable ICU 2.0
-     */ 
-    double          getDouble(void) const { return fValue.fDouble; }
-
-    /**
-     * Gets the double value of this object. If this object is of type
-     * long or int64 then a casting conversion is peformed, with
-     * possible loss of precision.  If the type is kObject and the
-     * object is a Measure, then the result of
-     * getNumber().getDouble(status) is returned.  If this object is
-     * neither a numeric type nor a Measure, then 0 is returned and
-     * the status is set to U_INVALID_FORMAT_ERROR.
-     * @param status the error code
-     * @return the double value of this object.
-     * @stable ICU 3.0
-     */ 
-    double          getDouble(UErrorCode& status) const;
-
-    /**
-     * Gets the long value of this object. If this object is not of type
-     * kLong then the result is undefined.
-     * @return    the long value of this object.
-     * @stable ICU 2.0
-     */ 
-    int32_t         getLong(void) const { return (int32_t)fValue.fInt64; }
-
-    /**
-     * Gets the long value of this object. If the magnitude is too
-     * large to fit in a long, then the maximum or minimum long value,
-     * as appropriate, is returned and the status is set to
-     * U_INVALID_FORMAT_ERROR.  If this object is of type kInt64 and
-     * it fits within a long, then no precision is lost.  If it is of
-     * type kDouble, then a casting conversion is peformed, with
-     * truncation of any fractional part.  If the type is kObject and
-     * the object is a Measure, then the result of
-     * getNumber().getLong(status) is returned.  If this object is
-     * neither a numeric type nor a Measure, then 0 is returned and
-     * the status is set to U_INVALID_FORMAT_ERROR.
-     * @param status the error code
-     * @return    the long value of this object.
-     * @stable ICU 3.0
-     */ 
-    int32_t         getLong(UErrorCode& status) const;
-
-    /**
-     * Gets the int64 value of this object. If this object is not of type
-     * kInt64 then the result is undefined.
-     * @return    the int64 value of this object.
-     * @stable ICU 2.8
-     */ 
-    int64_t         getInt64(void) const { return fValue.fInt64; }
-
-    /**
-     * Gets the int64 value of this object. If this object is of type
-     * kDouble and the magnitude is too large to fit in an int64, then
-     * the maximum or minimum int64 value, as appropriate, is returned
-     * and the status is set to U_INVALID_FORMAT_ERROR.  If the
-     * magnitude fits in an int64, then a casting conversion is
-     * peformed, with truncation of any fractional part.  If the type
-     * is kObject and the object is a Measure, then the result of
-     * getNumber().getDouble(status) is returned.  If this object is
-     * neither a numeric type nor a Measure, then 0 is returned and
-     * the status is set to U_INVALID_FORMAT_ERROR.
-     * @param status the error code
-     * @return    the int64 value of this object.
-     * @stable ICU 3.0
-     */ 
-    int64_t         getInt64(UErrorCode& status) const;
-
-    /**
-     * Gets the Date value of this object. If this object is not of type
-     * kDate then the result is undefined.
-     * @return    the Date value of this object.
-     * @stable ICU 2.0
-     */ 
-    UDate           getDate() const { return fValue.fDate; }
-
-    /**
-     * Gets the Date value of this object.  If the type is not a date,
-     * status is set to U_INVALID_FORMAT_ERROR and the return value is
-     * undefined.
-     * @param status the error code.
-     * @return    the Date value of this object.
-     * @stable ICU 3.0
-     */ 
-     UDate          getDate(UErrorCode& status) const;
-
-    /**
-     * Gets the string value of this object. If this object is not of type
-     * kString then the result is undefined.
-     * @param result    Output param to receive the Date value of this object.
-     * @return          A reference to 'result'.
-     * @stable ICU 2.0
-     */ 
-    UnicodeString&  getString(UnicodeString& result) const
-      { result=*fValue.fString; return result; }
-
-    /**
-     * Gets the string value of this object. If the type is not a
-     * string, status is set to U_INVALID_FORMAT_ERROR and a bogus
-     * string is returned.
-     * @param result    Output param to receive the Date value of this object.
-     * @param status    the error code. 
-     * @return          A reference to 'result'.
-     * @stable ICU 3.0
-     */ 
-    UnicodeString&  getString(UnicodeString& result, UErrorCode& status) const;
-
-    /**
-     * Gets a const reference to the string value of this object. If
-     * this object is not of type kString then the result is
-     * undefined.
-     * @return   a const reference to the string value of this object.
-     * @stable ICU 2.0
-     */
-    inline const UnicodeString& getString(void) const;
-
-    /**
-     * Gets a const reference to the string value of this object.  If
-     * the type is not a string, status is set to
-     * U_INVALID_FORMAT_ERROR and the result is a bogus string.
-     * @param status    the error code.
-     * @return   a const reference to the string value of this object.
-     * @stable ICU 3.0
-     */
-    const UnicodeString& getString(UErrorCode& status) const;
-
-    /**
-     * Gets a reference to the string value of this object. If this
-     * object is not of type kString then the result is undefined.
-     * @return   a reference to the string value of this object.
-     * @stable ICU 2.0
-     */
-    inline UnicodeString& getString(void);
-
-    /**
-     * Gets a reference to the string value of this object. If the
-     * type is not a string, status is set to U_INVALID_FORMAT_ERROR
-     * and the result is a bogus string.
-     * @param status    the error code. 
-     * @return   a reference to the string value of this object.
-     * @stable ICU 3.0
-     */
-    UnicodeString& getString(UErrorCode& status);
-
-    /**
-     * Gets the array value and count of this object. If this object
-     * is not of type kArray then the result is undefined.
-     * @param count    fill-in with the count of this object.
-     * @return         the array value of this object.
-     * @stable ICU 2.0
-     */ 
-    const Formattable* getArray(int32_t& count) const
-      { count=fValue.fArrayAndCount.fCount; return fValue.fArrayAndCount.fArray; }
-
-    /**
-     * Gets the array value and count of this object. If the type is
-     * not an array, status is set to U_INVALID_FORMAT_ERROR, count is
-     * set to 0, and the result is NULL.
-     * @param count    fill-in with the count of this object.
-     * @param status the error code. 
-     * @return         the array value of this object.
-     * @stable ICU 3.0
-     */ 
-    const Formattable* getArray(int32_t& count, UErrorCode& status) const;
-
-    /**
-     * Accesses the specified element in the array value of this
-     * Formattable object. If this object is not of type kArray then
-     * the result is undefined.
-     * @param index the specified index.
-     * @return the accessed element in the array.
-     * @stable ICU 2.0
-     */
-    Formattable&    operator[](int32_t index) { return fValue.fArrayAndCount.fArray[index]; }
-       
-    /**
-     * Returns a pointer to the UObject contained within this
-     * formattable, or NULL if this object does not contain a UObject.
-     * @return a UObject pointer, or NULL
-     * @stable ICU 3.0
-     */
-    const UObject*  getObject() const;
-
-    /**
-     * Sets the double value of this object and changes the type to
-     * kDouble.
-     * @param d    the new double value to be set.
-     * @stable ICU 2.0
-     */ 
-    void            setDouble(double d);
-
-    /**
-     * Sets the long value of this object and changes the type to
-     * kLong.
-     * @param l    the new long value to be set.
-     * @stable ICU 2.0
-     */ 
-    void            setLong(int32_t l);
-
-    /**
-     * Sets the int64 value of this object and changes the type to
-     * kInt64.
-     * @param ll    the new int64 value to be set.
-     * @stable ICU 2.8
-     */ 
-    void            setInt64(int64_t ll);
-
-    /**
-     * Sets the Date value of this object and changes the type to
-     * kDate.
-     * @param d    the new Date value to be set.
-     * @stable ICU 2.0
-     */ 
-    void            setDate(UDate d);
-
-    /**
-     * Sets the string value of this object and changes the type to
-     * kString.
-     * @param stringToCopy    the new string value to be set.
-     * @stable ICU 2.0
-     */ 
-    void            setString(const UnicodeString& stringToCopy);
-
-    /**
-     * Sets the array value and count of this object and changes the
-     * type to kArray.
-     * @param array    the array value.
-     * @param count    the number of array elements to be copied.
-     * @stable ICU 2.0
-     */ 
-    void            setArray(const Formattable* array, int32_t count);
-
-    /**
-     * Sets and adopts the string value and count of this object and
-     * changes the type to kArray.
-     * @param stringToAdopt    the new string value to be adopted.
-     * @stable ICU 2.0
-     */ 
-    void            adoptString(UnicodeString* stringToAdopt);
-
-    /**
-     * Sets and adopts the array value and count of this object and
-     * changes the type to kArray.
-     * @stable ICU 2.0
-     */ 
-    void            adoptArray(Formattable* array, int32_t count);
-       
-    /**
-     * Sets and adopts the UObject value of this object and changes
-     * the type to kObject.  After this call, the caller must not
-     * delete the given object.
-     * @param objectToAdopt the UObject value to be adopted
-     * @stable ICU 3.0
-     */
-    void            adoptObject(UObject* objectToAdopt);
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.2
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.2
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-    /**
-     * Deprecated variant of getLong(UErrorCode&).
-     * @param status the error code
-     * @return the long value of this object.
-     * @deprecated ICU 3.0 use getLong(UErrorCode&) instead
-     */ 
-    inline int32_t getLong(UErrorCode* status) const;
-
-private:
-    /**
-     * Cleans up the memory for unwanted values.  For example, the adopted
-     * string or array objects.
-     */
-    void            dispose(void);
-
-    UnicodeString* getBogus() const;
-
-    union {
-        UObject*        fObject;
-        UnicodeString*  fString;
-        double          fDouble;
-        int64_t         fInt64;
-        UDate           fDate;
-        struct {
-          Formattable*  fArray;
-          int32_t       fCount;
-        }               fArrayAndCount;
-    } fValue;
-
-    Type                fType;
-    UnicodeString       fBogus; // Bogus string when it's needed.
-};
-
-inline UDate Formattable::getDate(UErrorCode& status) const {
-    if (fType != kDate) {
-        if (U_SUCCESS(status)) {
-            status = U_INVALID_FORMAT_ERROR;
-        }
-        return 0;
-    }
-    return fValue.fDate;
-}
-
-inline const UnicodeString& Formattable::getString(void) const {
-    return *fValue.fString;
-}
-
-inline UnicodeString& Formattable::getString(void) {
-    return *fValue.fString;
-}
-
-inline int32_t Formattable::getLong(UErrorCode* status) const {
-    return getLong(*status);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif //_FMTABLE
-//eof
-

Copied: MacRuby/trunk/icu-1060/unicode/fmtable.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/fmtable.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/fmtable.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/fmtable.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,591 @@
+/*
+********************************************************************************
+*   Copyright (C) 1997-2006, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+********************************************************************************
+*
+* File FMTABLE.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   02/29/97    aliu        Creation.
+********************************************************************************
+*/
+#ifndef FMTABLE_H
+#define FMTABLE_H
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+/**
+ * \file 
+ * \brief C++ API: Formattable is a thin wrapper for primitive numeric types.
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Formattable objects can be passed to the Format class or
+ * its subclasses for formatting.  Formattable is a thin wrapper
+ * class which interconverts between the primitive numeric types
+ * (double, long, etc.) as well as UDate and UnicodeString.
+ *
+ * <p>Internally, a Formattable object is a union of primitive types.
+ * As such, it can only store one flavor of data at a time.  To
+ * determine what flavor of data it contains, use the getType method.
+ *
+ * <p>As of ICU 3.0, Formattable may also wrap a UObject pointer,
+ * which it owns.  This allows an instance of any ICU class to be
+ * encapsulated in a Formattable.  For legacy reasons and for
+ * efficiency, primitive numeric types are still stored directly
+ * within a Formattable.
+ *
+ * <p>The Formattable class is not suitable for subclassing.
+ */
+class U_I18N_API Formattable : public UObject {
+public:
+    /**
+     * This enum is only used to let callers distinguish between
+     * the Formattable(UDate) constructor and the Formattable(double)
+     * constructor; the compiler cannot distinguish the signatures,
+     * since UDate is currently typedefed to be either double or long.
+     * If UDate is changed later to be a bonafide class
+     * or struct, then we no longer need this enum.
+     * @stable ICU 2.4
+     */
+    enum ISDATE { kIsDate };
+
+    /**
+     * Default constructor
+     * @stable ICU 2.4
+     */
+    Formattable(); // Type kLong, value 0
+
+    /**
+     * Creates a Formattable object with a UDate instance.
+     * @param d the UDate instance.
+     * @param flag the flag to indicate this is a date. Always set it to kIsDate
+     * @stable ICU 2.0  
+     */
+    Formattable(UDate d, ISDATE flag);
+
+    /**
+     * Creates a Formattable object with a double number.
+     * @param d the double number.
+     * @stable ICU 2.0
+     */
+    Formattable(double d);
+
+    /**
+     * Creates a Formattable object with a long number.
+     * @param l the long number.
+     * @stable ICU 2.0
+     */
+    Formattable(int32_t l);
+
+    /**
+     * Creates a Formattable object with an int64_t number
+     * @param ll the int64_t number.
+     * @stable ICU 2.8
+     */
+    Formattable(int64_t ll);
+
+#if !UCONFIG_NO_CONVERSION
+    /**
+     * Creates a Formattable object with a char string pointer.
+     * Assumes that the char string is null terminated.
+     * @param strToCopy the char string.
+     * @stable ICU 2.0
+     */
+    Formattable(const char* strToCopy);
+#endif
+
+    /**
+     * Creates a Formattable object with a UnicodeString object to copy from.
+     * @param strToCopy the UnicodeString string.
+     * @stable ICU 2.0
+     */
+    Formattable(const UnicodeString& strToCopy);
+
+    /**
+     * Creates a Formattable object with a UnicodeString object to adopt from.
+     * @param strToAdopt the UnicodeString string.
+     * @stable ICU 2.0
+     */
+    Formattable(UnicodeString* strToAdopt);
+
+    /**
+     * Creates a Formattable object with an array of Formattable objects.
+     * @param arrayToCopy the Formattable object array.
+     * @param count the array count.
+     * @stable ICU 2.0
+     */
+    Formattable(const Formattable* arrayToCopy, int32_t count);
+
+    /**
+     * Creates a Formattable object that adopts the given UObject.
+     * @param objectToAdopt the UObject to set this object to
+     * @stable ICU 3.0
+     */
+    Formattable(UObject* objectToAdopt);
+
+    /**
+     * Copy constructor.
+     * @stable ICU 2.0
+     */
+    Formattable(const Formattable&);
+
+    /**
+     * Assignment operator.
+     * @param rhs   The Formattable object to copy into this object.
+     * @stable ICU 2.0
+     */
+    Formattable&    operator=(const Formattable &rhs);
+
+    /**
+     * Equality comparison.
+     * @param other    the object to be compared with.
+     * @return        TRUE if other are equal to this, FALSE otherwise.
+     * @stable ICU 2.0
+     */
+    UBool          operator==(const Formattable &other) const;
+    
+    /** 
+     * Equality operator.
+     * @param other    the object to be compared with.
+     * @return        TRUE if other are unequal to this, FALSE otherwise.
+     * @stable ICU 2.0
+     */
+    UBool          operator!=(const Formattable& other) const
+      { return !operator==(other); }
+
+    /** 
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual         ~Formattable();
+
+    /**
+     * Clone this object.
+     * Clones can be used concurrently in multiple threads.
+     * If an error occurs, then NULL is returned.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @stable ICU 2.8
+     */
+    Formattable *clone() const;
+
+    /** 
+     * Selector for flavor of data type contained within a
+     * Formattable object.  Formattable is a union of several
+     * different types, and at any time contains exactly one type.
+     * @stable ICU 2.4
+     */
+    enum Type {
+        /**
+         * Selector indicating a UDate value.  Use getDate to retrieve
+         * the value.
+         * @stable ICU 2.4
+         */
+        kDate,
+
+        /**
+         * Selector indicating a double value.  Use getDouble to
+         * retrieve the value.
+         * @stable ICU 2.4
+         */
+        kDouble,
+
+        /**
+         * Selector indicating a 32-bit integer value.  Use getLong to
+         * retrieve the value.
+         * @stable ICU 2.4
+         */
+        kLong,
+
+        /**
+         * Selector indicating a UnicodeString value.  Use getString
+         * to retrieve the value.
+         * @stable ICU 2.4
+         */
+        kString,
+
+        /**
+         * Selector indicating an array of Formattables.  Use getArray
+         * to retrieve the value.
+         * @stable ICU 2.4
+         */
+        kArray,
+
+        /**
+         * Selector indicating a 64-bit integer value.  Use getInt64
+         * to retrieve the value.
+         * @stable ICU 2.8
+         */
+        kInt64,
+
+        /**
+         * Selector indicating a UObject value.  Use getObject to
+         * retrieve the value.
+         * @stable ICU 3.0
+         */
+        kObject
+   };
+
+    /**
+     * Gets the data type of this Formattable object.
+     * @return    the data type of this Formattable object.
+     * @stable ICU 2.0
+     */
+    Type            getType(void) const;
+    
+    /**
+     * Returns TRUE if the data type of this Formattable object
+     * is kDouble, kLong, or kInt64.
+     * @return TRUE if this is a pure numeric object
+     * @stable ICU 3.0
+     */
+    UBool           isNumeric() const;
+    
+    /**
+     * Gets the double value of this object. If this object is not of type
+     * kDouble then the result is undefined.
+     * @return    the double value of this object.
+     * @stable ICU 2.0
+     */ 
+    double          getDouble(void) const { return fValue.fDouble; }
+
+    /**
+     * Gets the double value of this object. If this object is of type
+     * long or int64 then a casting conversion is peformed, with
+     * possible loss of precision.  If the type is kObject and the
+     * object is a Measure, then the result of
+     * getNumber().getDouble(status) is returned.  If this object is
+     * neither a numeric type nor a Measure, then 0 is returned and
+     * the status is set to U_INVALID_FORMAT_ERROR.
+     * @param status the error code
+     * @return the double value of this object.
+     * @stable ICU 3.0
+     */ 
+    double          getDouble(UErrorCode& status) const;
+
+    /**
+     * Gets the long value of this object. If this object is not of type
+     * kLong then the result is undefined.
+     * @return    the long value of this object.
+     * @stable ICU 2.0
+     */ 
+    int32_t         getLong(void) const { return (int32_t)fValue.fInt64; }
+
+    /**
+     * Gets the long value of this object. If the magnitude is too
+     * large to fit in a long, then the maximum or minimum long value,
+     * as appropriate, is returned and the status is set to
+     * U_INVALID_FORMAT_ERROR.  If this object is of type kInt64 and
+     * it fits within a long, then no precision is lost.  If it is of
+     * type kDouble, then a casting conversion is peformed, with
+     * truncation of any fractional part.  If the type is kObject and
+     * the object is a Measure, then the result of
+     * getNumber().getLong(status) is returned.  If this object is
+     * neither a numeric type nor a Measure, then 0 is returned and
+     * the status is set to U_INVALID_FORMAT_ERROR.
+     * @param status the error code
+     * @return    the long value of this object.
+     * @stable ICU 3.0
+     */ 
+    int32_t         getLong(UErrorCode& status) const;
+
+    /**
+     * Gets the int64 value of this object. If this object is not of type
+     * kInt64 then the result is undefined.
+     * @return    the int64 value of this object.
+     * @stable ICU 2.8
+     */ 
+    int64_t         getInt64(void) const { return fValue.fInt64; }
+
+    /**
+     * Gets the int64 value of this object. If this object is of type
+     * kDouble and the magnitude is too large to fit in an int64, then
+     * the maximum or minimum int64 value, as appropriate, is returned
+     * and the status is set to U_INVALID_FORMAT_ERROR.  If the
+     * magnitude fits in an int64, then a casting conversion is
+     * peformed, with truncation of any fractional part.  If the type
+     * is kObject and the object is a Measure, then the result of
+     * getNumber().getDouble(status) is returned.  If this object is
+     * neither a numeric type nor a Measure, then 0 is returned and
+     * the status is set to U_INVALID_FORMAT_ERROR.
+     * @param status the error code
+     * @return    the int64 value of this object.
+     * @stable ICU 3.0
+     */ 
+    int64_t         getInt64(UErrorCode& status) const;
+
+    /**
+     * Gets the Date value of this object. If this object is not of type
+     * kDate then the result is undefined.
+     * @return    the Date value of this object.
+     * @stable ICU 2.0
+     */ 
+    UDate           getDate() const { return fValue.fDate; }
+
+    /**
+     * Gets the Date value of this object.  If the type is not a date,
+     * status is set to U_INVALID_FORMAT_ERROR and the return value is
+     * undefined.
+     * @param status the error code.
+     * @return    the Date value of this object.
+     * @stable ICU 3.0
+     */ 
+     UDate          getDate(UErrorCode& status) const;
+
+    /**
+     * Gets the string value of this object. If this object is not of type
+     * kString then the result is undefined.
+     * @param result    Output param to receive the Date value of this object.
+     * @return          A reference to 'result'.
+     * @stable ICU 2.0
+     */ 
+    UnicodeString&  getString(UnicodeString& result) const
+      { result=*fValue.fString; return result; }
+
+    /**
+     * Gets the string value of this object. If the type is not a
+     * string, status is set to U_INVALID_FORMAT_ERROR and a bogus
+     * string is returned.
+     * @param result    Output param to receive the Date value of this object.
+     * @param status    the error code. 
+     * @return          A reference to 'result'.
+     * @stable ICU 3.0
+     */ 
+    UnicodeString&  getString(UnicodeString& result, UErrorCode& status) const;
+
+    /**
+     * Gets a const reference to the string value of this object. If
+     * this object is not of type kString then the result is
+     * undefined.
+     * @return   a const reference to the string value of this object.
+     * @stable ICU 2.0
+     */
+    inline const UnicodeString& getString(void) const;
+
+    /**
+     * Gets a const reference to the string value of this object.  If
+     * the type is not a string, status is set to
+     * U_INVALID_FORMAT_ERROR and the result is a bogus string.
+     * @param status    the error code.
+     * @return   a const reference to the string value of this object.
+     * @stable ICU 3.0
+     */
+    const UnicodeString& getString(UErrorCode& status) const;
+
+    /**
+     * Gets a reference to the string value of this object. If this
+     * object is not of type kString then the result is undefined.
+     * @return   a reference to the string value of this object.
+     * @stable ICU 2.0
+     */
+    inline UnicodeString& getString(void);
+
+    /**
+     * Gets a reference to the string value of this object. If the
+     * type is not a string, status is set to U_INVALID_FORMAT_ERROR
+     * and the result is a bogus string.
+     * @param status    the error code. 
+     * @return   a reference to the string value of this object.
+     * @stable ICU 3.0
+     */
+    UnicodeString& getString(UErrorCode& status);
+
+    /**
+     * Gets the array value and count of this object. If this object
+     * is not of type kArray then the result is undefined.
+     * @param count    fill-in with the count of this object.
+     * @return         the array value of this object.
+     * @stable ICU 2.0
+     */ 
+    const Formattable* getArray(int32_t& count) const
+      { count=fValue.fArrayAndCount.fCount; return fValue.fArrayAndCount.fArray; }
+
+    /**
+     * Gets the array value and count of this object. If the type is
+     * not an array, status is set to U_INVALID_FORMAT_ERROR, count is
+     * set to 0, and the result is NULL.
+     * @param count    fill-in with the count of this object.
+     * @param status the error code. 
+     * @return         the array value of this object.
+     * @stable ICU 3.0
+     */ 
+    const Formattable* getArray(int32_t& count, UErrorCode& status) const;
+
+    /**
+     * Accesses the specified element in the array value of this
+     * Formattable object. If this object is not of type kArray then
+     * the result is undefined.
+     * @param index the specified index.
+     * @return the accessed element in the array.
+     * @stable ICU 2.0
+     */
+    Formattable&    operator[](int32_t index) { return fValue.fArrayAndCount.fArray[index]; }
+       
+    /**
+     * Returns a pointer to the UObject contained within this
+     * formattable, or NULL if this object does not contain a UObject.
+     * @return a UObject pointer, or NULL
+     * @stable ICU 3.0
+     */
+    const UObject*  getObject() const;
+
+    /**
+     * Sets the double value of this object and changes the type to
+     * kDouble.
+     * @param d    the new double value to be set.
+     * @stable ICU 2.0
+     */ 
+    void            setDouble(double d);
+
+    /**
+     * Sets the long value of this object and changes the type to
+     * kLong.
+     * @param l    the new long value to be set.
+     * @stable ICU 2.0
+     */ 
+    void            setLong(int32_t l);
+
+    /**
+     * Sets the int64 value of this object and changes the type to
+     * kInt64.
+     * @param ll    the new int64 value to be set.
+     * @stable ICU 2.8
+     */ 
+    void            setInt64(int64_t ll);
+
+    /**
+     * Sets the Date value of this object and changes the type to
+     * kDate.
+     * @param d    the new Date value to be set.
+     * @stable ICU 2.0
+     */ 
+    void            setDate(UDate d);
+
+    /**
+     * Sets the string value of this object and changes the type to
+     * kString.
+     * @param stringToCopy    the new string value to be set.
+     * @stable ICU 2.0
+     */ 
+    void            setString(const UnicodeString& stringToCopy);
+
+    /**
+     * Sets the array value and count of this object and changes the
+     * type to kArray.
+     * @param array    the array value.
+     * @param count    the number of array elements to be copied.
+     * @stable ICU 2.0
+     */ 
+    void            setArray(const Formattable* array, int32_t count);
+
+    /**
+     * Sets and adopts the string value and count of this object and
+     * changes the type to kArray.
+     * @param stringToAdopt    the new string value to be adopted.
+     * @stable ICU 2.0
+     */ 
+    void            adoptString(UnicodeString* stringToAdopt);
+
+    /**
+     * Sets and adopts the array value and count of this object and
+     * changes the type to kArray.
+     * @stable ICU 2.0
+     */ 
+    void            adoptArray(Formattable* array, int32_t count);
+       
+    /**
+     * Sets and adopts the UObject value of this object and changes
+     * the type to kObject.  After this call, the caller must not
+     * delete the given object.
+     * @param objectToAdopt the UObject value to be adopted
+     * @stable ICU 3.0
+     */
+    void            adoptObject(UObject* objectToAdopt);
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+    /**
+     * Deprecated variant of getLong(UErrorCode&).
+     * @param status the error code
+     * @return the long value of this object.
+     * @deprecated ICU 3.0 use getLong(UErrorCode&) instead
+     */ 
+    inline int32_t getLong(UErrorCode* status) const;
+
+private:
+    /**
+     * Cleans up the memory for unwanted values.  For example, the adopted
+     * string or array objects.
+     */
+    void            dispose(void);
+
+    UnicodeString* getBogus() const;
+
+    union {
+        UObject*        fObject;
+        UnicodeString*  fString;
+        double          fDouble;
+        int64_t         fInt64;
+        UDate           fDate;
+        struct {
+          Formattable*  fArray;
+          int32_t       fCount;
+        }               fArrayAndCount;
+    } fValue;
+
+    Type                fType;
+    UnicodeString       fBogus; // Bogus string when it's needed.
+};
+
+inline UDate Formattable::getDate(UErrorCode& status) const {
+    if (fType != kDate) {
+        if (U_SUCCESS(status)) {
+            status = U_INVALID_FORMAT_ERROR;
+        }
+        return 0;
+    }
+    return fValue.fDate;
+}
+
+inline const UnicodeString& Formattable::getString(void) const {
+    return *fValue.fString;
+}
+
+inline UnicodeString& Formattable::getString(void) {
+    return *fValue.fString;
+}
+
+inline int32_t Formattable::getLong(UErrorCode* status) const {
+    return getLong(*status);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif //_FMTABLE
+//eof
+

Deleted: MacRuby/trunk/icu-1060/unicode/format.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/format.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/format.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,296 +0,0 @@
-/*
-********************************************************************************
-* Copyright (C) 1997-2006, International Business Machines Corporation and others.
-* All Rights Reserved.
-********************************************************************************
-*
-* File FORMAT.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   02/19/97    aliu        Converted from java.
-*   03/17/97    clhuang     Updated per C++ implementation.
-*   03/27/97    helena      Updated to pass the simple test after code review.
-********************************************************************************
-*/
-// *****************************************************************************
-// This file was generated from the java source file Format.java
-// *****************************************************************************
-
-#ifndef FORMAT_H
-#define FORMAT_H
-
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Base class for all formats. 
- */
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/unistr.h"
-#include "unicode/fmtable.h"
-#include "unicode/fieldpos.h"
-#include "unicode/parsepos.h"
-#include "unicode/parseerr.h" 
-#include "unicode/locid.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * Base class for all formats.  This is an abstract base class which
- * specifies the protocol for classes which convert other objects or
- * values, such as numeric values and dates, and their string
- * representations.  In some cases these representations may be
- * localized or contain localized characters or strings.  For example,
- * a numeric formatter such as DecimalFormat may convert a numeric
- * value such as 12345 to the string "$12,345".  It may also parse
- * the string back into a numeric value.  A date and time formatter
- * like SimpleDateFormat may represent a specific date, encoded
- * numerically, as a string such as "Wednesday, February 26, 1997 AD".
- * <P>
- * Many of the concrete subclasses of Format employ the notion of
- * a pattern.  A pattern is a string representation of the rules which
- * govern the interconversion between values and strings.  For example,
- * a DecimalFormat object may be associated with the pattern
- * "$#,##0.00;($#,##0.00)", which is a common US English format for
- * currency values, yielding strings such as "$1,234.45" for 1234.45,
- * and "($987.65)" for 987.6543.  The specific syntax of a pattern
- * is defined by each subclass.
- * <P>
- * Even though many subclasses use patterns, the notion of a pattern
- * is not inherent to Format classes in general, and is not part of
- * the explicit base class protocol.
- * <P>
- * Two complex formatting classes bear mentioning.  These are
- * MessageFormat and ChoiceFormat.  ChoiceFormat is a subclass of
- * NumberFormat which allows the user to format different number ranges
- * as strings.  For instance, 0 may be represented as "no files", 1 as
- * "one file", and any number greater than 1 as "many files".
- * MessageFormat is a formatter which utilizes other Format objects to
- * format a string containing with multiple values.  For instance,
- * A MessageFormat object might produce the string "There are no files
- * on the disk MyDisk on February 27, 1997." given the arguments 0,
- * "MyDisk", and the date value of 2/27/97.  See the ChoiceFormat
- * and MessageFormat headers for further information.
- * <P>
- * If formatting is unsuccessful, a failing UErrorCode is returned when
- * the Format cannot format the type of object, otherwise if there is
- * something illformed about the the Unicode replacement character
- * 0xFFFD is returned.
- * <P>
- * If there is no match when parsing, a parse failure UErrorCode is
- * retured for methods which take no ParsePosition.  For the method
- * that takes a ParsePosition, the index parameter is left unchanged.
- * <P>
- * <em>User subclasses are not supported.</em> While clients may write
- * subclasses, such code will not necessarily work and will not be
- * guaranteed to work stably from release to release.
- */
-class U_I18N_API Format : public UObject {
-public:
-
-    /** Destructor
-     * @stable ICU 2.4
-     */
-    virtual ~Format();
-
-    /**
-     * Return true if the given Format objects are semantically equal.
-     * Objects of different subclasses are considered unequal.
-     * @param other    the object to be compared with.
-     * @return         Return true if the given Format objects are semantically equal.
-     *                 Objects of different subclasses are considered unequal.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const Format& other) const = 0;
-
-    /**
-     * Return true if the given Format objects are not semantically
-     * equal.
-     * @param other    the object to be compared with.
-     * @return         Return true if the given Format objects are not semantically.
-     * @stable ICU 2.0
-     */
-    UBool operator!=(const Format& other) const { return !operator==(other); }
-
-    /**
-     * Clone this object polymorphically.  The caller is responsible
-     * for deleting the result when done.
-     * @return    A copy of the object
-     * @stable ICU 2.0
-     */
-    virtual Format* clone() const = 0;
-
-    /**
-     * Formats an object to produce a string.
-     *
-     * @param obj       The object to format.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param status    Output parameter filled in with success or failure status.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(const Formattable& obj,
-                          UnicodeString& appendTo,
-                          UErrorCode& status) const;
-
-    /**
-     * Format an object to produce a string.  This is a pure virtual method which
-     * subclasses must implement. This method allows polymorphic formatting
-     * of Formattable objects. If a subclass of Format receives a Formattable
-     * object type it doesn't handle (e.g., if a numeric Formattable is passed
-     * to a DateFormat object) then it returns a failing UErrorCode.
-     *
-     * @param obj       The object to format.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @param status    Output param filled with success/failure status.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& format(const Formattable& obj,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos,
-                                  UErrorCode& status) const = 0;
-
-    /**
-     * Parse a string to produce an object.  This is a pure virtual
-     * method which subclasses must implement.  This method allows
-     * polymorphic parsing of strings into Formattable objects.
-     * <P>
-     * Before calling, set parse_pos.index to the offset you want to
-     * start parsing at in the source.  After calling, parse_pos.index
-     * is the end of the text you parsed.  If error occurs, index is
-     * unchanged.
-     * <P>
-     * When parsing, leading whitespace is discarded (with successful
-     * parse), while trailing whitespace is left as is.
-     * <P>
-     * Example:
-     * <P>
-     * Parsing "_12_xy" (where _ represents a space) for a number,
-     * with index == 0 will result in the number 12, with
-     * parse_pos.index updated to 3 (just before the second space).
-     * Parsing a second time will result in a failing UErrorCode since
-     * "xy" is not a number, and leave index at 3.
-     * <P>
-     * Subclasses will typically supply specific parse methods that
-     * return different types of values. Since methods can't overload
-     * on return types, these will typically be named "parse", while
-     * this polymorphic method will always be called parseObject.  Any
-     * parse method that does not take a parse_pos should set status
-     * to an error value when no text in the required format is at the
-     * start position.
-     *
-     * @param source    The string to be parsed into an object.
-     * @param result    Formattable to be set to the parse result.
-     *                  If parse fails, return contents are undefined.
-     * @param parse_pos The position to start parsing at. Upon return
-     *                  this param is set to the position after the
-     *                  last character successfully parsed. If the
-     *                  source is not parsed successfully, this param
-     *                  will remain unchanged.
-     * @stable ICU 2.0
-     */
-    virtual void parseObject(const UnicodeString& source,
-                             Formattable& result,
-                             ParsePosition& parse_pos) const = 0;
-
-    /**
-     * Parses a string to produce an object. This is a convenience method
-     * which calls the pure virtual parseObject() method, and returns a
-     * failure UErrorCode if the ParsePosition indicates failure.
-     *
-     * @param source    The string to be parsed into an object.
-     * @param result    Formattable to be set to the parse result.
-     *                  If parse fails, return contents are undefined.
-     * @param status    Output param to be filled with success/failure
-     *                  result code.
-     * @stable ICU 2.0
-     */
-    void parseObject(const UnicodeString& source,
-                     Formattable& result,
-                     UErrorCode& status) const;
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual method.
-     * This method is to implement a simple version of RTTI, since not all
-     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
-     * clone() methods call this method.
-     * Concrete subclasses of Format must implement getDynamicClassID()
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID() const = 0;
-
-    /** Get the locale for this format object. You can choose between valid and actual locale.
-     *  @param type type of the locale we're looking for (valid or actual) 
-     *  @param status error code for the operation
-     *  @return the locale
-     *  @stable ICU 2.8
-     */
-    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
-
-    /** Get the locale for this format object. You can choose between valid and actual locale.
-     *  @param type type of the locale we're looking for (valid or actual) 
-     *  @param status error code for the operation
-     *  @return the locale
-     *  @internal
-     */
-    const char* getLocaleID(ULocDataLocaleType type, UErrorCode &status) const;
-
- protected:
-    /** @stable ICU 2.8 */
-    void setLocaleIDs(const char* valid, const char* actual);
-
-protected:
-    /**
-     * Default constructor for subclass use only.  Does nothing.
-     * @stable ICU 2.0
-     */
-    Format();
-
-    /**
-     * @stable ICU 2.0
-     */
-    Format(const Format&); // Does nothing; for subclasses only
-
-    /**
-     * @stable ICU 2.0
-     */
-    Format& operator=(const Format&); // Does nothing; for subclasses
-
-       
-    /**
-     * Simple function for initializing a UParseError from a UnicodeString.
-     *
-     * @param pattern The pattern to copy into the parseError
-     * @param pos The position in pattern where the error occured
-     * @param parseError The UParseError object to fill in
-     * @stable ICU 2.4
-     */
-    static void syntaxError(const UnicodeString& pattern,
-                            int32_t pos,
-                            UParseError& parseError);
-
- private:
-    char actualLocale[ULOC_FULLNAME_CAPACITY];
-    char validLocale[ULOC_FULLNAME_CAPACITY];
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _FORMAT
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/format.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/format.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/format.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/format.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,296 @@
+/*
+********************************************************************************
+* Copyright (C) 1997-2006, International Business Machines Corporation and others.
+* All Rights Reserved.
+********************************************************************************
+*
+* File FORMAT.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   02/19/97    aliu        Converted from java.
+*   03/17/97    clhuang     Updated per C++ implementation.
+*   03/27/97    helena      Updated to pass the simple test after code review.
+********************************************************************************
+*/
+// *****************************************************************************
+// This file was generated from the java source file Format.java
+// *****************************************************************************
+
+#ifndef FORMAT_H
+#define FORMAT_H
+
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Base class for all formats. 
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/unistr.h"
+#include "unicode/fmtable.h"
+#include "unicode/fieldpos.h"
+#include "unicode/parsepos.h"
+#include "unicode/parseerr.h" 
+#include "unicode/locid.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Base class for all formats.  This is an abstract base class which
+ * specifies the protocol for classes which convert other objects or
+ * values, such as numeric values and dates, and their string
+ * representations.  In some cases these representations may be
+ * localized or contain localized characters or strings.  For example,
+ * a numeric formatter such as DecimalFormat may convert a numeric
+ * value such as 12345 to the string "$12,345".  It may also parse
+ * the string back into a numeric value.  A date and time formatter
+ * like SimpleDateFormat may represent a specific date, encoded
+ * numerically, as a string such as "Wednesday, February 26, 1997 AD".
+ * <P>
+ * Many of the concrete subclasses of Format employ the notion of
+ * a pattern.  A pattern is a string representation of the rules which
+ * govern the interconversion between values and strings.  For example,
+ * a DecimalFormat object may be associated with the pattern
+ * "$#,##0.00;($#,##0.00)", which is a common US English format for
+ * currency values, yielding strings such as "$1,234.45" for 1234.45,
+ * and "($987.65)" for 987.6543.  The specific syntax of a pattern
+ * is defined by each subclass.
+ * <P>
+ * Even though many subclasses use patterns, the notion of a pattern
+ * is not inherent to Format classes in general, and is not part of
+ * the explicit base class protocol.
+ * <P>
+ * Two complex formatting classes bear mentioning.  These are
+ * MessageFormat and ChoiceFormat.  ChoiceFormat is a subclass of
+ * NumberFormat which allows the user to format different number ranges
+ * as strings.  For instance, 0 may be represented as "no files", 1 as
+ * "one file", and any number greater than 1 as "many files".
+ * MessageFormat is a formatter which utilizes other Format objects to
+ * format a string containing with multiple values.  For instance,
+ * A MessageFormat object might produce the string "There are no files
+ * on the disk MyDisk on February 27, 1997." given the arguments 0,
+ * "MyDisk", and the date value of 2/27/97.  See the ChoiceFormat
+ * and MessageFormat headers for further information.
+ * <P>
+ * If formatting is unsuccessful, a failing UErrorCode is returned when
+ * the Format cannot format the type of object, otherwise if there is
+ * something illformed about the the Unicode replacement character
+ * 0xFFFD is returned.
+ * <P>
+ * If there is no match when parsing, a parse failure UErrorCode is
+ * retured for methods which take no ParsePosition.  For the method
+ * that takes a ParsePosition, the index parameter is left unchanged.
+ * <P>
+ * <em>User subclasses are not supported.</em> While clients may write
+ * subclasses, such code will not necessarily work and will not be
+ * guaranteed to work stably from release to release.
+ */
+class U_I18N_API Format : public UObject {
+public:
+
+    /** Destructor
+     * @stable ICU 2.4
+     */
+    virtual ~Format();
+
+    /**
+     * Return true if the given Format objects are semantically equal.
+     * Objects of different subclasses are considered unequal.
+     * @param other    the object to be compared with.
+     * @return         Return true if the given Format objects are semantically equal.
+     *                 Objects of different subclasses are considered unequal.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const Format& other) const = 0;
+
+    /**
+     * Return true if the given Format objects are not semantically
+     * equal.
+     * @param other    the object to be compared with.
+     * @return         Return true if the given Format objects are not semantically.
+     * @stable ICU 2.0
+     */
+    UBool operator!=(const Format& other) const { return !operator==(other); }
+
+    /**
+     * Clone this object polymorphically.  The caller is responsible
+     * for deleting the result when done.
+     * @return    A copy of the object
+     * @stable ICU 2.0
+     */
+    virtual Format* clone() const = 0;
+
+    /**
+     * Formats an object to produce a string.
+     *
+     * @param obj       The object to format.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param status    Output parameter filled in with success or failure status.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(const Formattable& obj,
+                          UnicodeString& appendTo,
+                          UErrorCode& status) const;
+
+    /**
+     * Format an object to produce a string.  This is a pure virtual method which
+     * subclasses must implement. This method allows polymorphic formatting
+     * of Formattable objects. If a subclass of Format receives a Formattable
+     * object type it doesn't handle (e.g., if a numeric Formattable is passed
+     * to a DateFormat object) then it returns a failing UErrorCode.
+     *
+     * @param obj       The object to format.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @param status    Output param filled with success/failure status.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& format(const Formattable& obj,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos,
+                                  UErrorCode& status) const = 0;
+
+    /**
+     * Parse a string to produce an object.  This is a pure virtual
+     * method which subclasses must implement.  This method allows
+     * polymorphic parsing of strings into Formattable objects.
+     * <P>
+     * Before calling, set parse_pos.index to the offset you want to
+     * start parsing at in the source.  After calling, parse_pos.index
+     * is the end of the text you parsed.  If error occurs, index is
+     * unchanged.
+     * <P>
+     * When parsing, leading whitespace is discarded (with successful
+     * parse), while trailing whitespace is left as is.
+     * <P>
+     * Example:
+     * <P>
+     * Parsing "_12_xy" (where _ represents a space) for a number,
+     * with index == 0 will result in the number 12, with
+     * parse_pos.index updated to 3 (just before the second space).
+     * Parsing a second time will result in a failing UErrorCode since
+     * "xy" is not a number, and leave index at 3.
+     * <P>
+     * Subclasses will typically supply specific parse methods that
+     * return different types of values. Since methods can't overload
+     * on return types, these will typically be named "parse", while
+     * this polymorphic method will always be called parseObject.  Any
+     * parse method that does not take a parse_pos should set status
+     * to an error value when no text in the required format is at the
+     * start position.
+     *
+     * @param source    The string to be parsed into an object.
+     * @param result    Formattable to be set to the parse result.
+     *                  If parse fails, return contents are undefined.
+     * @param parse_pos The position to start parsing at. Upon return
+     *                  this param is set to the position after the
+     *                  last character successfully parsed. If the
+     *                  source is not parsed successfully, this param
+     *                  will remain unchanged.
+     * @stable ICU 2.0
+     */
+    virtual void parseObject(const UnicodeString& source,
+                             Formattable& result,
+                             ParsePosition& parse_pos) const = 0;
+
+    /**
+     * Parses a string to produce an object. This is a convenience method
+     * which calls the pure virtual parseObject() method, and returns a
+     * failure UErrorCode if the ParsePosition indicates failure.
+     *
+     * @param source    The string to be parsed into an object.
+     * @param result    Formattable to be set to the parse result.
+     *                  If parse fails, return contents are undefined.
+     * @param status    Output param to be filled with success/failure
+     *                  result code.
+     * @stable ICU 2.0
+     */
+    void parseObject(const UnicodeString& source,
+                     Formattable& result,
+                     UErrorCode& status) const;
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual method.
+     * This method is to implement a simple version of RTTI, since not all
+     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
+     * clone() methods call this method.
+     * Concrete subclasses of Format must implement getDynamicClassID()
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID() const = 0;
+
+    /** Get the locale for this format object. You can choose between valid and actual locale.
+     *  @param type type of the locale we're looking for (valid or actual) 
+     *  @param status error code for the operation
+     *  @return the locale
+     *  @stable ICU 2.8
+     */
+    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+
+    /** Get the locale for this format object. You can choose between valid and actual locale.
+     *  @param type type of the locale we're looking for (valid or actual) 
+     *  @param status error code for the operation
+     *  @return the locale
+     *  @internal
+     */
+    const char* getLocaleID(ULocDataLocaleType type, UErrorCode &status) const;
+
+ protected:
+    /** @stable ICU 2.8 */
+    void setLocaleIDs(const char* valid, const char* actual);
+
+protected:
+    /**
+     * Default constructor for subclass use only.  Does nothing.
+     * @stable ICU 2.0
+     */
+    Format();
+
+    /**
+     * @stable ICU 2.0
+     */
+    Format(const Format&); // Does nothing; for subclasses only
+
+    /**
+     * @stable ICU 2.0
+     */
+    Format& operator=(const Format&); // Does nothing; for subclasses
+
+       
+    /**
+     * Simple function for initializing a UParseError from a UnicodeString.
+     *
+     * @param pattern The pattern to copy into the parseError
+     * @param pos The position in pattern where the error occured
+     * @param parseError The UParseError object to fill in
+     * @stable ICU 2.4
+     */
+    static void syntaxError(const UnicodeString& pattern,
+                            int32_t pos,
+                            UParseError& parseError);
+
+ private:
+    char actualLocale[ULOC_FULLNAME_CAPACITY];
+    char validLocale[ULOC_FULLNAME_CAPACITY];
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _FORMAT
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/gregocal.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/gregocal.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/gregocal.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,823 +0,0 @@
-/*
-* Copyright (C) 1997-2006, International Business Machines Corporation and others.
-* All Rights Reserved.
-********************************************************************************
-*
-* File GREGOCAL.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   04/22/97    aliu        Overhauled header.
-*    07/28/98    stephen        Sync with JDK 1.2
-*    09/04/98    stephen        Re-sync with JDK 8/31 putback
-*    09/14/98    stephen        Changed type of kOneDay, kOneWeek to double.
-*                            Fixed bug in roll()
-*   10/15/99    aliu        Fixed j31, incorrect WEEK_OF_YEAR computation.
-*                           Added documentation of WEEK_OF_YEAR computation.
-*   10/15/99    aliu        Fixed j32, cannot set date to Feb 29 2000 AD.
-*                           {JDK bug 4210209 4209272}
-*   11/07/2003  srl         Update, clean up documentation.
-********************************************************************************
-*/
-
-#ifndef GREGOCAL_H
-#define GREGOCAL_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/calendar.h"
-
-/**
- * \file 
- * \brief C++ API: Concrete class which provides the standard calendar.
- */
-
-U_NAMESPACE_BEGIN
-
-/** 
- * Concrete class which provides the standard calendar used by most of the world.
- * <P>
- * The standard (Gregorian) calendar has 2 eras, BC and AD.
- * <P>
- * This implementation handles a single discontinuity, which corresponds by default to
- * the date the Gregorian calendar was originally instituted (October 15, 1582). Not all
- * countries adopted the Gregorian calendar then, so this cutover date may be changed by
- * the caller.
- * <P>
- * Prior to the institution of the Gregorian Calendar, New Year's Day was March 25. To
- * avoid confusion, this Calendar always uses January 1. A manual adjustment may be made
- * if desired for dates that are prior to the Gregorian changeover and which fall
- * between January 1 and March 24.
- *
- * <p>Values calculated for the <code>WEEK_OF_YEAR</code> field range from 1 to
- * 53.  Week 1 for a year is the first week that contains at least
- * <code>getMinimalDaysInFirstWeek()</code> days from that year.  It thus
- * depends on the values of <code>getMinimalDaysInFirstWeek()</code>,
- * <code>getFirstDayOfWeek()</code>, and the day of the week of January 1.
- * Weeks between week 1 of one year and week 1 of the following year are
- * numbered sequentially from 2 to 52 or 53 (as needed).
- *
- * <p>For example, January 1, 1998 was a Thursday.  If
- * <code>getFirstDayOfWeek()</code> is <code>MONDAY</code> and
- * <code>getMinimalDaysInFirstWeek()</code> is 4 (these are the values
- * reflecting ISO 8601 and many national standards), then week 1 of 1998 starts
- * on December 29, 1997, and ends on January 4, 1998.  If, however,
- * <code>getFirstDayOfWeek()</code> is <code>SUNDAY</code>, then week 1 of 1998
- * starts on January 4, 1998, and ends on January 10, 1998; the first three days
- * of 1998 then are part of week 53 of 1997.
- *
- * <p>Example for using GregorianCalendar:
- * <pre>
- * \code
- *     // get the supported ids for GMT-08:00 (Pacific Standard Time)
- *     UErrorCode success = U_ZERO_ERROR;
- *     const StringEnumeration *ids = TimeZone::createEnumeration(-8 * 60 * 60 * 1000);
- *     // if no ids were returned, something is wrong. get out.
- *     if (ids == 0 || ids->count(success) == 0) {
- *         return;
- *     }
- *
- *     // begin output
- *     cout << "Current Time" << endl;
- *
- *     // create a Pacific Standard Time time zone
- *     SimpleTimeZone* pdt = new SimpleTimeZone(-8 * 60 * 60 * 1000, ids->unext(NULL, success)));
- *
- *     // set up rules for daylight savings time
- *     pdt->setStartRule(Calendar::APRIL, 1, Calendar::SUNDAY, 2 * 60 * 60 * 1000);
- *     pdt->setEndRule(Calendar::OCTOBER, -1, Calendar::SUNDAY, 2 * 60 * 60 * 1000);
- *
- *     // create a GregorianCalendar with the Pacific Daylight time zone
- *     // and the current date and time
- *     Calendar* calendar = new GregorianCalendar( pdt, success );
- *
- *     // print out a bunch of interesting things
- *     cout << "ERA: " << calendar->get( Calendar::ERA, success ) << endl;
- *     cout << "YEAR: " << calendar->get( Calendar::YEAR, success ) << endl;
- *     cout << "MONTH: " << calendar->get( Calendar::MONTH, success ) << endl;
- *     cout << "WEEK_OF_YEAR: " << calendar->get( Calendar::WEEK_OF_YEAR, success ) << endl;
- *     cout << "WEEK_OF_MONTH: " << calendar->get( Calendar::WEEK_OF_MONTH, success ) << endl;
- *     cout << "DATE: " << calendar->get( Calendar::DATE, success ) << endl;
- *     cout << "DAY_OF_MONTH: " << calendar->get( Calendar::DAY_OF_MONTH, success ) << endl;
- *     cout << "DAY_OF_YEAR: " << calendar->get( Calendar::DAY_OF_YEAR, success ) << endl;
- *     cout << "DAY_OF_WEEK: " << calendar->get( Calendar::DAY_OF_WEEK, success ) << endl;
- *     cout << "DAY_OF_WEEK_IN_MONTH: " << calendar->get( Calendar::DAY_OF_WEEK_IN_MONTH, success ) << endl;
- *     cout << "AM_PM: " << calendar->get( Calendar::AM_PM, success ) << endl;
- *     cout << "HOUR: " << calendar->get( Calendar::HOUR, success ) << endl;
- *     cout << "HOUR_OF_DAY: " << calendar->get( Calendar::HOUR_OF_DAY, success ) << endl;
- *     cout << "MINUTE: " << calendar->get( Calendar::MINUTE, success ) << endl;
- *     cout << "SECOND: " << calendar->get( Calendar::SECOND, success ) << endl;
- *     cout << "MILLISECOND: " << calendar->get( Calendar::MILLISECOND, success ) << endl;
- *     cout << "ZONE_OFFSET: " << (calendar->get( Calendar::ZONE_OFFSET, success )/(60*60*1000)) << endl;
- *     cout << "DST_OFFSET: " << (calendar->get( Calendar::DST_OFFSET, success )/(60*60*1000)) << endl;
- *
- *     cout << "Current Time, with hour reset to 3" << endl;
- *     calendar->clear(Calendar::HOUR_OF_DAY); // so doesn't override
- *     calendar->set(Calendar::HOUR, 3);
- *     cout << "ERA: " << calendar->get( Calendar::ERA, success ) << endl;
- *     cout << "YEAR: " << calendar->get( Calendar::YEAR, success ) << endl;
- *     cout << "MONTH: " << calendar->get( Calendar::MONTH, success ) << endl;
- *     cout << "WEEK_OF_YEAR: " << calendar->get( Calendar::WEEK_OF_YEAR, success ) << endl;
- *     cout << "WEEK_OF_MONTH: " << calendar->get( Calendar::WEEK_OF_MONTH, success ) << endl;
- *     cout << "DATE: " << calendar->get( Calendar::DATE, success ) << endl;
- *     cout << "DAY_OF_MONTH: " << calendar->get( Calendar::DAY_OF_MONTH, success ) << endl;
- *     cout << "DAY_OF_YEAR: " << calendar->get( Calendar::DAY_OF_YEAR, success ) << endl;
- *     cout << "DAY_OF_WEEK: " << calendar->get( Calendar::DAY_OF_WEEK, success ) << endl;
- *     cout << "DAY_OF_WEEK_IN_MONTH: " << calendar->get( Calendar::DAY_OF_WEEK_IN_MONTH, success ) << endl;
- *     cout << "AM_PM: " << calendar->get( Calendar::AM_PM, success ) << endl;
- *     cout << "HOUR: " << calendar->get( Calendar::HOUR, success ) << endl;
- *     cout << "HOUR_OF_DAY: " << calendar->get( Calendar::HOUR_OF_DAY, success ) << endl;
- *     cout << "MINUTE: " << calendar->get( Calendar::MINUTE, success ) << endl;
- *     cout << "SECOND: " << calendar->get( Calendar::SECOND, success ) << endl;
- *     cout << "MILLISECOND: " << calendar->get( Calendar::MILLISECOND, success ) << endl;
- *     cout << "ZONE_OFFSET: " << (calendar->get( Calendar::ZONE_OFFSET, success )/(60*60*1000)) << endl; // in hours
- *     cout << "DST_OFFSET: " << (calendar->get( Calendar::DST_OFFSET, success )/(60*60*1000)) << endl; // in hours
- *
- *     if (U_FAILURE(success)) {
- *         cout << "An error occured. success=" << u_errorName(success) << endl;
- *     }
- *
- *     delete ids;
- *     delete calendar; // also deletes pdt
- * \endcode
- * </pre>
- * @stable ICU 2.0
- */
-class U_I18N_API GregorianCalendar: public Calendar {
-public:
-
-    /**
-     * Useful constants for GregorianCalendar and TimeZone.
-     * @stable ICU 2.0
-     */
-    enum EEras {
-        BC,
-        AD
-    };
-
-    /**
-     * Constructs a default GregorianCalendar using the current time in the default time
-     * zone with the default locale.
-     *
-     * @param success  Indicates the status of GregorianCalendar object construction.
-     *                 Returns U_ZERO_ERROR if constructed successfully.
-     * @stable ICU 2.0
-     */
-    GregorianCalendar(UErrorCode& success);
-
-    /**
-     * Constructs a GregorianCalendar based on the current time in the given time zone
-     * with the default locale. Clients are no longer responsible for deleting the given
-     * time zone object after it's adopted.
-     *
-     * @param zoneToAdopt     The given timezone.
-     * @param success  Indicates the status of GregorianCalendar object construction.
-     *                 Returns U_ZERO_ERROR if constructed successfully.
-     * @stable ICU 2.0
-     */
-    GregorianCalendar(TimeZone* zoneToAdopt, UErrorCode& success);
-
-    /**
-     * Constructs a GregorianCalendar based on the current time in the given time zone
-     * with the default locale.
-     *
-     * @param zone     The given timezone.
-     * @param success  Indicates the status of GregorianCalendar object construction.
-     *                 Returns U_ZERO_ERROR if constructed successfully.
-     * @stable ICU 2.0
-     */
-    GregorianCalendar(const TimeZone& zone, UErrorCode& success);
-
-    /**
-     * Constructs a GregorianCalendar based on the current time in the default time zone
-     * with the given locale.
-     *
-     * @param aLocale  The given locale.
-     * @param success  Indicates the status of GregorianCalendar object construction.
-     *                 Returns U_ZERO_ERROR if constructed successfully.
-     * @stable ICU 2.0
-     */
-    GregorianCalendar(const Locale& aLocale, UErrorCode& success);
-
-    /**
-     * Constructs a GregorianCalendar based on the current time in the given time zone
-     * with the given locale. Clients are no longer responsible for deleting the given
-     * time zone object after it's adopted.
-     *
-     * @param zoneToAdopt     The given timezone.
-     * @param aLocale  The given locale.
-     * @param success  Indicates the status of GregorianCalendar object construction.
-     *                 Returns U_ZERO_ERROR if constructed successfully.
-     * @stable ICU 2.0
-     */
-    GregorianCalendar(TimeZone* zoneToAdopt, const Locale& aLocale, UErrorCode& success);
-
-    /**
-     * Constructs a GregorianCalendar based on the current time in the given time zone
-     * with the given locale.
-     *
-     * @param zone     The given timezone.
-     * @param aLocale  The given locale.
-     * @param success  Indicates the status of GregorianCalendar object construction.
-     *                 Returns U_ZERO_ERROR if constructed successfully.
-     * @stable ICU 2.0
-     */
-    GregorianCalendar(const TimeZone& zone, const Locale& aLocale, UErrorCode& success);
-
-    /**
-     * Constructs a GregorianCalendar with the given AD date set in the default time
-     * zone with the default locale.
-     *
-     * @param year     The value used to set the YEAR time field in the calendar.
-     * @param month    The value used to set the MONTH time field in the calendar. Month
-     *                 value is 0-based. e.g., 0 for January.
-     * @param date     The value used to set the DATE time field in the calendar.
-     * @param success  Indicates the status of GregorianCalendar object construction.
-     *                 Returns U_ZERO_ERROR if constructed successfully.
-     * @stable ICU 2.0
-     */
-    GregorianCalendar(int32_t year, int32_t month, int32_t date, UErrorCode& success);
-
-    /**
-     * Constructs a GregorianCalendar with the given AD date and time set for the
-     * default time zone with the default locale.
-     *
-     * @param year     The value used to set the YEAR time field in the calendar.
-     * @param month    The value used to set the MONTH time field in the calendar. Month
-     *                 value is 0-based. e.g., 0 for January.
-     * @param date     The value used to set the DATE time field in the calendar.
-     * @param hour     The value used to set the HOUR_OF_DAY time field in the calendar.
-     * @param minute   The value used to set the MINUTE time field in the calendar.
-     * @param success  Indicates the status of GregorianCalendar object construction.
-     *                 Returns U_ZERO_ERROR if constructed successfully.
-     * @stable ICU 2.0
-     */
-    GregorianCalendar(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute, UErrorCode& success);
-
-    /**
-     * Constructs a GregorianCalendar with the given AD date and time set for the
-     * default time zone with the default locale.
-     *
-     * @param year     The value used to set the YEAR time field in the calendar.
-     * @param month    The value used to set the MONTH time field in the calendar. Month
-     *                 value is 0-based. e.g., 0 for January.
-     * @param date     The value used to set the DATE time field in the calendar.
-     * @param hour     The value used to set the HOUR_OF_DAY time field in the calendar.
-     * @param minute   The value used to set the MINUTE time field in the calendar.
-     * @param second   The value used to set the SECOND time field in the calendar.
-     * @param success  Indicates the status of GregorianCalendar object construction.
-     *                 Returns U_ZERO_ERROR if constructed successfully.
-     * @stable ICU 2.0
-     */
-    GregorianCalendar(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute, int32_t second, UErrorCode& success);
-
-    /**
-     * Destructor
-     * @stable ICU 2.0
-     */
-    virtual ~GregorianCalendar();
-
-    /**
-     * Copy constructor
-     * @param source    the object to be copied.
-     * @stable ICU 2.0
-     */
-    GregorianCalendar(const GregorianCalendar& source);
-
-    /**
-     * Default assignment operator
-     * @param right    the object to be copied.
-     * @stable ICU 2.0
-     */
-    GregorianCalendar& operator=(const GregorianCalendar& right);
-
-    /**
-     * Create and return a polymorphic copy of this calendar.
-     * @return    return a polymorphic copy of this calendar.
-     * @stable ICU 2.0
-     */
-    virtual Calendar* clone(void) const;
-
-    /**
-     * Sets the GregorianCalendar change date. This is the point when the switch from
-     * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October
-     * 15, 1582. Previous to this time and date will be Julian dates.
-     *
-     * @param date     The given Gregorian cutover date.
-     * @param success  Output param set to success/failure code on exit.
-     * @stable ICU 2.0
-     */
-    void setGregorianChange(UDate date, UErrorCode& success);
-
-    /**
-     * Gets the Gregorian Calendar change date. This is the point when the switch from
-     * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October
-     * 15, 1582. Previous to this time and date will be Julian dates.
-     *
-     * @return   The Gregorian cutover time for this calendar.
-     * @stable ICU 2.0
-     */
-    UDate getGregorianChange(void) const;
-
-    /**
-     * Return true if the given year is a leap year. Determination of whether a year is
-     * a leap year is actually very complicated. We do something crude and mostly
-     * correct here, but for a real determination you need a lot of contextual
-     * information. For example, in Sweden, the change from Julian to Gregorian happened
-     * in a complex way resulting in missed leap years and double leap years between
-     * 1700 and 1753. Another example is that after the start of the Julian calendar in
-     * 45 B.C., the leap years did not regularize until 8 A.D. This method ignores these
-     * quirks, and pays attention only to the Julian onset date and the Gregorian
-     * cutover (which can be changed).
-     *
-     * @param year  The given year.
-     * @return      True if the given year is a leap year; false otherwise.
-     * @stable ICU 2.0
-     */
-    UBool isLeapYear(int32_t year) const;
-
-    /**
-     * Returns TRUE if the given Calendar object is equivalent to this
-     * one.  Calendar override.
-     *
-     * @param other the Calendar to be compared with this Calendar   
-     * @stable ICU 2.4
-     */
-    virtual UBool isEquivalentTo(const Calendar& other) const;
-
-    /**
-     * (Overrides Calendar) Rolls up or down by the given amount in the specified field.
-     * For more information, see the documentation for Calendar::roll().
-     *
-     * @param field   The time field.
-     * @param amount  Indicates amount to roll.
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid, this will be set to
-     *                an error status.
-     * @deprecated ICU 2.6. Use roll(UCalendarDateFields field, int32_t amount, UErrorCode& status) instead.
-     */
-    virtual void roll(EDateFields field, int32_t amount, UErrorCode& status);
-
-    /**
-     * (Overrides Calendar) Rolls up or down by the given amount in the specified field.
-     * For more information, see the documentation for Calendar::roll().
-     *
-     * @param field   The time field.
-     * @param amount  Indicates amount to roll.
-     * @param status  Output param set to success/failure code on exit. If any value
-     *                previously set in the time field is invalid, this will be set to
-     *                an error status.
-     * @stable ICU 2.6.
-     */
-    virtual void roll(UCalendarDateFields field, int32_t amount, UErrorCode& status);
-
-    /**
-     * Return the minimum value that this field could have, given the current date.
-     * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum().
-     * @param field    the time field.
-     * @return         the minimum value that this field could have, given the current date.
-     * @deprecated ICU 2.6. Use getActualMinimum(UCalendarDateFields field) instead.
-     */
-    int32_t getActualMinimum(EDateFields field) const;
-
-    /**
-     * Return the minimum value that this field could have, given the current date.
-     * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum().
-     * @param field    the time field.
-     * @param status
-     * @return         the minimum value that this field could have, given the current date.
-     * @deprecated ICU 2.6. Use getActualMinimum(UCalendarDateFields field) instead. (Added to ICU 3.0 for signature consistency)
-     */
-    int32_t getActualMinimum(EDateFields field, UErrorCode& status) const;
-
-    /**
-     * Return the minimum value that this field could have, given the current date.
-     * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum().
-     * @param field    the time field.
-     * @param status   error result.
-     * @return         the minimum value that this field could have, given the current date.
-     * @stable ICU 3.0
-     */
-    int32_t getActualMinimum(UCalendarDateFields field, UErrorCode &status) const;
-
-    /**
-     * Return the maximum value that this field could have, given the current date.
-     * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual
-     * maximum would be 28; for "Feb 3, 1996" it s 29.  Similarly for a Hebrew calendar,
-     * for some years the actual maximum for MONTH is 12, and for others 13.
-     * @param field    the time field.
-     * @return         the maximum value that this field could have, given the current date.
-     * @deprecated ICU 2.6. Use getActualMaximum(UCalendarDateFields field) instead.
-     */
-    int32_t getActualMaximum(EDateFields field) const;
-
-    /**
-     * Return the maximum value that this field could have, given the current date.
-     * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual
-     * maximum would be 28; for "Feb 3, 1996" it s 29.  Similarly for a Hebrew calendar,
-     * for some years the actual maximum for MONTH is 12, and for others 13.
-     * @param field    the time field.
-     * @param status   returns any errors that may result from this function call.
-     * @return         the maximum value that this field could have, given the current date.
-     * @stable ICU 2.6
-     */
-    virtual int32_t getActualMaximum(UCalendarDateFields field, UErrorCode& status) const;
-
-    /**
-     * (Overrides Calendar) Return true if the current date for this Calendar is in
-     * Daylight Savings Time. Recognizes DST_OFFSET, if it is set.
-     *
-     * @param status Fill-in parameter which receives the status of this operation.
-     * @return   True if the current date for this Calendar is in Daylight Savings Time,
-     *           false, otherwise.
-     * @stable ICU 2.0
-     */
-    virtual UBool inDaylightTime(UErrorCode& status) const;
-
-public:
-
-    /**
-     * Override Calendar Returns a unique class ID POLYMORPHICALLY. Pure virtual
-     * override. This method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone() methods call
-     * this method.
-     *
-     * @return   The class ID for this object. All objects of a given class have the
-     *           same class ID. Objects of other classes have different class IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-
-    /**
-     * Return the class ID for this class. This is useful only for comparing to a return
-     * value from getDynamicClassID(). For example:
-     *
-     *      Base* polymorphic_pointer = createPolymorphicObject();
-     *      if (polymorphic_pointer->getDynamicClassID() ==
-     *          Derived::getStaticClassID()) ...
-     *
-     * @return   The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Get the calendar type, "gregorian", for use in DateFormatSymbols.
-     *
-     * @return calendar type
-     * @internal
-     */
-    virtual const char * getType() const;
-
-protected:
-
-    /**
-     * (Overrides Calendar) Converts GMT as milliseconds to time field values.
-     * @param status Fill-in parameter which receives the status of this operation.
-     * @stable ICU 2.0
-     */
-
- private:
-    GregorianCalendar(); // default constructor not implemented
-
- protected:
-    /**
-     * Return the ERA.  We need a special method for this because the
-     * default ERA is AD, but a zero (unset) ERA is BC.
-     * @return    the ERA.
-     * @internal
-     */
-    virtual int32_t internalGetEra() const;
-
-    /**
-     * Return the Julian day number of day before the first day of the
-     * given month in the given extended year.  Subclasses should override
-     * this method to implement their calendar system.
-     * @param eyear the extended year
-     * @param month the zero-based month, or 0 if useMonth is false
-     * @param useMonth if false, compute the day before the first day of
-     * the given year, otherwise, compute the day before the first day of
-     * the given month
-     * @return the Julian day number of the day before the first
-     * day of the given month and year
-     * @internal
-     */
-    virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month,
-                                                   UBool useMonth) const;
-
-    /**
-     * Subclasses may override this.  This method calls
-     * handleGetMonthLength() to obtain the calendar-specific month
-     * length.
-     * @param bestField which field to use to calculate the date 
-     * @return julian day specified by calendar fields.
-     * @internal
-     */
-    virtual int32_t handleComputeJulianDay(UCalendarDateFields bestField)  ;
-
-    /**
-     * Return the number of days in the given month of the given extended
-     * year of this calendar system.  Subclasses should override this
-     * method if they can provide a more correct or more efficient
-     * implementation than the default implementation in Calendar.
-     * @internal
-     */
-    virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const;
-
-    /**
-     * Return the number of days in the given extended year of this
-     * calendar system.  Subclasses should override this method if they can
-     * provide a more correct or more efficient implementation than the
-     * default implementation in Calendar.
-     * @stable ICU 2.0
-     */
-    virtual int32_t handleGetYearLength(int32_t eyear) const;
-
-    /**
-     * return the length of the given month.
-     * @param month    the given month.
-     * @return    the length of the given month.
-     * @internal
-     */
-    virtual int32_t monthLength(int32_t month) const;
-
-    /**
-     * return the length of the month according to the given year.
-     * @param month    the given month.
-     * @param year     the given year.
-     * @return         the length of the month
-     * @internal
-     */
-    virtual int32_t monthLength(int32_t month, int32_t year) const;
-    
-    /**
-     * return the length of the given year.
-     * @param year    the given year.
-     * @return        the length of the given year.
-     * @internal
-     */
-    int32_t yearLength(int32_t year) const;
-    
-    /**
-     * return the length of the year field.
-     * @return    the length of the year field
-     * @internal
-     */
-    int32_t yearLength(void) const;
-
-    /**
-     * After adjustments such as add(MONTH), add(YEAR), we don't want the
-     * month to jump around.  E.g., we don't want Jan 31 + 1 month to go to Mar
-     * 3, we want it to go to Feb 28.  Adjustments which might run into this
-     * problem call this method to retain the proper month.
-     * @internal
-     */
-    void pinDayOfMonth(void);
-
-    /**
-     * Return the day number with respect to the epoch.  January 1, 1970 (Gregorian)
-     * is day zero.
-     * @param status Fill-in parameter which receives the status of this operation.
-     * @return       the day number with respect to the epoch.  
-     * @internal
-     */
-    virtual UDate getEpochDay(UErrorCode& status);
-
-    /**
-     * Subclass API for defining limits of different types.
-     * Subclasses must implement this method to return limits for the
-     * following fields:
-     *
-     * <pre>UCAL_ERA
-     * UCAL_YEAR
-     * UCAL_MONTH
-     * UCAL_WEEK_OF_YEAR
-     * UCAL_WEEK_OF_MONTH
-     * UCAL_DATE (DAY_OF_MONTH on Java)
-     * UCAL_DAY_OF_YEAR
-     * UCAL_DAY_OF_WEEK_IN_MONTH
-     * UCAL_YEAR_WOY
-     * UCAL_EXTENDED_YEAR</pre>
-     *
-     * @param field one of the above field numbers
-     * @param limitType one of <code>MINIMUM</code>, <code>GREATEST_MINIMUM</code>,
-     * <code>LEAST_MAXIMUM</code>, or <code>MAXIMUM</code>
-     * @internal
-     */
-    virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const;
-
-    /**
-     * Return the extended year defined by the current fields.  This will
-     * use the UCAL_EXTENDED_YEAR field or the UCAL_YEAR and supra-year fields (such
-     * as UCAL_ERA) specific to the calendar system, depending on which set of
-     * fields is newer.
-     * @return the extended year
-     * @internal
-     */
-    virtual int32_t handleGetExtendedYear();
-
-    /** 
-     * Subclasses may override this to convert from week fields 
-     * (YEAR_WOY and WEEK_OF_YEAR) to an extended year in the case
-     * where YEAR, EXTENDED_YEAR are not set.
-     * The Gregorian implementation assumes a yearWoy in gregorian format, according to the current era.
-     * @return the extended year, UCAL_EXTENDED_YEAR
-     * @internal
-     */
-    virtual int32_t handleGetExtendedYearFromWeekFields(int32_t yearWoy, int32_t woy);
-
-
-    /**
-     * Subclasses may override this method to compute several fields
-     * specific to each calendar system.  These are:
-     *
-     * <ul><li>ERA
-     * <li>YEAR
-     * <li>MONTH
-     * <li>DAY_OF_MONTH
-     * <li>DAY_OF_YEAR
-     * <li>EXTENDED_YEAR</ul>
-     *
-     * <p>The GregorianCalendar implementation implements
-     * a calendar with the specified Julian/Gregorian cutover date.
-     * @internal
-     */
-    virtual void handleComputeFields(int32_t julianDay, UErrorCode &status);
-
- private:
-    /**
-     * Compute the julian day number of the given year.
-     * @param isGregorian    if true, using Gregorian calendar, otherwise using Julian calendar
-     * @param year           the given year.
-     * @param isLeap         true if the year is a leap year.       
-     * @return 
-     */
-    static double computeJulianDayOfYear(UBool isGregorian, int32_t year,
-                                         UBool& isLeap);
-    
-    /**
-     * Validates the values of the set time fields.  True if they're all valid.
-     * @return    True if the set time fields are all valid.
-     */
-    UBool validateFields(void) const;
-
-    /**
-     * Validates the value of the given time field.  True if it's valid.
-     */
-    UBool boundsCheck(int32_t value, UCalendarDateFields field) const;
-
-    /**
-     * Return the pseudo-time-stamp for two fields, given their
-     * individual pseudo-time-stamps.  If either of the fields
-     * is unset, then the aggregate is unset.  Otherwise, the
-     * aggregate is the later of the two stamps.
-     * @param stamp_a    One given field.
-     * @param stamp_b    Another given field.
-     * @return the pseudo-time-stamp for two fields
-     */
-    int32_t aggregateStamp(int32_t stamp_a, int32_t stamp_b);
-
-    /**
-     * The point at which the Gregorian calendar rules are used, measured in
-     * milliseconds from the standard epoch.  Default is October 15, 1582
-     * (Gregorian) 00:00:00 UTC, that is, October 4, 1582 (Julian) is followed
-     * by October 15, 1582 (Gregorian).  This corresponds to Julian day number
-     * 2299161. This is measured from the standard epoch, not in Julian Days.
-     * @internal
-     */
-    UDate                fGregorianCutover;
-
-    /**
-     * Julian day number of the Gregorian cutover
-     */
-    int32_t             fCutoverJulianDay;
-
-    /**
-     * Midnight, local time (using this Calendar's TimeZone) at or before the
-     * gregorianCutover. This is a pure date value with no time of day or
-     * timezone component.
-     */
-    UDate                 fNormalizedGregorianCutover;// = gregorianCutover;
-
-    /**
-     * The year of the gregorianCutover, with 0 representing
-     * 1 BC, -1 representing 2 BC, etc.
-     */
-    int32_t fGregorianCutoverYear;// = 1582;
-
-    /**
-     * The year of the gregorianCutover, with 0 representing
-     * 1 BC, -1 representing 2 BC, etc.
-     */
-    int32_t fGregorianCutoverJulianDay;// = 2299161;
-
-    /**
-     * Converts time as milliseconds to Julian date. The Julian date used here is not a
-     * true Julian date, since it is measured from midnight, not noon.
-     *
-     * @param millis  The given milliseconds.
-     * @return        The Julian date number.
-     */
-    static double millisToJulianDay(UDate millis);
-
-    /**
-     * Converts Julian date to time as milliseconds. The Julian date used here is not a
-     * true Julian date, since it is measured from midnight, not noon.
-     *
-     * @param julian  The given Julian date number.
-     * @return        Time as milliseconds.
-     */
-    static UDate julianDayToMillis(double julian);
-
-    /**
-     * Used by handleComputeJulianDay() and handleComputeMonthStart().
-     * Temporary field indicating whether the calendar is currently Gregorian as opposed to Julian.
-     */
-    UBool fIsGregorian;
-
-    /**
-     * Used by handleComputeJulianDay() and handleComputeMonthStart().
-     * Temporary field indicating that the sense of the gregorian cutover should be inverted
-     * to handle certain calculations on and around the cutover date.
-     */
-    UBool fInvertGregorian;
-
-
- public: // internal implementation
-
-    /**
-     * @internal 
-     * @return TRUE if this calendar has the notion of a default century
-     */
-    virtual UBool haveDefaultCentury() const;
-
-    /**
-     * @internal
-     * @return the start of the default century
-     */
-    virtual UDate defaultCenturyStart() const;
-
-    /**
-     * @internal 
-     * @return the beginning year of the default century
-     */
-    virtual int32_t defaultCenturyStartYear() const;
-
- private:
-    /**
-     * The system maintains a static default century start date.  This is initialized
-     * the first time it is used.  Before then, it is set to SYSTEM_DEFAULT_CENTURY to
-     * indicate an uninitialized state.  Once the system default century date and year
-     * are set, they do not change.
-     */
-    static UDate         fgSystemDefaultCenturyStart;
-
-    /**
-     * See documentation for systemDefaultCenturyStart.
-     */
-    static int32_t          fgSystemDefaultCenturyStartYear;
-
-    /**
-     * Default value that indicates the defaultCenturyStartYear is unitialized
-     */
-    static const int32_t    fgSystemDefaultCenturyYear;
-
-    /**
-     * Default value that indicates the UDate of the beginning of the system default century
-     */
-    static const UDate        fgSystemDefaultCentury;
-
-    /**
-     * Returns the beginning date of the 100-year window that dates with 2-digit years
-     * are considered to fall within.
-     * @return    the beginning date of the 100-year window that dates with 2-digit years
-     *            are considered to fall within.
-     */
-    UDate         internalGetDefaultCenturyStart(void) const;
-
-    /**
-     * Returns the first year of the 100-year window that dates with 2-digit years
-     * are considered to fall within.
-     * @return    the first year of the 100-year window that dates with 2-digit years
-     *            are considered to fall within.
-     */
-    int32_t          internalGetDefaultCenturyStartYear(void) const;
-
-    /**
-     * Initializes the 100-year window that dates with 2-digit years are considered
-     * to fall within so that its start date is 80 years before the current time.
-     */
-    static void  initializeSystemDefaultCentury(void);
-
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _GREGOCAL
-//eof
-

Copied: MacRuby/trunk/icu-1060/unicode/gregocal.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/gregocal.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/gregocal.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/gregocal.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,823 @@
+/*
+* Copyright (C) 1997-2006, International Business Machines Corporation and others.
+* All Rights Reserved.
+********************************************************************************
+*
+* File GREGOCAL.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   04/22/97    aliu        Overhauled header.
+*    07/28/98    stephen        Sync with JDK 1.2
+*    09/04/98    stephen        Re-sync with JDK 8/31 putback
+*    09/14/98    stephen        Changed type of kOneDay, kOneWeek to double.
+*                            Fixed bug in roll()
+*   10/15/99    aliu        Fixed j31, incorrect WEEK_OF_YEAR computation.
+*                           Added documentation of WEEK_OF_YEAR computation.
+*   10/15/99    aliu        Fixed j32, cannot set date to Feb 29 2000 AD.
+*                           {JDK bug 4210209 4209272}
+*   11/07/2003  srl         Update, clean up documentation.
+********************************************************************************
+*/
+
+#ifndef GREGOCAL_H
+#define GREGOCAL_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/calendar.h"
+
+/**
+ * \file 
+ * \brief C++ API: Concrete class which provides the standard calendar.
+ */
+
+U_NAMESPACE_BEGIN
+
+/** 
+ * Concrete class which provides the standard calendar used by most of the world.
+ * <P>
+ * The standard (Gregorian) calendar has 2 eras, BC and AD.
+ * <P>
+ * This implementation handles a single discontinuity, which corresponds by default to
+ * the date the Gregorian calendar was originally instituted (October 15, 1582). Not all
+ * countries adopted the Gregorian calendar then, so this cutover date may be changed by
+ * the caller.
+ * <P>
+ * Prior to the institution of the Gregorian Calendar, New Year's Day was March 25. To
+ * avoid confusion, this Calendar always uses January 1. A manual adjustment may be made
+ * if desired for dates that are prior to the Gregorian changeover and which fall
+ * between January 1 and March 24.
+ *
+ * <p>Values calculated for the <code>WEEK_OF_YEAR</code> field range from 1 to
+ * 53.  Week 1 for a year is the first week that contains at least
+ * <code>getMinimalDaysInFirstWeek()</code> days from that year.  It thus
+ * depends on the values of <code>getMinimalDaysInFirstWeek()</code>,
+ * <code>getFirstDayOfWeek()</code>, and the day of the week of January 1.
+ * Weeks between week 1 of one year and week 1 of the following year are
+ * numbered sequentially from 2 to 52 or 53 (as needed).
+ *
+ * <p>For example, January 1, 1998 was a Thursday.  If
+ * <code>getFirstDayOfWeek()</code> is <code>MONDAY</code> and
+ * <code>getMinimalDaysInFirstWeek()</code> is 4 (these are the values
+ * reflecting ISO 8601 and many national standards), then week 1 of 1998 starts
+ * on December 29, 1997, and ends on January 4, 1998.  If, however,
+ * <code>getFirstDayOfWeek()</code> is <code>SUNDAY</code>, then week 1 of 1998
+ * starts on January 4, 1998, and ends on January 10, 1998; the first three days
+ * of 1998 then are part of week 53 of 1997.
+ *
+ * <p>Example for using GregorianCalendar:
+ * <pre>
+ * \code
+ *     // get the supported ids for GMT-08:00 (Pacific Standard Time)
+ *     UErrorCode success = U_ZERO_ERROR;
+ *     const StringEnumeration *ids = TimeZone::createEnumeration(-8 * 60 * 60 * 1000);
+ *     // if no ids were returned, something is wrong. get out.
+ *     if (ids == 0 || ids->count(success) == 0) {
+ *         return;
+ *     }
+ *
+ *     // begin output
+ *     cout << "Current Time" << endl;
+ *
+ *     // create a Pacific Standard Time time zone
+ *     SimpleTimeZone* pdt = new SimpleTimeZone(-8 * 60 * 60 * 1000, ids->unext(NULL, success)));
+ *
+ *     // set up rules for daylight savings time
+ *     pdt->setStartRule(Calendar::APRIL, 1, Calendar::SUNDAY, 2 * 60 * 60 * 1000);
+ *     pdt->setEndRule(Calendar::OCTOBER, -1, Calendar::SUNDAY, 2 * 60 * 60 * 1000);
+ *
+ *     // create a GregorianCalendar with the Pacific Daylight time zone
+ *     // and the current date and time
+ *     Calendar* calendar = new GregorianCalendar( pdt, success );
+ *
+ *     // print out a bunch of interesting things
+ *     cout << "ERA: " << calendar->get( Calendar::ERA, success ) << endl;
+ *     cout << "YEAR: " << calendar->get( Calendar::YEAR, success ) << endl;
+ *     cout << "MONTH: " << calendar->get( Calendar::MONTH, success ) << endl;
+ *     cout << "WEEK_OF_YEAR: " << calendar->get( Calendar::WEEK_OF_YEAR, success ) << endl;
+ *     cout << "WEEK_OF_MONTH: " << calendar->get( Calendar::WEEK_OF_MONTH, success ) << endl;
+ *     cout << "DATE: " << calendar->get( Calendar::DATE, success ) << endl;
+ *     cout << "DAY_OF_MONTH: " << calendar->get( Calendar::DAY_OF_MONTH, success ) << endl;
+ *     cout << "DAY_OF_YEAR: " << calendar->get( Calendar::DAY_OF_YEAR, success ) << endl;
+ *     cout << "DAY_OF_WEEK: " << calendar->get( Calendar::DAY_OF_WEEK, success ) << endl;
+ *     cout << "DAY_OF_WEEK_IN_MONTH: " << calendar->get( Calendar::DAY_OF_WEEK_IN_MONTH, success ) << endl;
+ *     cout << "AM_PM: " << calendar->get( Calendar::AM_PM, success ) << endl;
+ *     cout << "HOUR: " << calendar->get( Calendar::HOUR, success ) << endl;
+ *     cout << "HOUR_OF_DAY: " << calendar->get( Calendar::HOUR_OF_DAY, success ) << endl;
+ *     cout << "MINUTE: " << calendar->get( Calendar::MINUTE, success ) << endl;
+ *     cout << "SECOND: " << calendar->get( Calendar::SECOND, success ) << endl;
+ *     cout << "MILLISECOND: " << calendar->get( Calendar::MILLISECOND, success ) << endl;
+ *     cout << "ZONE_OFFSET: " << (calendar->get( Calendar::ZONE_OFFSET, success )/(60*60*1000)) << endl;
+ *     cout << "DST_OFFSET: " << (calendar->get( Calendar::DST_OFFSET, success )/(60*60*1000)) << endl;
+ *
+ *     cout << "Current Time, with hour reset to 3" << endl;
+ *     calendar->clear(Calendar::HOUR_OF_DAY); // so doesn't override
+ *     calendar->set(Calendar::HOUR, 3);
+ *     cout << "ERA: " << calendar->get( Calendar::ERA, success ) << endl;
+ *     cout << "YEAR: " << calendar->get( Calendar::YEAR, success ) << endl;
+ *     cout << "MONTH: " << calendar->get( Calendar::MONTH, success ) << endl;
+ *     cout << "WEEK_OF_YEAR: " << calendar->get( Calendar::WEEK_OF_YEAR, success ) << endl;
+ *     cout << "WEEK_OF_MONTH: " << calendar->get( Calendar::WEEK_OF_MONTH, success ) << endl;
+ *     cout << "DATE: " << calendar->get( Calendar::DATE, success ) << endl;
+ *     cout << "DAY_OF_MONTH: " << calendar->get( Calendar::DAY_OF_MONTH, success ) << endl;
+ *     cout << "DAY_OF_YEAR: " << calendar->get( Calendar::DAY_OF_YEAR, success ) << endl;
+ *     cout << "DAY_OF_WEEK: " << calendar->get( Calendar::DAY_OF_WEEK, success ) << endl;
+ *     cout << "DAY_OF_WEEK_IN_MONTH: " << calendar->get( Calendar::DAY_OF_WEEK_IN_MONTH, success ) << endl;
+ *     cout << "AM_PM: " << calendar->get( Calendar::AM_PM, success ) << endl;
+ *     cout << "HOUR: " << calendar->get( Calendar::HOUR, success ) << endl;
+ *     cout << "HOUR_OF_DAY: " << calendar->get( Calendar::HOUR_OF_DAY, success ) << endl;
+ *     cout << "MINUTE: " << calendar->get( Calendar::MINUTE, success ) << endl;
+ *     cout << "SECOND: " << calendar->get( Calendar::SECOND, success ) << endl;
+ *     cout << "MILLISECOND: " << calendar->get( Calendar::MILLISECOND, success ) << endl;
+ *     cout << "ZONE_OFFSET: " << (calendar->get( Calendar::ZONE_OFFSET, success )/(60*60*1000)) << endl; // in hours
+ *     cout << "DST_OFFSET: " << (calendar->get( Calendar::DST_OFFSET, success )/(60*60*1000)) << endl; // in hours
+ *
+ *     if (U_FAILURE(success)) {
+ *         cout << "An error occured. success=" << u_errorName(success) << endl;
+ *     }
+ *
+ *     delete ids;
+ *     delete calendar; // also deletes pdt
+ * \endcode
+ * </pre>
+ * @stable ICU 2.0
+ */
+class U_I18N_API GregorianCalendar: public Calendar {
+public:
+
+    /**
+     * Useful constants for GregorianCalendar and TimeZone.
+     * @stable ICU 2.0
+     */
+    enum EEras {
+        BC,
+        AD
+    };
+
+    /**
+     * Constructs a default GregorianCalendar using the current time in the default time
+     * zone with the default locale.
+     *
+     * @param success  Indicates the status of GregorianCalendar object construction.
+     *                 Returns U_ZERO_ERROR if constructed successfully.
+     * @stable ICU 2.0
+     */
+    GregorianCalendar(UErrorCode& success);
+
+    /**
+     * Constructs a GregorianCalendar based on the current time in the given time zone
+     * with the default locale. Clients are no longer responsible for deleting the given
+     * time zone object after it's adopted.
+     *
+     * @param zoneToAdopt     The given timezone.
+     * @param success  Indicates the status of GregorianCalendar object construction.
+     *                 Returns U_ZERO_ERROR if constructed successfully.
+     * @stable ICU 2.0
+     */
+    GregorianCalendar(TimeZone* zoneToAdopt, UErrorCode& success);
+
+    /**
+     * Constructs a GregorianCalendar based on the current time in the given time zone
+     * with the default locale.
+     *
+     * @param zone     The given timezone.
+     * @param success  Indicates the status of GregorianCalendar object construction.
+     *                 Returns U_ZERO_ERROR if constructed successfully.
+     * @stable ICU 2.0
+     */
+    GregorianCalendar(const TimeZone& zone, UErrorCode& success);
+
+    /**
+     * Constructs a GregorianCalendar based on the current time in the default time zone
+     * with the given locale.
+     *
+     * @param aLocale  The given locale.
+     * @param success  Indicates the status of GregorianCalendar object construction.
+     *                 Returns U_ZERO_ERROR if constructed successfully.
+     * @stable ICU 2.0
+     */
+    GregorianCalendar(const Locale& aLocale, UErrorCode& success);
+
+    /**
+     * Constructs a GregorianCalendar based on the current time in the given time zone
+     * with the given locale. Clients are no longer responsible for deleting the given
+     * time zone object after it's adopted.
+     *
+     * @param zoneToAdopt     The given timezone.
+     * @param aLocale  The given locale.
+     * @param success  Indicates the status of GregorianCalendar object construction.
+     *                 Returns U_ZERO_ERROR if constructed successfully.
+     * @stable ICU 2.0
+     */
+    GregorianCalendar(TimeZone* zoneToAdopt, const Locale& aLocale, UErrorCode& success);
+
+    /**
+     * Constructs a GregorianCalendar based on the current time in the given time zone
+     * with the given locale.
+     *
+     * @param zone     The given timezone.
+     * @param aLocale  The given locale.
+     * @param success  Indicates the status of GregorianCalendar object construction.
+     *                 Returns U_ZERO_ERROR if constructed successfully.
+     * @stable ICU 2.0
+     */
+    GregorianCalendar(const TimeZone& zone, const Locale& aLocale, UErrorCode& success);
+
+    /**
+     * Constructs a GregorianCalendar with the given AD date set in the default time
+     * zone with the default locale.
+     *
+     * @param year     The value used to set the YEAR time field in the calendar.
+     * @param month    The value used to set the MONTH time field in the calendar. Month
+     *                 value is 0-based. e.g., 0 for January.
+     * @param date     The value used to set the DATE time field in the calendar.
+     * @param success  Indicates the status of GregorianCalendar object construction.
+     *                 Returns U_ZERO_ERROR if constructed successfully.
+     * @stable ICU 2.0
+     */
+    GregorianCalendar(int32_t year, int32_t month, int32_t date, UErrorCode& success);
+
+    /**
+     * Constructs a GregorianCalendar with the given AD date and time set for the
+     * default time zone with the default locale.
+     *
+     * @param year     The value used to set the YEAR time field in the calendar.
+     * @param month    The value used to set the MONTH time field in the calendar. Month
+     *                 value is 0-based. e.g., 0 for January.
+     * @param date     The value used to set the DATE time field in the calendar.
+     * @param hour     The value used to set the HOUR_OF_DAY time field in the calendar.
+     * @param minute   The value used to set the MINUTE time field in the calendar.
+     * @param success  Indicates the status of GregorianCalendar object construction.
+     *                 Returns U_ZERO_ERROR if constructed successfully.
+     * @stable ICU 2.0
+     */
+    GregorianCalendar(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute, UErrorCode& success);
+
+    /**
+     * Constructs a GregorianCalendar with the given AD date and time set for the
+     * default time zone with the default locale.
+     *
+     * @param year     The value used to set the YEAR time field in the calendar.
+     * @param month    The value used to set the MONTH time field in the calendar. Month
+     *                 value is 0-based. e.g., 0 for January.
+     * @param date     The value used to set the DATE time field in the calendar.
+     * @param hour     The value used to set the HOUR_OF_DAY time field in the calendar.
+     * @param minute   The value used to set the MINUTE time field in the calendar.
+     * @param second   The value used to set the SECOND time field in the calendar.
+     * @param success  Indicates the status of GregorianCalendar object construction.
+     *                 Returns U_ZERO_ERROR if constructed successfully.
+     * @stable ICU 2.0
+     */
+    GregorianCalendar(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute, int32_t second, UErrorCode& success);
+
+    /**
+     * Destructor
+     * @stable ICU 2.0
+     */
+    virtual ~GregorianCalendar();
+
+    /**
+     * Copy constructor
+     * @param source    the object to be copied.
+     * @stable ICU 2.0
+     */
+    GregorianCalendar(const GregorianCalendar& source);
+
+    /**
+     * Default assignment operator
+     * @param right    the object to be copied.
+     * @stable ICU 2.0
+     */
+    GregorianCalendar& operator=(const GregorianCalendar& right);
+
+    /**
+     * Create and return a polymorphic copy of this calendar.
+     * @return    return a polymorphic copy of this calendar.
+     * @stable ICU 2.0
+     */
+    virtual Calendar* clone(void) const;
+
+    /**
+     * Sets the GregorianCalendar change date. This is the point when the switch from
+     * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October
+     * 15, 1582. Previous to this time and date will be Julian dates.
+     *
+     * @param date     The given Gregorian cutover date.
+     * @param success  Output param set to success/failure code on exit.
+     * @stable ICU 2.0
+     */
+    void setGregorianChange(UDate date, UErrorCode& success);
+
+    /**
+     * Gets the Gregorian Calendar change date. This is the point when the switch from
+     * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October
+     * 15, 1582. Previous to this time and date will be Julian dates.
+     *
+     * @return   The Gregorian cutover time for this calendar.
+     * @stable ICU 2.0
+     */
+    UDate getGregorianChange(void) const;
+
+    /**
+     * Return true if the given year is a leap year. Determination of whether a year is
+     * a leap year is actually very complicated. We do something crude and mostly
+     * correct here, but for a real determination you need a lot of contextual
+     * information. For example, in Sweden, the change from Julian to Gregorian happened
+     * in a complex way resulting in missed leap years and double leap years between
+     * 1700 and 1753. Another example is that after the start of the Julian calendar in
+     * 45 B.C., the leap years did not regularize until 8 A.D. This method ignores these
+     * quirks, and pays attention only to the Julian onset date and the Gregorian
+     * cutover (which can be changed).
+     *
+     * @param year  The given year.
+     * @return      True if the given year is a leap year; false otherwise.
+     * @stable ICU 2.0
+     */
+    UBool isLeapYear(int32_t year) const;
+
+    /**
+     * Returns TRUE if the given Calendar object is equivalent to this
+     * one.  Calendar override.
+     *
+     * @param other the Calendar to be compared with this Calendar   
+     * @stable ICU 2.4
+     */
+    virtual UBool isEquivalentTo(const Calendar& other) const;
+
+    /**
+     * (Overrides Calendar) Rolls up or down by the given amount in the specified field.
+     * For more information, see the documentation for Calendar::roll().
+     *
+     * @param field   The time field.
+     * @param amount  Indicates amount to roll.
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid, this will be set to
+     *                an error status.
+     * @deprecated ICU 2.6. Use roll(UCalendarDateFields field, int32_t amount, UErrorCode& status) instead.
+     */
+    virtual void roll(EDateFields field, int32_t amount, UErrorCode& status);
+
+    /**
+     * (Overrides Calendar) Rolls up or down by the given amount in the specified field.
+     * For more information, see the documentation for Calendar::roll().
+     *
+     * @param field   The time field.
+     * @param amount  Indicates amount to roll.
+     * @param status  Output param set to success/failure code on exit. If any value
+     *                previously set in the time field is invalid, this will be set to
+     *                an error status.
+     * @stable ICU 2.6.
+     */
+    virtual void roll(UCalendarDateFields field, int32_t amount, UErrorCode& status);
+
+    /**
+     * Return the minimum value that this field could have, given the current date.
+     * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum().
+     * @param field    the time field.
+     * @return         the minimum value that this field could have, given the current date.
+     * @deprecated ICU 2.6. Use getActualMinimum(UCalendarDateFields field) instead.
+     */
+    int32_t getActualMinimum(EDateFields field) const;
+
+    /**
+     * Return the minimum value that this field could have, given the current date.
+     * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum().
+     * @param field    the time field.
+     * @param status
+     * @return         the minimum value that this field could have, given the current date.
+     * @deprecated ICU 2.6. Use getActualMinimum(UCalendarDateFields field) instead. (Added to ICU 3.0 for signature consistency)
+     */
+    int32_t getActualMinimum(EDateFields field, UErrorCode& status) const;
+
+    /**
+     * Return the minimum value that this field could have, given the current date.
+     * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum().
+     * @param field    the time field.
+     * @param status   error result.
+     * @return         the minimum value that this field could have, given the current date.
+     * @stable ICU 3.0
+     */
+    int32_t getActualMinimum(UCalendarDateFields field, UErrorCode &status) const;
+
+    /**
+     * Return the maximum value that this field could have, given the current date.
+     * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual
+     * maximum would be 28; for "Feb 3, 1996" it s 29.  Similarly for a Hebrew calendar,
+     * for some years the actual maximum for MONTH is 12, and for others 13.
+     * @param field    the time field.
+     * @return         the maximum value that this field could have, given the current date.
+     * @deprecated ICU 2.6. Use getActualMaximum(UCalendarDateFields field) instead.
+     */
+    int32_t getActualMaximum(EDateFields field) const;
+
+    /**
+     * Return the maximum value that this field could have, given the current date.
+     * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual
+     * maximum would be 28; for "Feb 3, 1996" it s 29.  Similarly for a Hebrew calendar,
+     * for some years the actual maximum for MONTH is 12, and for others 13.
+     * @param field    the time field.
+     * @param status   returns any errors that may result from this function call.
+     * @return         the maximum value that this field could have, given the current date.
+     * @stable ICU 2.6
+     */
+    virtual int32_t getActualMaximum(UCalendarDateFields field, UErrorCode& status) const;
+
+    /**
+     * (Overrides Calendar) Return true if the current date for this Calendar is in
+     * Daylight Savings Time. Recognizes DST_OFFSET, if it is set.
+     *
+     * @param status Fill-in parameter which receives the status of this operation.
+     * @return   True if the current date for this Calendar is in Daylight Savings Time,
+     *           false, otherwise.
+     * @stable ICU 2.0
+     */
+    virtual UBool inDaylightTime(UErrorCode& status) const;
+
+public:
+
+    /**
+     * Override Calendar Returns a unique class ID POLYMORPHICALLY. Pure virtual
+     * override. This method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone() methods call
+     * this method.
+     *
+     * @return   The class ID for this object. All objects of a given class have the
+     *           same class ID. Objects of other classes have different class IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+    /**
+     * Return the class ID for this class. This is useful only for comparing to a return
+     * value from getDynamicClassID(). For example:
+     *
+     *      Base* polymorphic_pointer = createPolymorphicObject();
+     *      if (polymorphic_pointer->getDynamicClassID() ==
+     *          Derived::getStaticClassID()) ...
+     *
+     * @return   The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Get the calendar type, "gregorian", for use in DateFormatSymbols.
+     *
+     * @return calendar type
+     * @internal
+     */
+    virtual const char * getType() const;
+
+protected:
+
+    /**
+     * (Overrides Calendar) Converts GMT as milliseconds to time field values.
+     * @param status Fill-in parameter which receives the status of this operation.
+     * @stable ICU 2.0
+     */
+
+ private:
+    GregorianCalendar(); // default constructor not implemented
+
+ protected:
+    /**
+     * Return the ERA.  We need a special method for this because the
+     * default ERA is AD, but a zero (unset) ERA is BC.
+     * @return    the ERA.
+     * @internal
+     */
+    virtual int32_t internalGetEra() const;
+
+    /**
+     * Return the Julian day number of day before the first day of the
+     * given month in the given extended year.  Subclasses should override
+     * this method to implement their calendar system.
+     * @param eyear the extended year
+     * @param month the zero-based month, or 0 if useMonth is false
+     * @param useMonth if false, compute the day before the first day of
+     * the given year, otherwise, compute the day before the first day of
+     * the given month
+     * @return the Julian day number of the day before the first
+     * day of the given month and year
+     * @internal
+     */
+    virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month,
+                                                   UBool useMonth) const;
+
+    /**
+     * Subclasses may override this.  This method calls
+     * handleGetMonthLength() to obtain the calendar-specific month
+     * length.
+     * @param bestField which field to use to calculate the date 
+     * @return julian day specified by calendar fields.
+     * @internal
+     */
+    virtual int32_t handleComputeJulianDay(UCalendarDateFields bestField)  ;
+
+    /**
+     * Return the number of days in the given month of the given extended
+     * year of this calendar system.  Subclasses should override this
+     * method if they can provide a more correct or more efficient
+     * implementation than the default implementation in Calendar.
+     * @internal
+     */
+    virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const;
+
+    /**
+     * Return the number of days in the given extended year of this
+     * calendar system.  Subclasses should override this method if they can
+     * provide a more correct or more efficient implementation than the
+     * default implementation in Calendar.
+     * @stable ICU 2.0
+     */
+    virtual int32_t handleGetYearLength(int32_t eyear) const;
+
+    /**
+     * return the length of the given month.
+     * @param month    the given month.
+     * @return    the length of the given month.
+     * @internal
+     */
+    virtual int32_t monthLength(int32_t month) const;
+
+    /**
+     * return the length of the month according to the given year.
+     * @param month    the given month.
+     * @param year     the given year.
+     * @return         the length of the month
+     * @internal
+     */
+    virtual int32_t monthLength(int32_t month, int32_t year) const;
+    
+    /**
+     * return the length of the given year.
+     * @param year    the given year.
+     * @return        the length of the given year.
+     * @internal
+     */
+    int32_t yearLength(int32_t year) const;
+    
+    /**
+     * return the length of the year field.
+     * @return    the length of the year field
+     * @internal
+     */
+    int32_t yearLength(void) const;
+
+    /**
+     * After adjustments such as add(MONTH), add(YEAR), we don't want the
+     * month to jump around.  E.g., we don't want Jan 31 + 1 month to go to Mar
+     * 3, we want it to go to Feb 28.  Adjustments which might run into this
+     * problem call this method to retain the proper month.
+     * @internal
+     */
+    void pinDayOfMonth(void);
+
+    /**
+     * Return the day number with respect to the epoch.  January 1, 1970 (Gregorian)
+     * is day zero.
+     * @param status Fill-in parameter which receives the status of this operation.
+     * @return       the day number with respect to the epoch.  
+     * @internal
+     */
+    virtual UDate getEpochDay(UErrorCode& status);
+
+    /**
+     * Subclass API for defining limits of different types.
+     * Subclasses must implement this method to return limits for the
+     * following fields:
+     *
+     * <pre>UCAL_ERA
+     * UCAL_YEAR
+     * UCAL_MONTH
+     * UCAL_WEEK_OF_YEAR
+     * UCAL_WEEK_OF_MONTH
+     * UCAL_DATE (DAY_OF_MONTH on Java)
+     * UCAL_DAY_OF_YEAR
+     * UCAL_DAY_OF_WEEK_IN_MONTH
+     * UCAL_YEAR_WOY
+     * UCAL_EXTENDED_YEAR</pre>
+     *
+     * @param field one of the above field numbers
+     * @param limitType one of <code>MINIMUM</code>, <code>GREATEST_MINIMUM</code>,
+     * <code>LEAST_MAXIMUM</code>, or <code>MAXIMUM</code>
+     * @internal
+     */
+    virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const;
+
+    /**
+     * Return the extended year defined by the current fields.  This will
+     * use the UCAL_EXTENDED_YEAR field or the UCAL_YEAR and supra-year fields (such
+     * as UCAL_ERA) specific to the calendar system, depending on which set of
+     * fields is newer.
+     * @return the extended year
+     * @internal
+     */
+    virtual int32_t handleGetExtendedYear();
+
+    /** 
+     * Subclasses may override this to convert from week fields 
+     * (YEAR_WOY and WEEK_OF_YEAR) to an extended year in the case
+     * where YEAR, EXTENDED_YEAR are not set.
+     * The Gregorian implementation assumes a yearWoy in gregorian format, according to the current era.
+     * @return the extended year, UCAL_EXTENDED_YEAR
+     * @internal
+     */
+    virtual int32_t handleGetExtendedYearFromWeekFields(int32_t yearWoy, int32_t woy);
+
+
+    /**
+     * Subclasses may override this method to compute several fields
+     * specific to each calendar system.  These are:
+     *
+     * <ul><li>ERA
+     * <li>YEAR
+     * <li>MONTH
+     * <li>DAY_OF_MONTH
+     * <li>DAY_OF_YEAR
+     * <li>EXTENDED_YEAR</ul>
+     *
+     * <p>The GregorianCalendar implementation implements
+     * a calendar with the specified Julian/Gregorian cutover date.
+     * @internal
+     */
+    virtual void handleComputeFields(int32_t julianDay, UErrorCode &status);
+
+ private:
+    /**
+     * Compute the julian day number of the given year.
+     * @param isGregorian    if true, using Gregorian calendar, otherwise using Julian calendar
+     * @param year           the given year.
+     * @param isLeap         true if the year is a leap year.       
+     * @return 
+     */
+    static double computeJulianDayOfYear(UBool isGregorian, int32_t year,
+                                         UBool& isLeap);
+    
+    /**
+     * Validates the values of the set time fields.  True if they're all valid.
+     * @return    True if the set time fields are all valid.
+     */
+    UBool validateFields(void) const;
+
+    /**
+     * Validates the value of the given time field.  True if it's valid.
+     */
+    UBool boundsCheck(int32_t value, UCalendarDateFields field) const;
+
+    /**
+     * Return the pseudo-time-stamp for two fields, given their
+     * individual pseudo-time-stamps.  If either of the fields
+     * is unset, then the aggregate is unset.  Otherwise, the
+     * aggregate is the later of the two stamps.
+     * @param stamp_a    One given field.
+     * @param stamp_b    Another given field.
+     * @return the pseudo-time-stamp for two fields
+     */
+    int32_t aggregateStamp(int32_t stamp_a, int32_t stamp_b);
+
+    /**
+     * The point at which the Gregorian calendar rules are used, measured in
+     * milliseconds from the standard epoch.  Default is October 15, 1582
+     * (Gregorian) 00:00:00 UTC, that is, October 4, 1582 (Julian) is followed
+     * by October 15, 1582 (Gregorian).  This corresponds to Julian day number
+     * 2299161. This is measured from the standard epoch, not in Julian Days.
+     * @internal
+     */
+    UDate                fGregorianCutover;
+
+    /**
+     * Julian day number of the Gregorian cutover
+     */
+    int32_t             fCutoverJulianDay;
+
+    /**
+     * Midnight, local time (using this Calendar's TimeZone) at or before the
+     * gregorianCutover. This is a pure date value with no time of day or
+     * timezone component.
+     */
+    UDate                 fNormalizedGregorianCutover;// = gregorianCutover;
+
+    /**
+     * The year of the gregorianCutover, with 0 representing
+     * 1 BC, -1 representing 2 BC, etc.
+     */
+    int32_t fGregorianCutoverYear;// = 1582;
+
+    /**
+     * The year of the gregorianCutover, with 0 representing
+     * 1 BC, -1 representing 2 BC, etc.
+     */
+    int32_t fGregorianCutoverJulianDay;// = 2299161;
+
+    /**
+     * Converts time as milliseconds to Julian date. The Julian date used here is not a
+     * true Julian date, since it is measured from midnight, not noon.
+     *
+     * @param millis  The given milliseconds.
+     * @return        The Julian date number.
+     */
+    static double millisToJulianDay(UDate millis);
+
+    /**
+     * Converts Julian date to time as milliseconds. The Julian date used here is not a
+     * true Julian date, since it is measured from midnight, not noon.
+     *
+     * @param julian  The given Julian date number.
+     * @return        Time as milliseconds.
+     */
+    static UDate julianDayToMillis(double julian);
+
+    /**
+     * Used by handleComputeJulianDay() and handleComputeMonthStart().
+     * Temporary field indicating whether the calendar is currently Gregorian as opposed to Julian.
+     */
+    UBool fIsGregorian;
+
+    /**
+     * Used by handleComputeJulianDay() and handleComputeMonthStart().
+     * Temporary field indicating that the sense of the gregorian cutover should be inverted
+     * to handle certain calculations on and around the cutover date.
+     */
+    UBool fInvertGregorian;
+
+
+ public: // internal implementation
+
+    /**
+     * @internal 
+     * @return TRUE if this calendar has the notion of a default century
+     */
+    virtual UBool haveDefaultCentury() const;
+
+    /**
+     * @internal
+     * @return the start of the default century
+     */
+    virtual UDate defaultCenturyStart() const;
+
+    /**
+     * @internal 
+     * @return the beginning year of the default century
+     */
+    virtual int32_t defaultCenturyStartYear() const;
+
+ private:
+    /**
+     * The system maintains a static default century start date.  This is initialized
+     * the first time it is used.  Before then, it is set to SYSTEM_DEFAULT_CENTURY to
+     * indicate an uninitialized state.  Once the system default century date and year
+     * are set, they do not change.
+     */
+    static UDate         fgSystemDefaultCenturyStart;
+
+    /**
+     * See documentation for systemDefaultCenturyStart.
+     */
+    static int32_t          fgSystemDefaultCenturyStartYear;
+
+    /**
+     * Default value that indicates the defaultCenturyStartYear is unitialized
+     */
+    static const int32_t    fgSystemDefaultCenturyYear;
+
+    /**
+     * Default value that indicates the UDate of the beginning of the system default century
+     */
+    static const UDate        fgSystemDefaultCentury;
+
+    /**
+     * Returns the beginning date of the 100-year window that dates with 2-digit years
+     * are considered to fall within.
+     * @return    the beginning date of the 100-year window that dates with 2-digit years
+     *            are considered to fall within.
+     */
+    UDate         internalGetDefaultCenturyStart(void) const;
+
+    /**
+     * Returns the first year of the 100-year window that dates with 2-digit years
+     * are considered to fall within.
+     * @return    the first year of the 100-year window that dates with 2-digit years
+     *            are considered to fall within.
+     */
+    int32_t          internalGetDefaultCenturyStartYear(void) const;
+
+    /**
+     * Initializes the 100-year window that dates with 2-digit years are considered
+     * to fall within so that its start date is 80 years before the current time.
+     */
+    static void  initializeSystemDefaultCentury(void);
+
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _GREGOCAL
+//eof
+

Deleted: MacRuby/trunk/icu-1060/unicode/locid.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/locid.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/locid.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,765 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 1996-2006, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-******************************************************************************
-*
-* File locid.h
-*
-* Created by: Helena Shih
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   02/11/97    aliu        Changed gLocPath to fgLocPath and added methods to
-*                           get and set it.
-*   04/02/97    aliu        Made operator!= inline; fixed return value of getName().
-*   04/15/97    aliu        Cleanup for AIX/Win32.
-*   04/24/97    aliu        Numerous changes per code review.
-*   08/18/98    stephen     Added tokenizeString(),changed getDisplayName()
-*   09/08/98    stephen     Moved definition of kEmptyString for Mac Port
-*   11/09/99    weiv        Added const char * getName() const;
-*   04/12/00    srl         removing unicodestring api's and cached hash code
-*   08/10/01    grhoten     Change the static Locales to accessor functions
-******************************************************************************
-*/
-
-#ifndef LOCID_H
-#define LOCID_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-#include "unicode/putil.h"
-#include "unicode/uloc.h"
-#include "unicode/strenum.h"
-
-/**
- * \file
- * \brief C++ API: Locale ID object.
- */
-
-/**
- * A <code>Locale</code> object represents a specific geographical, political,
- * or cultural region. An operation that requires a <code>Locale</code> to perform
- * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
- * to tailor information for the user. For example, displaying a number
- * is a locale-sensitive operation--the number should be formatted
- * according to the customs/conventions of the user's native country,
- * region, or culture.
- *
- * The Locale class is not suitable for subclassing.
- *
- * <P>
- * You can create a <code>Locale</code> object using the constructor in
- * this class:
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- *       Locale( const   char*  language,
- *               const   char*  country,
- *               const   char*  variant);
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- * The first argument to the constructors is a valid <STRONG>ISO
- * Language Code.</STRONG> These codes are the lower-case two-letter
- * codes as defined by ISO-639.
- * You can find a full list of these codes at:
- * <BR><a href ="http://www.loc.gov/standards/iso639-2/">
- * http://www.loc.gov/standards/iso639-2/</a>
- *
- * <P>
- * The second argument to the constructors is a valid <STRONG>ISO Country
- * Code.</STRONG> These codes are the upper-case two-letter codes
- * as defined by ISO-3166.
- * You can find a full list of these codes at a number of sites, such as:
- * <BR><a href="http://www.iso.org/iso/en/prods-services/iso3166ma/index.html">
- * http://www.iso.org/iso/en/prods-services/iso3166ma/index.html</a>
- *
- * <P>
- * The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
- * The Variant codes are vendor and browser-specific.
- * For example, use REVISED for a langauge's revised script orthography, and POSIX for POSIX.
- * Where there are two variants, separate them with an underscore, and
- * put the most important one first. For
- * example, a Traditional Spanish collation might be referenced, with
- * "ES", "ES", "Traditional_POSIX".
- *
- * <P>
- * Because a <code>Locale</code> object is just an identifier for a region,
- * no validity check is performed when you construct a <code>Locale</code>.
- * If you want to see whether particular resources are available for the
- * <code>Locale</code> you construct, you must query those resources. For
- * example, ask the <code>NumberFormat</code> for the locales it supports
- * using its <code>getAvailableLocales</code> method.
- * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
- * locale, you get back the best available match, not necessarily
- * precisely what you asked for. For more information, look at
- * <code>ResourceBundle</code>.
- *
- * <P>
- * The <code>Locale</code> class provides a number of convenient constants
- * that you can use to create <code>Locale</code> objects for commonly used
- * locales. For example, the following refers to a <code>Locale</code> object
- * for the United States:
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- *       Locale::getUS()
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- *
- * <P>
- * Once you've created a <code>Locale</code> you can query it for information about
- * itself. Use <code>getCountry</code> to get the ISO Country Code and
- * <code>getLanguage</code> to get the ISO Language Code. You can
- * use <code>getDisplayCountry</code> to get the
- * name of the country suitable for displaying to the user. Similarly,
- * you can use <code>getDisplayLanguage</code> to get the name of
- * the language suitable for displaying to the user. Interestingly,
- * the <code>getDisplayXXX</code> methods are themselves locale-sensitive
- * and have two versions: one that uses the default locale and one
- * that takes a locale as an argument and displays the name or country in
- * a language appropriate to that locale.
- *
- * <P>
- * ICU provides a number of classes that perform locale-sensitive
- * operations. For example, the <code>NumberFormat</code> class formats
- * numbers, currency, or percentages in a locale-sensitive manner. Classes
- * such as <code>NumberFormat</code> have a number of convenience methods
- * for creating a default object of that type. For example, the
- * <code>NumberFormat</code> class provides these three convenience methods
- * for creating a default <code>NumberFormat</code> object:
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- *     UErrorCode success = U_ZERO_ERROR;
- *     Locale myLocale;
- *     NumberFormat *nf;
- *
- *     nf = NumberFormat::createInstance( success );          delete nf;
- *     nf = NumberFormat::createCurrencyInstance( success );  delete nf;
- *     nf = NumberFormat::createPercentInstance( success );   delete nf;
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- * Each of these methods has two variants; one with an explicit locale
- * and one without; the latter using the default locale.
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- *     nf = NumberFormat::createInstance( myLocale, success );          delete nf;
- *     nf = NumberFormat::createCurrencyInstance( myLocale, success );  delete nf;
- *     nf = NumberFormat::createPercentInstance( myLocale, success );   delete nf;
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- * A <code>Locale</code> is the mechanism for identifying the kind of object
- * (<code>NumberFormat</code>) that you would like to get. The locale is
- * <STRONG>just</STRONG> a mechanism for identifying objects,
- * <STRONG>not</STRONG> a container for the objects themselves.
- *
- * <P>
- * Each class that performs locale-sensitive operations allows you
- * to get all the available objects of that type. You can sift
- * through these objects by language, country, or variant,
- * and use the display names to present a menu to the user.
- * For example, you can create a menu of all the collation objects
- * suitable for a given language. Such classes implement these
- * three class methods:
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- *       static Locale* getAvailableLocales(int32_t& numLocales)
- *       static UnicodeString& getDisplayName(const Locale&  objectLocale,
- *                                            const Locale&  displayLocale,
- *                                            UnicodeString& displayName)
- *       static UnicodeString& getDisplayName(const Locale&  objectLocale,
- *                                            UnicodeString& displayName)
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- *
- * @stable ICU 2.0
- * @see ResourceBundle
- */
-U_NAMESPACE_BEGIN
-class U_COMMON_API Locale : public UObject {
-public:
-    /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getEnglish(void);
-    /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getFrench(void);
-    /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getGerman(void);
-    /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getItalian(void);
-    /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getJapanese(void);
-    /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getKorean(void);
-    /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getChinese(void);
-    /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getSimplifiedChinese(void);
-    /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getTraditionalChinese(void);
-
-    /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getFrance(void);
-    /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getGermany(void);
-    /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getItaly(void);
-    /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getJapan(void);
-    /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getKorea(void);
-    /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getChina(void);
-    /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getPRC(void);
-    /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getTaiwan(void);
-    /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getUK(void);
-    /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getUS(void);
-    /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getCanada(void);
-    /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getCanadaFrench(void);
-
-
-    /**
-     * Construct a default locale object, a Locale for the default locale ID.
-     *
-     * @see getDefault
-     * @see uloc_getDefault
-     * @stable ICU 2.0
-     */
-    Locale();
-
-    /**
-     * Construct a locale from language, country, variant.
-     * If an error occurs, then the constructed object will be "bogus"
-     * (isBogus() will return TRUE).
-     *
-     * @param language Lowercase two-letter or three-letter ISO-639 code.
-     *  This parameter can instead be an ICU style C locale (e.g. "en_US"),
-     *  but the other parameters must not be used.
-     *  This parameter can be NULL; if so,
-     *  the locale is initialized to match the current default locale.
-     *  (This is the same as using the default constructor.)
-     *  Please note: The Java Locale class does NOT accept the form
-     *  'new Locale("en_US")' but only 'new Locale("en","US")'
-     *
-     * @param country  Uppercase two-letter ISO-3166 code. (optional)
-     * @param variant  Uppercase vendor and browser specific code. See class
-     *                 description. (optional)
-     * @param keywordsAndValues A string consisting of keyword/values pairs, such as
-     *                 "collation=phonebook;currency=euro"
-     *
-     * @see getDefault
-     * @see uloc_getDefault
-     * @stable ICU 2.0
-     */
-    Locale( const   char * language,
-            const   char * country  = 0,
-            const   char * variant  = 0,
-            const   char * keywordsAndValues = 0);
-
-    /**
-     * Initializes a Locale object from another Locale object.
-     *
-     * @param other The Locale object being copied in.
-     * @stable ICU 2.0
-     */
-    Locale(const    Locale& other);
-
-
-    /**
-     * Destructor
-     * @stable ICU 2.0
-     */
-    virtual ~Locale() ;
-
-    /**
-     * Replaces the entire contents of *this with the specified value.
-     *
-     * @param other The Locale object being copied in.
-     * @return      *this
-     * @stable ICU 2.0
-     */
-    Locale& operator=(const Locale& other);
-
-    /**
-     * Checks if two locale keys are the same.
-     *
-     * @param other The locale key object to be compared with this.
-     * @return      True if the two locale keys are the same, false otherwise.
-     * @stable ICU 2.0
-     */
-    UBool   operator==(const    Locale&     other) const;
-
-    /**
-     * Checks if two locale keys are not the same.
-     *
-     * @param other The locale key object to be compared with this.
-     * @return      True if the two locale keys are not the same, false
-     *              otherwise.
-     * @stable ICU 2.0
-     */
-    UBool   operator!=(const    Locale&     other) const;
-
-    /**
-     * Clone this object.
-     * Clones can be used concurrently in multiple threads.
-     * If an error occurs, then NULL is returned.
-     * The caller must delete the clone.
-     *
-     * @return a clone of this object
-     *
-     * @see getDynamicClassID
-     * @stable ICU 2.8
-     */
-    Locale *clone() const;
-
-    /**
-     * Common methods of getting the current default Locale. Used for the
-     * presentation: menus, dialogs, etc. Generally set once when your applet or
-     * application is initialized, then never reset. (If you do reset the
-     * default locale, you probably want to reload your GUI, so that the change
-     * is reflected in your interface.)
-     *
-     * More advanced programs will allow users to use different locales for
-     * different fields, e.g. in a spreadsheet.
-     *
-     * Note that the initial setting will match the host system.
-     * @return a reference to the Locale object for the default locale ID
-     * @system
-     * @stable ICU 2.0
-     */
-    static const Locale& U_EXPORT2 getDefault(void);
-
-    /**
-     * Sets the default. Normally set once at the beginning of a process,
-     * then never reset.
-     * setDefault() only changes ICU's default locale ID, <strong>not</strong>
-     * the default locale ID of the runtime environment.
-     *
-     * @param newLocale Locale to set to.  If NULL, set to the value obtained
-     *                  from the runtime environement.
-     * @param success The error code.
-     * @system
-     * @stable ICU 2.0
-     */
-    static void U_EXPORT2 setDefault(const Locale& newLocale,
-                                     UErrorCode&   success);
-
-    /**
-     * Creates a locale which has had minimal canonicalization
-     * as per uloc_getName().
-     * @param name The name to create from.  If name is null,
-     *  the default Locale is used.
-     * @return new locale object
-     * @stable ICU 2.0
-     * @see uloc_getName
-     */
-    static Locale U_EXPORT2 createFromName(const char *name);
-
-    /**
-     * Creates a locale from the given string after canonicalizing
-     * the string by calling uloc_canonicalize().
-     * @param name the locale ID to create from.  Must not be NULL.
-     * @return a new locale object corresponding to the given name
-     * @stable ICU 3.0
-     * @see uloc_canonicalize
-     */
-    static Locale U_EXPORT2 createCanonical(const char* name);
-
-    /**
-     * Returns the locale's ISO-639 language code.
-     * @return      An alias to the code
-     * @stable ICU 2.0
-     */
-    inline const char *  getLanguage( ) const;
-
-    /**
-     * Returns the locale's ISO-15924 abbreviation script code.
-     * @return      An alias to the code
-     * @see uscript_getShortName
-     * @see uscript_getCode
-     * @stable ICU 2.8
-     */
-    inline const char *  getScript( ) const;
-
-    /**
-     * Returns the locale's ISO-3166 country code.
-     * @return      An alias to the code
-     * @stable ICU 2.0
-     */
-    inline const char *  getCountry( ) const;
-
-    /**
-     * Returns the locale's variant code.
-     * @return      An alias to the code
-     * @stable ICU 2.0
-     */
-    inline const char *  getVariant( ) const;
-
-    /**
-     * Returns the programmatic name of the entire locale, with the language,
-     * country and variant separated by underbars. If a field is missing, up
-     * to two leading underbars will occur. Example: "en", "de_DE", "en_US_WIN",
-     * "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO"
-     * @return      A pointer to "name".
-     * @stable ICU 2.0
-     */
-    inline const char * getName() const;
-
-    /**
-     * Returns the programmatic name of the entire locale as getName would return,
-     * but without keywords.
-     * @return      A pointer to "name".
-     * @see getName
-     * @stable ICU 2.8
-     */
-    const char * getBaseName() const;
-
-
-    /**
-     * Gets the list of keywords for the specified locale.
-     *
-     * @return pointer to StringEnumeration class. Client must dispose of it by calling delete.
-     * @param status Returns any error information while performing this operation.
-     * @stable ICU 2.8
-     */
-    StringEnumeration * createKeywords(UErrorCode &status) const;
-
-    /**
-     * Get the value for a keyword.
-     *
-     * @param keywordName name of the keyword for which we want the value. Case insensitive.
-     * @param status Returns any error information while performing this operation.
-     * @param buffer The buffer to receive the keyword value.
-     * @param bufferCapacity The capacity of receiving buffer
-     * @return the length of keyword value
-     *
-     * @stable ICU 2.8
-     */
-    int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const;
-
-    /**
-     * returns the locale's three-letter language code, as specified
-     * in ISO draft standard ISO-639-2.
-     * @return      An alias to the code, or NULL
-     * @stable ICU 2.0
-     */
-    const char * getISO3Language() const;
-
-    /**
-     * Fills in "name" with the locale's three-letter ISO-3166 country code.
-     * @return      An alias to the code, or NULL
-     * @stable ICU 2.0
-     */
-    const char * getISO3Country() const;
-
-    /**
-     * Returns the Windows LCID value corresponding to this locale.
-     * This value is stored in the resource data for the locale as a one-to-four-digit
-     * hexadecimal number.  If the resource is missing, in the wrong format, or
-     * there is no Windows LCID value that corresponds to this locale, returns 0.
-     * @stable ICU 2.0
-     */
-    uint32_t        getLCID(void) const;
-
-    /**
-     * Fills in "dispLang" with the name of this locale's language in a format suitable for
-     * user display in the default locale.  For example, if the locale's language code is
-     * "fr" and the default locale's language code is "en", this function would set
-     * dispLang to "French".
-     * @param dispLang  Receives the language's display name.
-     * @return          A reference to "dispLang".
-     * @stable ICU 2.0
-     */
-    UnicodeString&  getDisplayLanguage(UnicodeString&   dispLang) const;
-
-    /**
-     * Fills in "dispLang" with the name of this locale's language in a format suitable for
-     * user display in the locale specified by "displayLocale".  For example, if the locale's
-     * language code is "en" and displayLocale's language code is "fr", this function would set
-     * dispLang to "Anglais".
-     * @param displayLocale  Specifies the locale to be used to display the name.  In other words,
-     *                  if the locale's language code is "en", passing Locale::getFrench() for
-     *                  displayLocale would result in "Anglais", while passing Locale::getGerman()
-     *                  for displayLocale would result in "Englisch".
-     * @param dispLang  Receives the language's display name.
-     * @return          A reference to "dispLang".
-     * @stable ICU 2.0
-     */
-    UnicodeString&  getDisplayLanguage( const   Locale&         displayLocale,
-                                                UnicodeString&  dispLang) const;
-
-    /**
-     * Fills in "dispScript" with the name of this locale's script in a format suitable
-     * for user display in the default locale.  For example, if the locale's script code
-     * is "LATN" and the default locale's language code is "en", this function would set
-     * dispScript to "Latin".
-     * @param dispScript    Receives the scripts's display name.
-     * @return              A reference to "dispScript".
-     * @stable ICU 2.8
-     */
-    UnicodeString&  getDisplayScript(          UnicodeString& dispScript) const;
-
-    /**
-     * Fills in "dispScript" with the name of this locale's country in a format suitable
-     * for user display in the locale specified by "displayLocale".  For example, if the locale's
-     * script code is "LATN" and displayLocale's language code is "en", this function would set
-     * dispScript to "Latin".
-     * @param displayLocale      Specifies the locale to be used to display the name.  In other
-     *                      words, if the locale's script code is "LATN", passing
-     *                      Locale::getFrench() for displayLocale would result in "", while
-     *                      passing Locale::getGerman() for displayLocale would result in
-     *                      "".
-     * @param dispScript    Receives the scripts's display name.
-     * @return              A reference to "dispScript".
-     * @stable ICU 2.8
-     */
-    UnicodeString&  getDisplayScript(  const   Locale&         displayLocale,
-                                               UnicodeString&  dispScript) const;
-
-    /**
-     * Fills in "dispCountry" with the name of this locale's country in a format suitable
-     * for user display in the default locale.  For example, if the locale's country code
-     * is "FR" and the default locale's language code is "en", this function would set
-     * dispCountry to "France".
-     * @param dispCountry   Receives the country's display name.
-     * @return              A reference to "dispCountry".
-     * @stable ICU 2.0
-     */
-    UnicodeString&  getDisplayCountry(          UnicodeString& dispCountry) const;
-
-    /**
-     * Fills in "dispCountry" with the name of this locale's country in a format suitable
-     * for user display in the locale specified by "displayLocale".  For example, if the locale's
-     * country code is "US" and displayLocale's language code is "fr", this function would set
-     * dispCountry to "&Eacute;tats-Unis".
-     * @param displayLocale      Specifies the locale to be used to display the name.  In other
-     *                      words, if the locale's country code is "US", passing
-     *                      Locale::getFrench() for displayLocale would result in "&Eacute;tats-Unis", while
-     *                      passing Locale::getGerman() for displayLocale would result in
-     *                      "Vereinigte Staaten".
-     * @param dispCountry   Receives the country's display name.
-     * @return              A reference to "dispCountry".
-     * @stable ICU 2.0
-     */
-    UnicodeString&  getDisplayCountry(  const   Locale&         displayLocale,
-                                                UnicodeString&  dispCountry) const;
-
-    /**
-     * Fills in "dispVar" with the name of this locale's variant code in a format suitable
-     * for user display in the default locale.
-     * @param dispVar   Receives the variant's name.
-     * @return          A reference to "dispVar".
-     * @stable ICU 2.0
-     */
-    UnicodeString&  getDisplayVariant(      UnicodeString& dispVar) const;
-
-    /**
-     * Fills in "dispVar" with the name of this locale's variant code in a format
-     * suitable for user display in the locale specified by "displayLocale".
-     * @param displayLocale  Specifies the locale to be used to display the name.
-     * @param dispVar   Receives the variant's display name.
-     * @return          A reference to "dispVar".
-     * @stable ICU 2.0
-     */
-    UnicodeString&  getDisplayVariant(  const   Locale&         displayLocale,
-                                                UnicodeString&  dispVar) const;
-
-    /**
-     * Fills in "name" with the name of this locale in a format suitable for user display
-     * in the default locale.  This function uses getDisplayLanguage(), getDisplayCountry(),
-     * and getDisplayVariant() to do its work, and outputs the display name in the format
-     * "language (country[,variant])".  For example, if the default locale is en_US, then
-     * fr_FR's display name would be "French (France)", and es_MX_Traditional's display name
-     * would be "Spanish (Mexico,Traditional)".
-     * @param name  Receives the locale's display name.
-     * @return      A reference to "name".
-     * @stable ICU 2.0
-     */
-    UnicodeString&  getDisplayName(         UnicodeString&  name) const;
-
-    /**
-     * Fills in "name" with the name of this locale in a format suitable for user display
-     * in the locale specfied by "displayLocale".  This function uses getDisplayLanguage(),
-     * getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
-     * name in the format "language (country[,variant])".  For example, if displayLocale is
-     * fr_FR, then en_US's display name would be "Anglais (&Eacute;tats-Unis)", and no_NO_NY's
-     * display name would be "norv&eacute;gien (Norv&egrave;ge,NY)".
-     * @param displayLocale  Specifies the locale to be used to display the name.
-     * @param name      Receives the locale's display name.
-     * @return          A reference to "name".
-     * @stable ICU 2.0
-     */
-    UnicodeString&  getDisplayName( const   Locale&         displayLocale,
-                                            UnicodeString&  name) const;
-
-    /**
-     * Generates a hash code for the locale.
-     * @stable ICU 2.0
-     */
-    int32_t         hashCode(void) const;
-
-    /**
-     * Sets the locale to bogus
-     * A bogus locale represents a non-existing locale associated
-     * with services that can be instantiated from non-locale data
-     * in addition to locale (for example, collation can be
-     * instantiated from a locale and from a rule set).
-     * @stable ICU 2.1
-     */
-    void setToBogus();
-
-    /**
-     * Gets the bogus state. Locale object can be bogus if it doesn't exist
-     * @return FALSE if it is a real locale, TRUE if it is a bogus locale
-     * @stable ICU 2.1
-     */
-    UBool isBogus(void) const;
-
-    /**
-     * Returns a list of all installed locales.
-     * @param count Receives the number of locales in the list.
-     * @return      A pointer to an array of Locale objects.  This array is the list
-     *              of all locales with installed resource files.  The called does NOT
-     *              get ownership of this list, and must NOT delete it.
-     * @stable ICU 2.0
-     */
-    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
-
-    /**
-     * Gets a list of all available 2-letter country codes defined in ISO 639.  This is a
-     * pointer to an array of pointers to arrays of char.  All of these pointers are
-     * owned by ICU-- do not delete them, and do not write through them.  The array is
-     * terminated with a null pointer.
-     * @return a list of all available country codes
-     * @stable ICU 2.0
-     */
-    static const char* const* U_EXPORT2 getISOCountries();
-
-    /**
-     * Gets a list of all available language codes defined in ISO 639.  This is a pointer
-     * to an array of pointers to arrays of char.  All of these pointers are owned
-     * by ICU-- do not delete them, and do not write through them.  The array is
-     * terminated with a null pointer.
-     * @return a list of all available language codes
-     * @stable ICU 2.0
-     */
-    static const char* const* U_EXPORT2 getISOLanguages();
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.2
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.2
-     */
-    virtual UClassID getDynamicClassID() const;
-
-protected: /* only protected for testing purposes. DO NOT USE. */
-    /**
-     * Set this from a single POSIX style locale string.
-     * @internal
-     */
-    void setFromPOSIXID(const char *posixID);
-
-private:
-    /**
-     * Initialize the locale object with a new name.
-     * Was deprecated - used in implementation - moved internal
-     *
-     * @param cLocaleID The new locale name.
-     */
-    Locale& init(const char* cLocaleID, UBool canonicalize);
-
-    /*
-     * Internal constructor to allow construction of a locale object with
-     *   NO side effects.   (Default constructor tries to get
-     *   the default locale.)
-     */
-    enum ELocaleType {
-        eBOGUS
-    };
-    Locale(ELocaleType);
-
-    /**
-     * Initialize the locale cache for commonly used locales
-     */
-    static Locale *getLocaleCache(void);
-
-    char language[ULOC_LANG_CAPACITY];
-    char script[ULOC_SCRIPT_CAPACITY];
-    char country[ULOC_COUNTRY_CAPACITY];
-    int32_t variantBegin;
-    char* fullName;
-    char fullNameBuffer[ULOC_FULLNAME_CAPACITY];
-    // name without keywords
-    char* baseName;
-    char baseNameBuffer[ULOC_FULLNAME_CAPACITY];
-
-    UBool fIsBogus;
-
-    static const Locale &getLocale(int locid);
-
-    /**
-     * A friend to allow the default locale to be set by either the C or C++ API.
-     * @internal
-     */
-    friend void locale_set_default_internal(const char *);
-};
-
-inline UBool
-Locale::operator!=(const    Locale&     other) const
-{
-    return !operator==(other);
-}
-
-inline const char *
-Locale::getCountry() const
-{
-    return country;
-}
-
-inline const char *
-Locale::getLanguage() const
-{
-    return language;
-}
-
-inline const char *
-Locale::getScript() const
-{
-    return script;
-}
-
-inline const char *
-Locale::getVariant() const
-{
-    return &fullName[variantBegin];
-}
-
-inline const char *
-Locale::getName() const
-{
-    return fullName;
-}
-
-inline UBool
-Locale::isBogus(void) const {
-    return fIsBogus;
-}
-
-U_NAMESPACE_END
-
-#endif
-

Copied: MacRuby/trunk/icu-1060/unicode/locid.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/locid.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/locid.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/locid.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,765 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1996-2006, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File locid.h
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   02/11/97    aliu        Changed gLocPath to fgLocPath and added methods to
+*                           get and set it.
+*   04/02/97    aliu        Made operator!= inline; fixed return value of getName().
+*   04/15/97    aliu        Cleanup for AIX/Win32.
+*   04/24/97    aliu        Numerous changes per code review.
+*   08/18/98    stephen     Added tokenizeString(),changed getDisplayName()
+*   09/08/98    stephen     Moved definition of kEmptyString for Mac Port
+*   11/09/99    weiv        Added const char * getName() const;
+*   04/12/00    srl         removing unicodestring api's and cached hash code
+*   08/10/01    grhoten     Change the static Locales to accessor functions
+******************************************************************************
+*/
+
+#ifndef LOCID_H
+#define LOCID_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/putil.h"
+#include "unicode/uloc.h"
+#include "unicode/strenum.h"
+
+/**
+ * \file
+ * \brief C++ API: Locale ID object.
+ */
+
+/**
+ * A <code>Locale</code> object represents a specific geographical, political,
+ * or cultural region. An operation that requires a <code>Locale</code> to perform
+ * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
+ * to tailor information for the user. For example, displaying a number
+ * is a locale-sensitive operation--the number should be formatted
+ * according to the customs/conventions of the user's native country,
+ * region, or culture.
+ *
+ * The Locale class is not suitable for subclassing.
+ *
+ * <P>
+ * You can create a <code>Locale</code> object using the constructor in
+ * this class:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ *       Locale( const   char*  language,
+ *               const   char*  country,
+ *               const   char*  variant);
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * The first argument to the constructors is a valid <STRONG>ISO
+ * Language Code.</STRONG> These codes are the lower-case two-letter
+ * codes as defined by ISO-639.
+ * You can find a full list of these codes at:
+ * <BR><a href ="http://www.loc.gov/standards/iso639-2/">
+ * http://www.loc.gov/standards/iso639-2/</a>
+ *
+ * <P>
+ * The second argument to the constructors is a valid <STRONG>ISO Country
+ * Code.</STRONG> These codes are the upper-case two-letter codes
+ * as defined by ISO-3166.
+ * You can find a full list of these codes at a number of sites, such as:
+ * <BR><a href="http://www.iso.org/iso/en/prods-services/iso3166ma/index.html">
+ * http://www.iso.org/iso/en/prods-services/iso3166ma/index.html</a>
+ *
+ * <P>
+ * The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
+ * The Variant codes are vendor and browser-specific.
+ * For example, use REVISED for a langauge's revised script orthography, and POSIX for POSIX.
+ * Where there are two variants, separate them with an underscore, and
+ * put the most important one first. For
+ * example, a Traditional Spanish collation might be referenced, with
+ * "ES", "ES", "Traditional_POSIX".
+ *
+ * <P>
+ * Because a <code>Locale</code> object is just an identifier for a region,
+ * no validity check is performed when you construct a <code>Locale</code>.
+ * If you want to see whether particular resources are available for the
+ * <code>Locale</code> you construct, you must query those resources. For
+ * example, ask the <code>NumberFormat</code> for the locales it supports
+ * using its <code>getAvailableLocales</code> method.
+ * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
+ * locale, you get back the best available match, not necessarily
+ * precisely what you asked for. For more information, look at
+ * <code>ResourceBundle</code>.
+ *
+ * <P>
+ * The <code>Locale</code> class provides a number of convenient constants
+ * that you can use to create <code>Locale</code> objects for commonly used
+ * locales. For example, the following refers to a <code>Locale</code> object
+ * for the United States:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ *       Locale::getUS()
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <P>
+ * Once you've created a <code>Locale</code> you can query it for information about
+ * itself. Use <code>getCountry</code> to get the ISO Country Code and
+ * <code>getLanguage</code> to get the ISO Language Code. You can
+ * use <code>getDisplayCountry</code> to get the
+ * name of the country suitable for displaying to the user. Similarly,
+ * you can use <code>getDisplayLanguage</code> to get the name of
+ * the language suitable for displaying to the user. Interestingly,
+ * the <code>getDisplayXXX</code> methods are themselves locale-sensitive
+ * and have two versions: one that uses the default locale and one
+ * that takes a locale as an argument and displays the name or country in
+ * a language appropriate to that locale.
+ *
+ * <P>
+ * ICU provides a number of classes that perform locale-sensitive
+ * operations. For example, the <code>NumberFormat</code> class formats
+ * numbers, currency, or percentages in a locale-sensitive manner. Classes
+ * such as <code>NumberFormat</code> have a number of convenience methods
+ * for creating a default object of that type. For example, the
+ * <code>NumberFormat</code> class provides these three convenience methods
+ * for creating a default <code>NumberFormat</code> object:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ *     UErrorCode success = U_ZERO_ERROR;
+ *     Locale myLocale;
+ *     NumberFormat *nf;
+ *
+ *     nf = NumberFormat::createInstance( success );          delete nf;
+ *     nf = NumberFormat::createCurrencyInstance( success );  delete nf;
+ *     nf = NumberFormat::createPercentInstance( success );   delete nf;
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * Each of these methods has two variants; one with an explicit locale
+ * and one without; the latter using the default locale.
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ *     nf = NumberFormat::createInstance( myLocale, success );          delete nf;
+ *     nf = NumberFormat::createCurrencyInstance( myLocale, success );  delete nf;
+ *     nf = NumberFormat::createPercentInstance( myLocale, success );   delete nf;
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * A <code>Locale</code> is the mechanism for identifying the kind of object
+ * (<code>NumberFormat</code>) that you would like to get. The locale is
+ * <STRONG>just</STRONG> a mechanism for identifying objects,
+ * <STRONG>not</STRONG> a container for the objects themselves.
+ *
+ * <P>
+ * Each class that performs locale-sensitive operations allows you
+ * to get all the available objects of that type. You can sift
+ * through these objects by language, country, or variant,
+ * and use the display names to present a menu to the user.
+ * For example, you can create a menu of all the collation objects
+ * suitable for a given language. Such classes implement these
+ * three class methods:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ *       static Locale* getAvailableLocales(int32_t& numLocales)
+ *       static UnicodeString& getDisplayName(const Locale&  objectLocale,
+ *                                            const Locale&  displayLocale,
+ *                                            UnicodeString& displayName)
+ *       static UnicodeString& getDisplayName(const Locale&  objectLocale,
+ *                                            UnicodeString& displayName)
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * @stable ICU 2.0
+ * @see ResourceBundle
+ */
+U_NAMESPACE_BEGIN
+class U_COMMON_API Locale : public UObject {
+public:
+    /** Useful constant for this language. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getEnglish(void);
+    /** Useful constant for this language. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getFrench(void);
+    /** Useful constant for this language. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getGerman(void);
+    /** Useful constant for this language. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getItalian(void);
+    /** Useful constant for this language. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getJapanese(void);
+    /** Useful constant for this language. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getKorean(void);
+    /** Useful constant for this language. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getChinese(void);
+    /** Useful constant for this language. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getSimplifiedChinese(void);
+    /** Useful constant for this language. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getTraditionalChinese(void);
+
+    /** Useful constant for this country/region. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getFrance(void);
+    /** Useful constant for this country/region. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getGermany(void);
+    /** Useful constant for this country/region. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getItaly(void);
+    /** Useful constant for this country/region. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getJapan(void);
+    /** Useful constant for this country/region. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getKorea(void);
+    /** Useful constant for this country/region. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getChina(void);
+    /** Useful constant for this country/region. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getPRC(void);
+    /** Useful constant for this country/region. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getTaiwan(void);
+    /** Useful constant for this country/region. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getUK(void);
+    /** Useful constant for this country/region. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getUS(void);
+    /** Useful constant for this country/region. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getCanada(void);
+    /** Useful constant for this country/region. @stable ICU 2.0 */
+    static const Locale &U_EXPORT2 getCanadaFrench(void);
+
+
+    /**
+     * Construct a default locale object, a Locale for the default locale ID.
+     *
+     * @see getDefault
+     * @see uloc_getDefault
+     * @stable ICU 2.0
+     */
+    Locale();
+
+    /**
+     * Construct a locale from language, country, variant.
+     * If an error occurs, then the constructed object will be "bogus"
+     * (isBogus() will return TRUE).
+     *
+     * @param language Lowercase two-letter or three-letter ISO-639 code.
+     *  This parameter can instead be an ICU style C locale (e.g. "en_US"),
+     *  but the other parameters must not be used.
+     *  This parameter can be NULL; if so,
+     *  the locale is initialized to match the current default locale.
+     *  (This is the same as using the default constructor.)
+     *  Please note: The Java Locale class does NOT accept the form
+     *  'new Locale("en_US")' but only 'new Locale("en","US")'
+     *
+     * @param country  Uppercase two-letter ISO-3166 code. (optional)
+     * @param variant  Uppercase vendor and browser specific code. See class
+     *                 description. (optional)
+     * @param keywordsAndValues A string consisting of keyword/values pairs, such as
+     *                 "collation=phonebook;currency=euro"
+     *
+     * @see getDefault
+     * @see uloc_getDefault
+     * @stable ICU 2.0
+     */
+    Locale( const   char * language,
+            const   char * country  = 0,
+            const   char * variant  = 0,
+            const   char * keywordsAndValues = 0);
+
+    /**
+     * Initializes a Locale object from another Locale object.
+     *
+     * @param other The Locale object being copied in.
+     * @stable ICU 2.0
+     */
+    Locale(const    Locale& other);
+
+
+    /**
+     * Destructor
+     * @stable ICU 2.0
+     */
+    virtual ~Locale() ;
+
+    /**
+     * Replaces the entire contents of *this with the specified value.
+     *
+     * @param other The Locale object being copied in.
+     * @return      *this
+     * @stable ICU 2.0
+     */
+    Locale& operator=(const Locale& other);
+
+    /**
+     * Checks if two locale keys are the same.
+     *
+     * @param other The locale key object to be compared with this.
+     * @return      True if the two locale keys are the same, false otherwise.
+     * @stable ICU 2.0
+     */
+    UBool   operator==(const    Locale&     other) const;
+
+    /**
+     * Checks if two locale keys are not the same.
+     *
+     * @param other The locale key object to be compared with this.
+     * @return      True if the two locale keys are not the same, false
+     *              otherwise.
+     * @stable ICU 2.0
+     */
+    UBool   operator!=(const    Locale&     other) const;
+
+    /**
+     * Clone this object.
+     * Clones can be used concurrently in multiple threads.
+     * If an error occurs, then NULL is returned.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @stable ICU 2.8
+     */
+    Locale *clone() const;
+
+    /**
+     * Common methods of getting the current default Locale. Used for the
+     * presentation: menus, dialogs, etc. Generally set once when your applet or
+     * application is initialized, then never reset. (If you do reset the
+     * default locale, you probably want to reload your GUI, so that the change
+     * is reflected in your interface.)
+     *
+     * More advanced programs will allow users to use different locales for
+     * different fields, e.g. in a spreadsheet.
+     *
+     * Note that the initial setting will match the host system.
+     * @return a reference to the Locale object for the default locale ID
+     * @system
+     * @stable ICU 2.0
+     */
+    static const Locale& U_EXPORT2 getDefault(void);
+
+    /**
+     * Sets the default. Normally set once at the beginning of a process,
+     * then never reset.
+     * setDefault() only changes ICU's default locale ID, <strong>not</strong>
+     * the default locale ID of the runtime environment.
+     *
+     * @param newLocale Locale to set to.  If NULL, set to the value obtained
+     *                  from the runtime environement.
+     * @param success The error code.
+     * @system
+     * @stable ICU 2.0
+     */
+    static void U_EXPORT2 setDefault(const Locale& newLocale,
+                                     UErrorCode&   success);
+
+    /**
+     * Creates a locale which has had minimal canonicalization
+     * as per uloc_getName().
+     * @param name The name to create from.  If name is null,
+     *  the default Locale is used.
+     * @return new locale object
+     * @stable ICU 2.0
+     * @see uloc_getName
+     */
+    static Locale U_EXPORT2 createFromName(const char *name);
+
+    /**
+     * Creates a locale from the given string after canonicalizing
+     * the string by calling uloc_canonicalize().
+     * @param name the locale ID to create from.  Must not be NULL.
+     * @return a new locale object corresponding to the given name
+     * @stable ICU 3.0
+     * @see uloc_canonicalize
+     */
+    static Locale U_EXPORT2 createCanonical(const char* name);
+
+    /**
+     * Returns the locale's ISO-639 language code.
+     * @return      An alias to the code
+     * @stable ICU 2.0
+     */
+    inline const char *  getLanguage( ) const;
+
+    /**
+     * Returns the locale's ISO-15924 abbreviation script code.
+     * @return      An alias to the code
+     * @see uscript_getShortName
+     * @see uscript_getCode
+     * @stable ICU 2.8
+     */
+    inline const char *  getScript( ) const;
+
+    /**
+     * Returns the locale's ISO-3166 country code.
+     * @return      An alias to the code
+     * @stable ICU 2.0
+     */
+    inline const char *  getCountry( ) const;
+
+    /**
+     * Returns the locale's variant code.
+     * @return      An alias to the code
+     * @stable ICU 2.0
+     */
+    inline const char *  getVariant( ) const;
+
+    /**
+     * Returns the programmatic name of the entire locale, with the language,
+     * country and variant separated by underbars. If a field is missing, up
+     * to two leading underbars will occur. Example: "en", "de_DE", "en_US_WIN",
+     * "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO"
+     * @return      A pointer to "name".
+     * @stable ICU 2.0
+     */
+    inline const char * getName() const;
+
+    /**
+     * Returns the programmatic name of the entire locale as getName would return,
+     * but without keywords.
+     * @return      A pointer to "name".
+     * @see getName
+     * @stable ICU 2.8
+     */
+    const char * getBaseName() const;
+
+
+    /**
+     * Gets the list of keywords for the specified locale.
+     *
+     * @return pointer to StringEnumeration class. Client must dispose of it by calling delete.
+     * @param status Returns any error information while performing this operation.
+     * @stable ICU 2.8
+     */
+    StringEnumeration * createKeywords(UErrorCode &status) const;
+
+    /**
+     * Get the value for a keyword.
+     *
+     * @param keywordName name of the keyword for which we want the value. Case insensitive.
+     * @param status Returns any error information while performing this operation.
+     * @param buffer The buffer to receive the keyword value.
+     * @param bufferCapacity The capacity of receiving buffer
+     * @return the length of keyword value
+     *
+     * @stable ICU 2.8
+     */
+    int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const;
+
+    /**
+     * returns the locale's three-letter language code, as specified
+     * in ISO draft standard ISO-639-2.
+     * @return      An alias to the code, or NULL
+     * @stable ICU 2.0
+     */
+    const char * getISO3Language() const;
+
+    /**
+     * Fills in "name" with the locale's three-letter ISO-3166 country code.
+     * @return      An alias to the code, or NULL
+     * @stable ICU 2.0
+     */
+    const char * getISO3Country() const;
+
+    /**
+     * Returns the Windows LCID value corresponding to this locale.
+     * This value is stored in the resource data for the locale as a one-to-four-digit
+     * hexadecimal number.  If the resource is missing, in the wrong format, or
+     * there is no Windows LCID value that corresponds to this locale, returns 0.
+     * @stable ICU 2.0
+     */
+    uint32_t        getLCID(void) const;
+
+    /**
+     * Fills in "dispLang" with the name of this locale's language in a format suitable for
+     * user display in the default locale.  For example, if the locale's language code is
+     * "fr" and the default locale's language code is "en", this function would set
+     * dispLang to "French".
+     * @param dispLang  Receives the language's display name.
+     * @return          A reference to "dispLang".
+     * @stable ICU 2.0
+     */
+    UnicodeString&  getDisplayLanguage(UnicodeString&   dispLang) const;
+
+    /**
+     * Fills in "dispLang" with the name of this locale's language in a format suitable for
+     * user display in the locale specified by "displayLocale".  For example, if the locale's
+     * language code is "en" and displayLocale's language code is "fr", this function would set
+     * dispLang to "Anglais".
+     * @param displayLocale  Specifies the locale to be used to display the name.  In other words,
+     *                  if the locale's language code is "en", passing Locale::getFrench() for
+     *                  displayLocale would result in "Anglais", while passing Locale::getGerman()
+     *                  for displayLocale would result in "Englisch".
+     * @param dispLang  Receives the language's display name.
+     * @return          A reference to "dispLang".
+     * @stable ICU 2.0
+     */
+    UnicodeString&  getDisplayLanguage( const   Locale&         displayLocale,
+                                                UnicodeString&  dispLang) const;
+
+    /**
+     * Fills in "dispScript" with the name of this locale's script in a format suitable
+     * for user display in the default locale.  For example, if the locale's script code
+     * is "LATN" and the default locale's language code is "en", this function would set
+     * dispScript to "Latin".
+     * @param dispScript    Receives the scripts's display name.
+     * @return              A reference to "dispScript".
+     * @stable ICU 2.8
+     */
+    UnicodeString&  getDisplayScript(          UnicodeString& dispScript) const;
+
+    /**
+     * Fills in "dispScript" with the name of this locale's country in a format suitable
+     * for user display in the locale specified by "displayLocale".  For example, if the locale's
+     * script code is "LATN" and displayLocale's language code is "en", this function would set
+     * dispScript to "Latin".
+     * @param displayLocale      Specifies the locale to be used to display the name.  In other
+     *                      words, if the locale's script code is "LATN", passing
+     *                      Locale::getFrench() for displayLocale would result in "", while
+     *                      passing Locale::getGerman() for displayLocale would result in
+     *                      "".
+     * @param dispScript    Receives the scripts's display name.
+     * @return              A reference to "dispScript".
+     * @stable ICU 2.8
+     */
+    UnicodeString&  getDisplayScript(  const   Locale&         displayLocale,
+                                               UnicodeString&  dispScript) const;
+
+    /**
+     * Fills in "dispCountry" with the name of this locale's country in a format suitable
+     * for user display in the default locale.  For example, if the locale's country code
+     * is "FR" and the default locale's language code is "en", this function would set
+     * dispCountry to "France".
+     * @param dispCountry   Receives the country's display name.
+     * @return              A reference to "dispCountry".
+     * @stable ICU 2.0
+     */
+    UnicodeString&  getDisplayCountry(          UnicodeString& dispCountry) const;
+
+    /**
+     * Fills in "dispCountry" with the name of this locale's country in a format suitable
+     * for user display in the locale specified by "displayLocale".  For example, if the locale's
+     * country code is "US" and displayLocale's language code is "fr", this function would set
+     * dispCountry to "&Eacute;tats-Unis".
+     * @param displayLocale      Specifies the locale to be used to display the name.  In other
+     *                      words, if the locale's country code is "US", passing
+     *                      Locale::getFrench() for displayLocale would result in "&Eacute;tats-Unis", while
+     *                      passing Locale::getGerman() for displayLocale would result in
+     *                      "Vereinigte Staaten".
+     * @param dispCountry   Receives the country's display name.
+     * @return              A reference to "dispCountry".
+     * @stable ICU 2.0
+     */
+    UnicodeString&  getDisplayCountry(  const   Locale&         displayLocale,
+                                                UnicodeString&  dispCountry) const;
+
+    /**
+     * Fills in "dispVar" with the name of this locale's variant code in a format suitable
+     * for user display in the default locale.
+     * @param dispVar   Receives the variant's name.
+     * @return          A reference to "dispVar".
+     * @stable ICU 2.0
+     */
+    UnicodeString&  getDisplayVariant(      UnicodeString& dispVar) const;
+
+    /**
+     * Fills in "dispVar" with the name of this locale's variant code in a format
+     * suitable for user display in the locale specified by "displayLocale".
+     * @param displayLocale  Specifies the locale to be used to display the name.
+     * @param dispVar   Receives the variant's display name.
+     * @return          A reference to "dispVar".
+     * @stable ICU 2.0
+     */
+    UnicodeString&  getDisplayVariant(  const   Locale&         displayLocale,
+                                                UnicodeString&  dispVar) const;
+
+    /**
+     * Fills in "name" with the name of this locale in a format suitable for user display
+     * in the default locale.  This function uses getDisplayLanguage(), getDisplayCountry(),
+     * and getDisplayVariant() to do its work, and outputs the display name in the format
+     * "language (country[,variant])".  For example, if the default locale is en_US, then
+     * fr_FR's display name would be "French (France)", and es_MX_Traditional's display name
+     * would be "Spanish (Mexico,Traditional)".
+     * @param name  Receives the locale's display name.
+     * @return      A reference to "name".
+     * @stable ICU 2.0
+     */
+    UnicodeString&  getDisplayName(         UnicodeString&  name) const;
+
+    /**
+     * Fills in "name" with the name of this locale in a format suitable for user display
+     * in the locale specfied by "displayLocale".  This function uses getDisplayLanguage(),
+     * getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
+     * name in the format "language (country[,variant])".  For example, if displayLocale is
+     * fr_FR, then en_US's display name would be "Anglais (&Eacute;tats-Unis)", and no_NO_NY's
+     * display name would be "norv&eacute;gien (Norv&egrave;ge,NY)".
+     * @param displayLocale  Specifies the locale to be used to display the name.
+     * @param name      Receives the locale's display name.
+     * @return          A reference to "name".
+     * @stable ICU 2.0
+     */
+    UnicodeString&  getDisplayName( const   Locale&         displayLocale,
+                                            UnicodeString&  name) const;
+
+    /**
+     * Generates a hash code for the locale.
+     * @stable ICU 2.0
+     */
+    int32_t         hashCode(void) const;
+
+    /**
+     * Sets the locale to bogus
+     * A bogus locale represents a non-existing locale associated
+     * with services that can be instantiated from non-locale data
+     * in addition to locale (for example, collation can be
+     * instantiated from a locale and from a rule set).
+     * @stable ICU 2.1
+     */
+    void setToBogus();
+
+    /**
+     * Gets the bogus state. Locale object can be bogus if it doesn't exist
+     * @return FALSE if it is a real locale, TRUE if it is a bogus locale
+     * @stable ICU 2.1
+     */
+    UBool isBogus(void) const;
+
+    /**
+     * Returns a list of all installed locales.
+     * @param count Receives the number of locales in the list.
+     * @return      A pointer to an array of Locale objects.  This array is the list
+     *              of all locales with installed resource files.  The called does NOT
+     *              get ownership of this list, and must NOT delete it.
+     * @stable ICU 2.0
+     */
+    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+
+    /**
+     * Gets a list of all available 2-letter country codes defined in ISO 639.  This is a
+     * pointer to an array of pointers to arrays of char.  All of these pointers are
+     * owned by ICU-- do not delete them, and do not write through them.  The array is
+     * terminated with a null pointer.
+     * @return a list of all available country codes
+     * @stable ICU 2.0
+     */
+    static const char* const* U_EXPORT2 getISOCountries();
+
+    /**
+     * Gets a list of all available language codes defined in ISO 639.  This is a pointer
+     * to an array of pointers to arrays of char.  All of these pointers are owned
+     * by ICU-- do not delete them, and do not write through them.  The array is
+     * terminated with a null pointer.
+     * @return a list of all available language codes
+     * @stable ICU 2.0
+     */
+    static const char* const* U_EXPORT2 getISOLanguages();
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const;
+
+protected: /* only protected for testing purposes. DO NOT USE. */
+    /**
+     * Set this from a single POSIX style locale string.
+     * @internal
+     */
+    void setFromPOSIXID(const char *posixID);
+
+private:
+    /**
+     * Initialize the locale object with a new name.
+     * Was deprecated - used in implementation - moved internal
+     *
+     * @param cLocaleID The new locale name.
+     */
+    Locale& init(const char* cLocaleID, UBool canonicalize);
+
+    /*
+     * Internal constructor to allow construction of a locale object with
+     *   NO side effects.   (Default constructor tries to get
+     *   the default locale.)
+     */
+    enum ELocaleType {
+        eBOGUS
+    };
+    Locale(ELocaleType);
+
+    /**
+     * Initialize the locale cache for commonly used locales
+     */
+    static Locale *getLocaleCache(void);
+
+    char language[ULOC_LANG_CAPACITY];
+    char script[ULOC_SCRIPT_CAPACITY];
+    char country[ULOC_COUNTRY_CAPACITY];
+    int32_t variantBegin;
+    char* fullName;
+    char fullNameBuffer[ULOC_FULLNAME_CAPACITY];
+    // name without keywords
+    char* baseName;
+    char baseNameBuffer[ULOC_FULLNAME_CAPACITY];
+
+    UBool fIsBogus;
+
+    static const Locale &getLocale(int locid);
+
+    /**
+     * A friend to allow the default locale to be set by either the C or C++ API.
+     * @internal
+     */
+    friend void locale_set_default_internal(const char *);
+};
+
+inline UBool
+Locale::operator!=(const    Locale&     other) const
+{
+    return !operator==(other);
+}
+
+inline const char *
+Locale::getCountry() const
+{
+    return country;
+}
+
+inline const char *
+Locale::getLanguage() const
+{
+    return language;
+}
+
+inline const char *
+Locale::getScript() const
+{
+    return script;
+}
+
+inline const char *
+Locale::getVariant() const
+{
+    return &fullName[variantBegin];
+}
+
+inline const char *
+Locale::getName() const
+{
+    return fullName;
+}
+
+inline UBool
+Locale::isBogus(void) const {
+    return fIsBogus;
+}
+
+U_NAMESPACE_END
+
+#endif
+

Deleted: MacRuby/trunk/icu-1060/unicode/measfmt.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/measfmt.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/measfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,77 +0,0 @@
-/*
-**********************************************************************
-* Copyright (c) 2004-2006, International Business Machines
-* Corporation and others.  All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: April 20, 2004
-* Since: ICU 3.0
-**********************************************************************
-*/
-#ifndef MEASUREFORMAT_H
-#define MEASUREFORMAT_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/format.h"
-
-/**
- * \file 
- * \brief C++ API: Formatter for measure objects.
- */
-
-U_NAMESPACE_BEGIN
-
-/**
- * 
- * A formatter for measure objects.  This is an abstract base class.
- *
- * <p>To format or parse a measure object, first create a formatter
- * object using a MeasureFormat factory method.  Then use that
- * object's format and parse methods.
- *
- * <p>This is an abstract class.
- *
- * @see Format
- * @author Alan Liu
- * @stable ICU 3.0
- */
-class U_I18N_API MeasureFormat : public Format {
-
- public:
-
-    /**
-     * Return a formatter for CurrencyAmount objects in the given
-     * locale.
-     * @param locale desired locale
-     * @param ec input-output error code
-     * @return a formatter object, or NULL upon error
-     * @stable ICU 3.0
-     */
-    static MeasureFormat* U_EXPORT2 createCurrencyFormat(const Locale& locale,
-                                               UErrorCode& ec);
-
-    /**
-     * Return a formatter for CurrencyAmount objects in the default
-     * locale.
-     * @param ec input-output error code
-     * @return a formatter object, or NULL upon error
-     * @stable ICU 3.0
-     */
-    static MeasureFormat* U_EXPORT2 createCurrencyFormat(UErrorCode& ec);
-
- protected:
-
-    /**
-     * Default constructor.
-     * @stable ICU 3.0
-     */
-    MeasureFormat();
-};
-
-U_NAMESPACE_END
-
-#endif // #if !UCONFIG_NO_FORMATTING
-#endif // #ifndef MEASUREFORMAT_H

Copied: MacRuby/trunk/icu-1060/unicode/measfmt.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/measfmt.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/measfmt.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/measfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,77 @@
+/*
+**********************************************************************
+* Copyright (c) 2004-2006, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: April 20, 2004
+* Since: ICU 3.0
+**********************************************************************
+*/
+#ifndef MEASUREFORMAT_H
+#define MEASUREFORMAT_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/format.h"
+
+/**
+ * \file 
+ * \brief C++ API: Formatter for measure objects.
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * 
+ * A formatter for measure objects.  This is an abstract base class.
+ *
+ * <p>To format or parse a measure object, first create a formatter
+ * object using a MeasureFormat factory method.  Then use that
+ * object's format and parse methods.
+ *
+ * <p>This is an abstract class.
+ *
+ * @see Format
+ * @author Alan Liu
+ * @stable ICU 3.0
+ */
+class U_I18N_API MeasureFormat : public Format {
+
+ public:
+
+    /**
+     * Return a formatter for CurrencyAmount objects in the given
+     * locale.
+     * @param locale desired locale
+     * @param ec input-output error code
+     * @return a formatter object, or NULL upon error
+     * @stable ICU 3.0
+     */
+    static MeasureFormat* U_EXPORT2 createCurrencyFormat(const Locale& locale,
+                                               UErrorCode& ec);
+
+    /**
+     * Return a formatter for CurrencyAmount objects in the default
+     * locale.
+     * @param ec input-output error code
+     * @return a formatter object, or NULL upon error
+     * @stable ICU 3.0
+     */
+    static MeasureFormat* U_EXPORT2 createCurrencyFormat(UErrorCode& ec);
+
+ protected:
+
+    /**
+     * Default constructor.
+     * @stable ICU 3.0
+     */
+    MeasureFormat();
+};
+
+U_NAMESPACE_END
+
+#endif // #if !UCONFIG_NO_FORMATTING
+#endif // #ifndef MEASUREFORMAT_H

Deleted: MacRuby/trunk/icu-1060/unicode/measunit.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/measunit.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/measunit.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,71 +0,0 @@
-/*
-**********************************************************************
-* Copyright (c) 2004-2006, International Business Machines
-* Corporation and others.  All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: April 26, 2004
-* Since: ICU 3.0
-**********************************************************************
-*/
-#ifndef __MEASUREUNIT_H__
-#define __MEASUREUNIT_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/fmtable.h"
-
-/**
- * \file 
- * \brief C++ API: A unit for measuring a quantity.
- */
- 
-U_NAMESPACE_BEGIN
-
-/**
- * A unit such as length, mass, volume, currency, etc.  A unit is
- * coupled with a numeric amount to produce a Measure.
- *
- * <p>This is an abstract class.
- *
- * @author Alan Liu
- * @stable ICU 3.0
- */
-class U_I18N_API MeasureUnit: public UObject {
- public:
-    /**
-     * Return a polymorphic clone of this object.  The result will
-     * have the same class as returned by getDynamicClassID().
-     * @stable ICU 3.0
-     */
-    virtual UObject* clone() const = 0;
-
-    /**
-     * Destructor
-     * @stable ICU 3.0
-     */
-    virtual ~MeasureUnit();
-    
-    /**
-     * Equality operator.  Return true if this object is equal
-     * to the given object.
-     * @stable ICU 3.0
-     */
-    virtual UBool operator==(const UObject& other) const = 0;
-
- protected:
-    /**
-     * Default constructor.
-     * @stable ICU 3.0
-     */
-    MeasureUnit();
-};
-
-U_NAMESPACE_END
-
-// NOTE: There is no measunit.cpp. For implementation, see measure.cpp. [alan]
-
-#endif // !UCONFIG_NO_FORMATTING
-#endif // __MEASUREUNIT_H__

Copied: MacRuby/trunk/icu-1060/unicode/measunit.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/measunit.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/measunit.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/measunit.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,71 @@
+/*
+**********************************************************************
+* Copyright (c) 2004-2006, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: April 26, 2004
+* Since: ICU 3.0
+**********************************************************************
+*/
+#ifndef __MEASUREUNIT_H__
+#define __MEASUREUNIT_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/fmtable.h"
+
+/**
+ * \file 
+ * \brief C++ API: A unit for measuring a quantity.
+ */
+ 
+U_NAMESPACE_BEGIN
+
+/**
+ * A unit such as length, mass, volume, currency, etc.  A unit is
+ * coupled with a numeric amount to produce a Measure.
+ *
+ * <p>This is an abstract class.
+ *
+ * @author Alan Liu
+ * @stable ICU 3.0
+ */
+class U_I18N_API MeasureUnit: public UObject {
+ public:
+    /**
+     * Return a polymorphic clone of this object.  The result will
+     * have the same class as returned by getDynamicClassID().
+     * @stable ICU 3.0
+     */
+    virtual UObject* clone() const = 0;
+
+    /**
+     * Destructor
+     * @stable ICU 3.0
+     */
+    virtual ~MeasureUnit();
+    
+    /**
+     * Equality operator.  Return true if this object is equal
+     * to the given object.
+     * @stable ICU 3.0
+     */
+    virtual UBool operator==(const UObject& other) const = 0;
+
+ protected:
+    /**
+     * Default constructor.
+     * @stable ICU 3.0
+     */
+    MeasureUnit();
+};
+
+U_NAMESPACE_END
+
+// NOTE: There is no measunit.cpp. For implementation, see measure.cpp. [alan]
+
+#endif // !UCONFIG_NO_FORMATTING
+#endif // __MEASUREUNIT_H__

Deleted: MacRuby/trunk/icu-1060/unicode/measure.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/measure.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/measure.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,137 +0,0 @@
-/*
-**********************************************************************
-* Copyright (c) 2004-2006, International Business Machines
-* Corporation and others.  All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: April 26, 2004
-* Since: ICU 3.0
-**********************************************************************
-*/
-#ifndef __MEASURE_H__
-#define __MEASURE_H__
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: MeasureUnit object.
- */
- 
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/fmtable.h"
-
-U_NAMESPACE_BEGIN
-
-class MeasureUnit;
-
-/**
- * An amount of a specified unit, consisting of a number and a Unit.
- * For example, a length measure consists of a number and a length
- * unit, such as feet or meters.  This is an abstract class.
- * Subclasses specify a concrete Unit type.
- *
- * <p>Measure objects are parsed and formatted by subclasses of
- * MeasureFormat.
- *
- * <p>Measure objects are immutable.
- *
- * <p>This is an abstract class.
- *
- * @author Alan Liu
- * @stable ICU 3.0
- */
-class U_I18N_API Measure: public UObject {
- public:
-    /**
-     * Construct an object with the given numeric amount and the given
-     * unit.  After this call, the caller must not delete the given
-     * unit object.
-     * @param number a numeric object; amount.isNumeric() must be TRUE
-     * @param adoptedUnit the unit object, which must not be NULL
-     * @param ec input-output error code. If the amount or the unit
-     * is invalid, then this will be set to a failing value.
-     * @stable ICU 3.0
-     */
-    Measure(const Formattable& number, MeasureUnit* adoptedUnit,
-            UErrorCode& ec);
-
-    /**
-     * Copy constructor
-     * @stable ICU 3.0
-     */
-    Measure(const Measure& other);
-
-    /**
-     * Assignment operator
-     * @stable ICU 3.0
-     */
-    Measure& operator=(const Measure& other);
-
-    /**
-     * Return a polymorphic clone of this object.  The result will
-     * have the same class as returned by getDynamicClassID().
-     * @stable ICU 3.0
-     */
-    virtual UObject* clone() const = 0;
-
-    /**
-     * Destructor
-     * @stable ICU 3.0
-     */
-    virtual ~Measure();
-    
-    /**
-     * Equality operator.  Return true if this object is equal
-     * to the given object.
-     * @stable ICU 3.0
-     */
-    UBool operator==(const UObject& other) const;
-
-    /**
-     * Return a reference to the numeric value of this object.  The
-     * numeric value may be of any numeric type supported by
-     * Formattable.
-     * @stable ICU 3.0
-     */
-    inline const Formattable& getNumber() const;
-
-    /**
-     * Return a reference to the unit of this object.
-     * @stable ICU 3.0
-     */
-    inline const MeasureUnit& getUnit() const;
-
- protected:
-    /**
-     * Default constructor.
-     * @stable ICU 3.0
-     */
-    Measure();
-
- private:
-    /**
-     * The numeric value of this object, e.g. 2.54 or 100.
-     */
-    Formattable number;
-
-    /**
-     * The unit of this object, e.g., "millimeter" or "JPY".  This is
-     * owned by this object.
-     */
-    MeasureUnit* unit;
-};
-
-inline const Formattable& Measure::getNumber() const {
-    return number;
-}
-
-inline const MeasureUnit& Measure::getUnit() const {
-    return *unit;
-}
-
-U_NAMESPACE_END
-
-#endif // !UCONFIG_NO_FORMATTING
-#endif // __MEASURE_H__

Copied: MacRuby/trunk/icu-1060/unicode/measure.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/measure.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/measure.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/measure.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,137 @@
+/*
+**********************************************************************
+* Copyright (c) 2004-2006, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: April 26, 2004
+* Since: ICU 3.0
+**********************************************************************
+*/
+#ifndef __MEASURE_H__
+#define __MEASURE_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: MeasureUnit object.
+ */
+ 
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/fmtable.h"
+
+U_NAMESPACE_BEGIN
+
+class MeasureUnit;
+
+/**
+ * An amount of a specified unit, consisting of a number and a Unit.
+ * For example, a length measure consists of a number and a length
+ * unit, such as feet or meters.  This is an abstract class.
+ * Subclasses specify a concrete Unit type.
+ *
+ * <p>Measure objects are parsed and formatted by subclasses of
+ * MeasureFormat.
+ *
+ * <p>Measure objects are immutable.
+ *
+ * <p>This is an abstract class.
+ *
+ * @author Alan Liu
+ * @stable ICU 3.0
+ */
+class U_I18N_API Measure: public UObject {
+ public:
+    /**
+     * Construct an object with the given numeric amount and the given
+     * unit.  After this call, the caller must not delete the given
+     * unit object.
+     * @param number a numeric object; amount.isNumeric() must be TRUE
+     * @param adoptedUnit the unit object, which must not be NULL
+     * @param ec input-output error code. If the amount or the unit
+     * is invalid, then this will be set to a failing value.
+     * @stable ICU 3.0
+     */
+    Measure(const Formattable& number, MeasureUnit* adoptedUnit,
+            UErrorCode& ec);
+
+    /**
+     * Copy constructor
+     * @stable ICU 3.0
+     */
+    Measure(const Measure& other);
+
+    /**
+     * Assignment operator
+     * @stable ICU 3.0
+     */
+    Measure& operator=(const Measure& other);
+
+    /**
+     * Return a polymorphic clone of this object.  The result will
+     * have the same class as returned by getDynamicClassID().
+     * @stable ICU 3.0
+     */
+    virtual UObject* clone() const = 0;
+
+    /**
+     * Destructor
+     * @stable ICU 3.0
+     */
+    virtual ~Measure();
+    
+    /**
+     * Equality operator.  Return true if this object is equal
+     * to the given object.
+     * @stable ICU 3.0
+     */
+    UBool operator==(const UObject& other) const;
+
+    /**
+     * Return a reference to the numeric value of this object.  The
+     * numeric value may be of any numeric type supported by
+     * Formattable.
+     * @stable ICU 3.0
+     */
+    inline const Formattable& getNumber() const;
+
+    /**
+     * Return a reference to the unit of this object.
+     * @stable ICU 3.0
+     */
+    inline const MeasureUnit& getUnit() const;
+
+ protected:
+    /**
+     * Default constructor.
+     * @stable ICU 3.0
+     */
+    Measure();
+
+ private:
+    /**
+     * The numeric value of this object, e.g. 2.54 or 100.
+     */
+    Formattable number;
+
+    /**
+     * The unit of this object, e.g., "millimeter" or "JPY".  This is
+     * owned by this object.
+     */
+    MeasureUnit* unit;
+};
+
+inline const Formattable& Measure::getNumber() const {
+    return number;
+}
+
+inline const MeasureUnit& Measure::getUnit() const {
+    return *unit;
+}
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_FORMATTING
+#endif // __MEASURE_H__

Deleted: MacRuby/trunk/icu-1060/unicode/msgfmt.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/msgfmt.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/msgfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,940 +0,0 @@
-/*
-* Copyright (C) 2007-2008, International Business Machines Corporation and others. All Rights Reserved.
-********************************************************************************
-*
-* File MSGFMT.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   02/19/97    aliu        Converted from java.
-*   03/20/97    helena      Finished first cut of implementation.
-*   07/22/98    stephen     Removed operator!= (defined in Format)
-*   08/19/2002  srl         Removing Javaisms
-********************************************************************************
-*/
-
-#ifndef MSGFMT_H
-#define MSGFMT_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Formats messages in a language-neutral way.
- */
- 
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/format.h"
-#include "unicode/locid.h"
-#include "unicode/parseerr.h"
-#include "unicode/uchar.h"
-
-U_NAMESPACE_BEGIN
-
-class NumberFormat;
-class DateFormat;
-
-/**
- *
- * A MessageFormat produces concatenated messages in a
- * language-neutral way.  It should be used for all string
- * concatenations that are visible to end users.
- * <P>
- * A MessageFormat contains an array of <EM>subformats</EM> arranged
- * within a <EM>template string</EM>.  Together, the subformats and
- * template string determine how the MessageFormat will operate during
- * formatting and parsing.
- * <P>
- * Typically, both the subformats and the template string are
- * specified at once in a <EM>pattern</EM>.  By using different
- * patterns for different locales, messages may be localized.
- * <P>
- * During formatting, the MessageFormat takes an array of arguments
- * and produces a user-readable string.  Each argument is a
- * Formattable object; they may be passed in in an array, or as a
- * single Formattable object which itself contains an array.  Each
- * argument is matched up with its corresponding subformat, which then
- * formats it into a string.  The resultant strings are then assembled
- * within the string template of the MessageFormat to produce the
- * final output string.
- * <p>
- * <strong>Note:</strong>
- * In ICU 4.0 MessageFormat supports named arguments.  If a named argument
- * is used, all arguments must be named.  Names start with a character in 
- * <code>UCHAR_ID_START</code> and continue with characters in 
- * <code>UCHARID_CONTINUE</code>, in particular they do not start with a digit.
- * If named arguments are used, {@link #usesNamedArguments()} will return true.
- * <p>
- * The other new methods supporting named arguments are 
- * {@link #getFormatNames(UErrorCode& status)},
- * {@link #getFormat(const UnicodeString& formatName, UErrorCode& status)}
- * {@link #setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status)},
- * {@link #adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status)},
- * {@link #format(const Formattable* arguments, const UnicodeString *argumentNames, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection, UErrorCode& success)},
- * {@link #format(const UnicodeString* argumentNames, const Formattable* arguments, int32_t count, UnicodeString& appendTo,UErrorCode& status)}.
- * These methods are all compatible with patterns that do not used named arguments-- 
- * in these cases the keys in the input or output use <code>UnicodeString</code>s 
- * that name the argument indices, e.g. "0", "1", "2"... etc.
- * <p>
- * When named arguments are used, certain methods on MessageFormat that take or
- * return arrays do not perform any action, since it is not possible to
- * identify positions in an array using a name.  UErrorCode is set to
- * U_ARGUMENT_TYPE_MISMATCH if there is a status/success field in the method.
- * These methods are 
- * {@link #adoptFormats(Format** newFormats, int32_t count)},
- * {@link #setFormats(const Format** newFormats,int32_t count)},
- * {@link #adoptFormat(int32_t n, Format *newFormat)},
- * {@link #getFormats(int32_t& cnt)},
- * {@link #format(const Formattable* source,int32_t cnt,UnicodeString& appendTo, FieldPosition& ignore, UErrorCode& success)},
- * {@link #format(const UnicodeString& pattern,const Formattable* arguments,int32_t cnt,UnicodeString& appendTo,UErrorCode& success)},
- * {@link #format(const Formattable& source, UnicodeString& appendTo,FieldPosition& ignore, UErrorCode& success)},
- * {@link #format(const Formattable* arguments, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection,UErrorCode& success)},
- * {@link #parse(const UnicodeString& source, ParsePosition& pos,int32_t& count)},
- * {@link #parse(const UnicodeString& source, int32_t& cnt, UErrorCode& status)}
- * <p>
- *
- * <P>
- * During parsing, an input string is matched against the string
- * template of the MessageFormat to produce an array of Formattable
- * objects.  Plain text of the template string is matched directly
- * against intput text.  At each position in the template string where
- * a subformat is located, the subformat is called to parse the
- * corresponding segment of input text to produce an output argument.
- * In this way, an array of arguments is created which together
- * constitute the parse result.
- * <P>
- * Parsing may fail or produce unexpected results in a number of
- * circumstances.
- * <UL>
- * <LI>If one of the arguments does not occur in the pattern, it
- * will be returned as a default Formattable.
- * <LI>If the format of an argument is loses information, such as with
- * a choice format where a large number formats to "many", then the
- * parse may not correspond to the originally formatted argument.
- * <LI>MessageFormat does not handle ChoiceFormat recursion during
- * parsing; such parses will fail.
- * <LI>Parsing will not always find a match (or the correct match) if
- * some part of the parse is ambiguous.  For example, if the pattern
- * "{1},{2}" is used with the string arguments {"a,b", "c"}, it will
- * format as "a,b,c".  When the result is parsed, it will return {"a",
- * "b,c"}.
- * <LI>If a single argument is formatted more than once in the string,
- * then the rightmost subformat in the pattern string will produce the
- * parse result; prior subformats with the same argument index will
- * have no effect.
- * </UL>
- * Here are some examples of usage:
- * <P>
- * Example 1:
- * <pre>
- * \code
- *     UErrorCode success = U_ZERO_ERROR;
- *     GregorianCalendar cal(success);
- *     Formattable arguments[] = {
- *         7L,
- *         Formattable( (Date) cal.getTime(success), Formattable::kIsDate),
- *         "a disturbance in the Force"
- *     };
- *
- *     UnicodeString result;
- *     MessageFormat::format(
- *          "At {1,time} on {1,date}, there was {2} on planet {0,number}.",
- *          arguments, 3, result, success );
- *
- *     cout << "result: " << result << endl;
- *     //<output>: At 4:34:20 PM on 23-Mar-98, there was a disturbance
- *     //             in the Force on planet 7.
- * \endcode
- * </pre>
- * Typically, the message format will come from resources, and the
- * arguments will be dynamically set at runtime.
- * <P>
- * Example 2:
- * <pre>
- *  \code
- *     success = U_ZERO_ERROR;
- *     Formattable testArgs[] = {3L, "MyDisk"};
- *
- *     MessageFormat form(
- *         "The disk \"{1}\" contains {0} file(s).", success );
- *
- *     UnicodeString string;
- *     FieldPosition fpos = 0;
- *     cout << "format: " << form.format(testArgs, 2, string, fpos, success ) << endl;
- *
- *     // output, with different testArgs:
- *     // output: The disk "MyDisk" contains 0 file(s).
- *     // output: The disk "MyDisk" contains 1 file(s).
- *     // output: The disk "MyDisk" contains 1,273 file(s).
- *  \endcode
- *  </pre>
- *
- *  The pattern is of the following form.  Legend:
- *  <pre>
- * \code
- *       {optional item}
- *       (group that may be repeated)*
- * \endcode
- *  </pre>
- *  Do not confuse optional items with items inside quotes braces, such
- *  as this: "{".  Quoted braces are literals.
- *  <pre>
- *  \code
- *       messageFormatPattern := string ( "{" messageFormatElement "}" string )*
- *
- *       messageFormatElement := argumentIndex | argumentName { "," elementFormat }
- *
- *       elementFormat := "time" { "," datetimeStyle }
- *                      | "date" { "," datetimeStyle }
- *                      | "number" { "," numberStyle }
- *                      | "choice" "," choiceStyle
- *
- *       datetimeStyle := "short"
- *                      | "medium"
- *                      | "long"
- *                      | "full"
- *                      | dateFormatPattern
- *
- *       numberStyle :=   "currency"
- *                      | "percent"
- *                      | "integer"
- *                      | numberFormatPattern
- *
- *       choiceStyle :=   choiceFormatPattern
- * 
- *       pluralStyle := pluralFormatPattern
- * \endcode
- * </pre>
- * If there is no elementFormat, then the argument must be a string,
- * which is substituted. If there is no dateTimeStyle or numberStyle,
- * then the default format is used (e.g.  NumberFormat::createInstance(),
- * DateFormat::createTimeInstance(DateFormat::kDefault, ...) or DateFormat::createDateInstance(DateFormat::kDefault, ...). For
- * a ChoiceFormat, the pattern must always be specified, since there
- * is no default.
- * <P>
- * In strings, single quotes can be used to quote syntax characters.
- * A literal single quote is represented by '', both within and outside
- * of single-quoted segments.  Inside a
- * messageFormatElement, quotes are <EM>not</EM> removed. For example,
- * {1,number,$'#',##} will produce a number format with the pound-sign
- * quoted, with a result such as: "$#31,45".
- * <P>
- * If a pattern is used, then unquoted braces in the pattern, if any,
- * must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab
- * {0'}' de" and "ab } de" are not.
- * <p>
- * <dl><dt><b>Warning:</b><dd>The rules for using quotes within message
- * format patterns unfortunately have shown to be somewhat confusing.
- * In particular, it isn't always obvious to localizers whether single
- * quotes need to be doubled or not. Make sure to inform localizers about
- * the rules, and tell them (for example, by using comments in resource
- * bundle source files) which strings will be processed by MessageFormat.
- * Note that localizers may need to use single quotes in translated
- * strings where the original version doesn't have them.
- * <br>Note also that the simplest way to avoid the problem is to
- * use the real apostrophe (single quote) character U+2019 (') for
- * human-readable text, and to use the ASCII apostrophe (U+0027 ' )
- * only in program syntax, like quoting in MessageFormat.
- * See the annotations for U+0027 Apostrophe in The Unicode Standard.</p>
- * </dl>
- * <P>
- * The argumentIndex is a non-negative integer, which corresponds to the
- * index of the arguments presented in an array to be formatted.  The
- * first argument has argumentIndex 0.
- * <P>
- * It is acceptable to have unused arguments in the array.  With missing
- * arguments or arguments that are not of the right class for the
- * specified format, a failing UErrorCode result is set.
- * <P>
- * For more sophisticated patterns, you can use a ChoiceFormat to get
- * output:
- * <pre>
- * \code
- *     UErrorCode success = U_ZERO_ERROR;
- *     MessageFormat* form("The disk \"{1}\" contains {0}.", success);
- *     double filelimits[] = {0,1,2};
- *     UnicodeString filepart[] = {"no files","one file","{0,number} files"};
- *     ChoiceFormat* fileform = new ChoiceFormat(filelimits, filepart, 3);
- *     form.setFormat(1, *fileform); // NOT zero, see below
- *
- *     Formattable testArgs[] = {1273L, "MyDisk"};
- *
- *     UnicodeString string;
- *     FieldPosition fpos = 0;
- *     cout << form.format(testArgs, 2, string, fpos, success) << endl;
- *
- *     // output, with different testArgs
- *     // output: The disk "MyDisk" contains no files.
- *     // output: The disk "MyDisk" contains one file.
- *     // output: The disk "MyDisk" contains 1,273 files.
- * \endcode
- * </pre>
- * You can either do this programmatically, as in the above example,
- * or by using a pattern (see ChoiceFormat for more information) as in:
- * <pre>
- * \code
- *    form.applyPattern(
- *      "There {0,choice,0#are no files|1#is one file|1<are {0,number,integer} files}.");
- * \endcode
- * </pre>
- * <P>
- * <EM>Note:</EM> As we see above, the string produced by a ChoiceFormat in
- * MessageFormat is treated specially; occurences of '{' are used to
- * indicated subformats, and cause recursion.  If you create both a
- * MessageFormat and ChoiceFormat programmatically (instead of using
- * the string patterns), then be careful not to produce a format that
- * recurses on itself, which will cause an infinite loop.
- * <P>
- * <EM>Note:</EM> Subformats are numbered by their order in the pattern.
- * This is <EM>not</EM> the same as the argumentIndex.
- * <pre>
- * \code
- *    For example: with "abc{2}def{3}ghi{0}...",
- *
- *    format0 affects the first variable {2}
- *    format1 affects the second variable {3}
- *    format2 affects the second variable {0}
- * \endcode
- * </pre>
- *
- * <p><em>User subclasses are not supported.</em> While clients may write
- * subclasses, such code will not necessarily work and will not be
- * guaranteed to work stably from release to release.
- */
-class U_I18N_API MessageFormat : public Format {
-public:
-    /**
-     * Enum type for kMaxFormat.
-     * @obsolete ICU 3.0.  The 10-argument limit was removed as of ICU 2.6,
-     * rendering this enum type obsolete.
-     */
-    enum EFormatNumber {
-        /**
-         * The maximum number of arguments.
-         * @obsolete ICU 3.0.  The 10-argument limit was removed as of ICU 2.6,
-         * rendering this constant obsolete.
-         */
-        kMaxFormat = 10
-    };
-
-    /**
-     * Constructs a new MessageFormat using the given pattern and the
-     * default locale.
-     *
-     * @param pattern   Pattern used to construct object.
-     * @param status    Input/output error code.  If the
-     *                  pattern cannot be parsed, set to failure code.
-     * @stable ICU 2.0
-     */
-    MessageFormat(const UnicodeString& pattern,
-                  UErrorCode &status);
-
-    /**
-     * Constructs a new MessageFormat using the given pattern and locale.
-     * @param pattern   Pattern used to construct object.
-     * @param newLocale The locale to use for formatting dates and numbers.
-     * @param status    Input/output error code.  If the
-     *                  pattern cannot be parsed, set to failure code.
-     * @stable ICU 2.0
-     */
-    MessageFormat(const UnicodeString& pattern,
-                  const Locale& newLocale,
-                        UErrorCode& status);
-    /**
-     * Constructs a new MessageFormat using the given pattern and locale.
-     * @param pattern   Pattern used to construct object.
-     * @param newLocale The locale to use for formatting dates and numbers.
-     * @param parseError Struct to recieve information on position 
-     *                   of error within the pattern.
-     * @param status    Input/output error code.  If the
-     *                  pattern cannot be parsed, set to failure code.
-     * @stable ICU 2.0
-     */
-    MessageFormat(const UnicodeString& pattern,
-                  const Locale& newLocale,
-                  UParseError& parseError,
-                  UErrorCode& status);
-    /**
-     * Constructs a new MessageFormat from an existing one.
-     * @stable ICU 2.0
-     */
-    MessageFormat(const MessageFormat&);
-
-    /**
-     * Assignment operator.
-     * @stable ICU 2.0
-     */
-    const MessageFormat& operator=(const MessageFormat&);
-
-    /**
-     * Destructor.
-     * @stable ICU 2.0
-     */
-    virtual ~MessageFormat();
-
-    /**
-     * Clones this Format object polymorphically.  The caller owns the
-     * result and should delete it when done.
-     * @stable ICU 2.0
-     */
-    virtual Format* clone(void) const;
-
-    /**
-     * Returns true if the given Format objects are semantically equal.
-     * Objects of different subclasses are considered unequal.
-     * @param other  the object to be compared with.
-     * @return       true if the given Format objects are semantically equal.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const Format& other) const;
-
-    /**
-     * Sets the locale. This locale is used for fetching default number or date
-     * format information.
-     * @param theLocale    the new locale value to be set.
-     * @stable ICU 2.0
-     */
-    virtual void setLocale(const Locale& theLocale);
-
-    /**
-     * Gets the locale. This locale is used for fetching default number or date
-     * format information.
-     * @return    the locale of the object.
-     * @stable ICU 2.0
-     */
-    virtual const Locale& getLocale(void) const;
-
-    /**
-     * Applies the given pattern string to this message format.
-     *
-     * @param pattern   The pattern to be applied.
-     * @param status    Input/output error code.  If the
-     *                  pattern cannot be parsed, set to failure code.
-     * @stable ICU 2.0
-     */
-    virtual void applyPattern(const UnicodeString& pattern,
-                              UErrorCode& status);
-    /**
-     * Applies the given pattern string to this message format.
-     *
-     * @param pattern    The pattern to be applied.
-     * @param parseError Struct to recieve information on position 
-     *                   of error within pattern.
-     * @param status    Input/output error code.  If the
-     *                  pattern cannot be parsed, set to failure code.
-     * @stable ICU 2.0
-     */
-    virtual void applyPattern(const UnicodeString& pattern,
-                             UParseError& parseError,
-                             UErrorCode& status);
-
-    /**
-     * Returns a pattern that can be used to recreate this object.
-     *
-     * @param appendTo  Output parameter to receive the pattern.
-     *                  Result is appended to existing contents.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& toPattern(UnicodeString& appendTo) const;
-
-    /**
-     * Sets subformats.
-     * See the class description about format numbering.
-     * The caller should not delete the Format objects after this call.
-     * <EM>The array formatsToAdopt is not itself adopted.</EM> Its
-     * ownership is retained by the caller. If the call fails because
-     * memory cannot be allocated, then the formats will be deleted
-     * by this method, and this object will remain unchanged.
-     * 
-     * @stable ICU 2.0
-     * @param formatsToAdopt    the format to be adopted.
-     * @param count             the size of the array.
-     */
-    virtual void adoptFormats(Format** formatsToAdopt, int32_t count);
-
-    /**
-     * Sets subformats.
-     * See the class description about format numbering.
-     * Each item in the array is cloned into the internal array.
-     * If the call fails because memory cannot be allocated, then this
-     * object will remain unchanged.
-     * 
-     * @stable ICU 2.0
-     * @param newFormats the new format to be set.
-     * @param cnt        the size of the array.
-     */
-    virtual void setFormats(const Format** newFormats, int32_t cnt);
-
-
-    /**
-     * Sets one subformat.
-     * See the class description about format numbering.
-     * The caller should not delete the Format object after this call.
-     * If the number is over the number of formats already set,
-     * the item will be deleted and ignored.
-     * @stable ICU 2.0
-     * @param formatNumber     index of the subformat.
-     * @param formatToAdopt    the format to be adopted.
-     */
-    virtual void adoptFormat(int32_t formatNumber, Format* formatToAdopt);
-
-    /**
-     * Sets one subformat.
-     * See the class description about format numbering.
-     * If the number is over the number of formats already set,
-     * the item will be ignored.
-     * @param formatNumber     index of the subformat.
-     * @param format    the format to be set.
-     * @stable ICU 2.0
-     */
-    virtual void setFormat(int32_t formatNumber, const Format& format);
-
-    /**
-     * Gets format names. This function returns formatNames in StringEnumerations
-     * which can be used with getFormat() and setFormat() to export formattable 
-     * array from current MessageFormat to another.  It is caller's resposibility 
-     * to delete the returned formatNames.
-     * @param status  output param set to success/failure code.
-     * @draft ICU 4.0
-     */
-    virtual StringEnumeration* getFormatNames(UErrorCode& status);
-    
-    /**
-     * Gets subformat pointer for given format name.   
-     * This function supports both named and numbered
-     * arguments-- if numbered, the formatName is the
-     * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
-     * The returned Format object should not be deleted by the caller,
-     * nor should the ponter of other object .  The pointer and its 
-     * contents remain valid only until the next call to any method
-     * of this class is made with this object. 
-     * @param formatName the name or number specifying a format
-     * @param status  output param set to success/failure code.
-     * @draft ICU 4.0
-     */
-    virtual Format* getFormat(const UnicodeString& formatName, UErrorCode& status);
-    
-    /**
-     * Sets one subformat for given format name.
-     * See the class description about format name. 
-     * This function supports both named and numbered
-     * arguments-- if numbered, the formatName is the
-     * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
-     * If there is no matched formatName or wrong type,
-     * the item will be ignored.
-     * @param formatName  Name of the subformat.
-     * @param format      the format to be set.
-     * @param status  output param set to success/failure code.
-     * @draft ICU 4.0
-     */
-    virtual void setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status);
-    
-    /**
-     * Sets one subformat for given format name.
-     * See the class description about format name. 
-     * This function supports both named and numbered
-     * arguments-- if numbered, the formatName is the
-     * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
-     * If there is no matched formatName or wrong type,
-     * the item will be ignored.
-     * The caller should not delete the Format object after this call.
-     * @param formatName  Name of the subformat.
-     * @param formatToAdopt  Format to be adopted.
-     * @param status      output param set to success/failure code.
-     * @draft ICU 4.0
-     */
-    virtual void adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status);
-
-
-    /**
-     * Gets an array of subformats of this object.  The returned array
-     * should not be deleted by the caller, nor should the pointers
-     * within the array.  The array and its contents remain valid only
-     * until the next call to any method of this class is made with
-     * this object.  See the class description about format numbering.
-     * @param count output parameter to receive the size of the array
-     * @return an array of count Format* objects, or NULL if out of
-     * memory.  Any or all of the array elements may be NULL.
-     * @stable ICU 2.0
-     */
-    virtual const Format** getFormats(int32_t& count) const;
-
-    /**
-     * Formats the given array of arguments into a user-readable string.
-     * Does not take ownership of the Formattable* array or its contents.
-     *
-     * @param source    An array of objects to be formatted.
-     * @param count     The number of elements of 'source'.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param ignore    Not used; inherited from base class API.
-     * @param status    Input/output error code.  If the
-     *                  pattern cannot be parsed, set to failure code.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(  const Formattable* source,
-                            int32_t count,
-                            UnicodeString& appendTo,
-                            FieldPosition& ignore,
-                            UErrorCode& status) const;
-
-    /**
-     * Formats the given array of arguments into a user-readable string
-     * using the given pattern.
-     *
-     * @param pattern   The pattern.
-     * @param arguments An array of objects to be formatted.
-     * @param count     The number of elements of 'source'.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param status    Input/output error code.  If the
-     *                  pattern cannot be parsed, set to failure code.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    static UnicodeString& format(const UnicodeString& pattern,
-                                 const Formattable* arguments,
-                                 int32_t count,
-                                 UnicodeString& appendTo,
-                                 UErrorCode& status);
-
-    /**
-     * Formats the given array of arguments into a user-readable
-     * string.  The array must be stored within a single Formattable
-     * object of type kArray. If the Formattable object type is not of
-     * type kArray, then returns a failing UErrorCode.
-     *
-     * @param obj       A Formattable of type kArray containing
-     *                  arguments to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @param status    Input/output error code.  If the
-     *                  pattern cannot be parsed, set to failure code.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& format(const Formattable& obj,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos,
-                                  UErrorCode& status) const;
-
-    /**
-     * Formats the given array of arguments into a user-readable
-     * string.  The array must be stored within a single Formattable
-     * object of type kArray. If the Formattable object type is not of
-     * type kArray, then returns a failing UErrorCode.
-     *
-     * @param obj       The object to format
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param status    Input/output error code.  If the
-     *                  pattern cannot be parsed, set to failure code.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(const Formattable& obj,
-                          UnicodeString& appendTo,
-                          UErrorCode& status) const;
-    
-
-    /**
-     * Formats the given array of arguments into a user-defined argument name
-     * array. This function supports both named and numbered
-     * arguments-- if numbered, the formatName is the
-     * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
-     *
-     * @param argumentNames argument name array
-     * @param arguments An array of objects to be formatted.
-     * @param count     The number of elements of 'argumentNames' and 
-     *                  arguments.  The number of argumentNames and arguments
-     *                  must be the same.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param status    Input/output error code.  If the
-     *                  pattern cannot be parsed, set to failure code.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 4.0
-     */
-    UnicodeString& format(const UnicodeString* argumentNames,
-                          const Formattable* arguments,
-                          int32_t count,
-                          UnicodeString& appendTo,
-                          UErrorCode& status) const;
-    /**
-     * Parses the given string into an array of output arguments.
-     *
-     * @param source    String to be parsed.
-     * @param pos       On input, starting position for parse. On output,
-     *                  final position after parse.  Unchanged if parse
-     *                  fails.
-     * @param count     Output parameter to receive the number of arguments
-     *                  parsed.
-     * @return an array of parsed arguments.  The caller owns both
-     * the array and its contents.
-     * @stable ICU 2.0
-     */
-    virtual Formattable* parse( const UnicodeString& source,
-                                ParsePosition& pos,
-                                int32_t& count) const;
-
-    /**
-     * Parses the given string into an array of output arguments.
-     *
-     * @param source    String to be parsed.
-     * @param count     Output param to receive size of returned array.
-     * @param status    Input/output error code.  If the
-     *                  pattern cannot be parsed, set to failure code. 
-     *                  If the MessageFormat is named argument, the status is 
-     *                  set to U_ARGUMENT_TYPE_MISMATCH.
-     * @return an array of parsed arguments.  The caller owns both
-     * the array and its contents. Return NULL if status is not U_ZERO_ERROR.
-     * 
-     * @stable ICU 2.0
-     */
-    virtual Formattable* parse( const UnicodeString& source,
-                                int32_t& count,
-                                UErrorCode& status) const;
-
-    /**
-     * Parses the given string into an array of output arguments
-     * stored within a single Formattable of type kArray.
-     *
-     * @param source    The string to be parsed into an object.
-     * @param result    Formattable to be set to the parse result.
-     *                  If parse fails, return contents are undefined.
-     * @param pos       On input, starting position for parse. On output,
-     *                  final position after parse.  Unchanged if parse
-     *                  fails.
-     * @stable ICU 2.0
-     */
-    virtual void parseObject(const UnicodeString& source,
-                             Formattable& result,
-                             ParsePosition& pos) const;
-
-    /**
-     * Convert an 'apostrophe-friendly' pattern into a standard
-     * pattern.  Standard patterns treat all apostrophes as
-     * quotes, which is problematic in some languages, e.g. 
-     * French, where apostrophe is commonly used.  This utility
-     * assumes that only an unpaired apostrophe immediately before
-     * a brace is a true quote.  Other unpaired apostrophes are paired,
-     * and the resulting standard pattern string is returned.
-     *
-     * <p><b>Note</b> it is not guaranteed that the returned pattern
-     * is indeed a valid pattern.  The only effect is to convert
-     * between patterns having different quoting semantics.
-     *
-     * @param pattern the 'apostrophe-friendly' patttern to convert
-     * @param status    Input/output error code.  If the pattern
-     *                  cannot be parsed, the failure code is set.
-     * @return the standard equivalent of the original pattern
-     * @stable ICU 3.4
-     */
-    static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern, 
-        UErrorCode& status);
-    
-    /**
-     * Returns true if this MessageFormat uses named arguments,
-     * and false otherwise.  See class description.
-     *
-     * @return true if named arguments are used.
-     * @draft ICU 4.0
-     */
-    UBool usesNamedArguments() const;
-    
-    /**
-     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
-     * This method is to implement a simple version of RTTI, since not all
-     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
-     * clone() methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-
-    /**
-     * Return the class ID for this class.  This is useful only for
-     * comparing to a return value from getDynamicClassID().  For example:
-     * <pre>
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .      Derived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-    
-private:
-
-    Locale              fLocale;
-    UnicodeString       fPattern;
-    Format**            formatAliases; // see getFormats
-    int32_t             formatAliasesCapacity;
-    UProperty           idStart;
-    UProperty           idContinue;
-
-    MessageFormat(); // default constructor not implemented
-
-    /*
-     * A structure representing one subformat of this MessageFormat.
-     * Each subformat has a Format object, an offset into the plain
-     * pattern text fPattern, and an argument number.  The argument
-     * number corresponds to the array of arguments to be formatted.
-     * @internal
-     */
-    class Subformat;
-
-    /**
-     * A MessageFormat contains an array of subformats.  This array
-     * needs to grow dynamically if the MessageFormat is modified.
-     */
-    Subformat* subformats;
-    int32_t    subformatCount;
-    int32_t    subformatCapacity;
-
-    /**
-     * A MessageFormat formats an array of arguments.  Each argument
-     * has an expected type, based on the pattern.  For example, if
-     * the pattern contains the subformat "{3,number,integer}", then
-     * we expect argument 3 to have type Formattable::kLong.  This
-     * array needs to grow dynamically if the MessageFormat is
-     * modified.
-     */
-    Formattable::Type* argTypes;
-    int32_t            argTypeCount;
-    int32_t            argTypeCapacity;
-
-    /**
-      * Is true iff all argument names are non-negative numbers.
-      * 
-      */
-    UBool isArgNumeric;
-
-    // Variable-size array management
-    UBool allocateSubformats(int32_t capacity);
-    UBool allocateArgTypes(int32_t capacity);
-
-    /**
-     * Default Format objects used when no format is specified and a
-     * numeric or date argument is formatted.  These are volatile
-     * cache objects maintained only for performance.  They do not
-     * participate in operator=(), copy constructor(), nor
-     * operator==().
-     */
-    NumberFormat* defaultNumberFormat;
-    DateFormat*   defaultDateFormat;
-
-    /**
-     * Method to retrieve default formats (or NULL on failure).
-     * These are semantically const, but may modify *this.
-     */
-    const NumberFormat* getDefaultNumberFormat(UErrorCode&) const;
-    const DateFormat*   getDefaultDateFormat(UErrorCode&) const;
-
-    /**
-     * Finds the word s, in the keyword list and returns the located index.
-     * @param s the keyword to be searched for.
-     * @param list the list of keywords to be searched with.
-     * @return the index of the list which matches the keyword s.
-     */
-    static int32_t findKeyword( const UnicodeString& s,
-                                const UChar * const *list);
-
-    /**
-     * Formats the array of arguments and copies the result into the
-     * result buffer, updates the field position.
-     *
-     * @param arguments The formattable objects array.
-     * @param cnt       The array count.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param status    Field position status.
-     * @param recursionProtection
-     *                  Initially zero. Bits 0..9 are used to indicate
-     *                  that a parameter has already been seen, to
-     *                  avoid recursion.  Currently unused.
-     * @param success   The error code status.
-     * @return          Reference to 'appendTo' parameter.
-     */
-    UnicodeString&  format( const Formattable* arguments,
-                            int32_t cnt,
-                            UnicodeString& appendTo,
-                            FieldPosition& status,
-                            int32_t recursionProtection,
-                            UErrorCode& success) const;
-    
-    UnicodeString&  format( const Formattable* arguments, 
-                            const UnicodeString *argumentNames,
-                            int32_t cnt,
-                            UnicodeString& appendTo,
-                            FieldPosition& status,
-                            int32_t recursionProtection,
-                            UErrorCode& success) const;
-
-    void             makeFormat(int32_t offsetNumber,
-                                UnicodeString* segments,
-                                UParseError& parseError,
-                                UErrorCode& success);
-
-    /**
-     * Convenience method that ought to be in NumberFormat
-     */
-    NumberFormat* createIntegerFormat(const Locale& locale, UErrorCode& status) const;
-
-    /**
-     * Checks the range of the source text to quote the special
-     * characters, { and ' and copy to target buffer.
-     * @param source
-     * @param start the text offset to start the process of in the source string
-     * @param end the text offset to end the process of in the source string
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     */
-    static void copyAndFixQuotes(const UnicodeString& appendTo, int32_t start, int32_t end, UnicodeString& target);
-
-    /**
-     * Returns array of argument types in the parsed pattern 
-     * for use in C API.  Only for the use of umsg_vformat().  Not
-     * for public consumption.
-     * @param listCount  Output parameter to receive the size of array
-     * @return           The array of formattable types in the pattern
-     * @internal
-     */
-    const Formattable::Type* getArgTypeList(int32_t& listCount) const {
-        listCount = argTypeCount;
-        return argTypes; 
-    }
-    
-    /**
-     * Returns FALSE if the argument name is not legal.
-     * @param  argName   argument name.
-     * @return TRUE if the argument name is legal, otherwise return FALSE.
-     */
-    UBool isLegalArgName(const UnicodeString& argName) const;
-    
-    friend class MessageFormatAdapter; // getFormatTypeList() access
-};
-
-inline UnicodeString&
-MessageFormat::format(const Formattable& obj,
-                      UnicodeString& appendTo,
-                      UErrorCode& status) const {
-    return Format::format(obj, appendTo, status);
-}
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _MSGFMT
-//eof
-

Copied: MacRuby/trunk/icu-1060/unicode/msgfmt.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/msgfmt.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/msgfmt.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/msgfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,940 @@
+/*
+* Copyright (C) 2007-2008, International Business Machines Corporation and others. All Rights Reserved.
+********************************************************************************
+*
+* File MSGFMT.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   02/19/97    aliu        Converted from java.
+*   03/20/97    helena      Finished first cut of implementation.
+*   07/22/98    stephen     Removed operator!= (defined in Format)
+*   08/19/2002  srl         Removing Javaisms
+********************************************************************************
+*/
+
+#ifndef MSGFMT_H
+#define MSGFMT_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Formats messages in a language-neutral way.
+ */
+ 
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/format.h"
+#include "unicode/locid.h"
+#include "unicode/parseerr.h"
+#include "unicode/uchar.h"
+
+U_NAMESPACE_BEGIN
+
+class NumberFormat;
+class DateFormat;
+
+/**
+ *
+ * A MessageFormat produces concatenated messages in a
+ * language-neutral way.  It should be used for all string
+ * concatenations that are visible to end users.
+ * <P>
+ * A MessageFormat contains an array of <EM>subformats</EM> arranged
+ * within a <EM>template string</EM>.  Together, the subformats and
+ * template string determine how the MessageFormat will operate during
+ * formatting and parsing.
+ * <P>
+ * Typically, both the subformats and the template string are
+ * specified at once in a <EM>pattern</EM>.  By using different
+ * patterns for different locales, messages may be localized.
+ * <P>
+ * During formatting, the MessageFormat takes an array of arguments
+ * and produces a user-readable string.  Each argument is a
+ * Formattable object; they may be passed in in an array, or as a
+ * single Formattable object which itself contains an array.  Each
+ * argument is matched up with its corresponding subformat, which then
+ * formats it into a string.  The resultant strings are then assembled
+ * within the string template of the MessageFormat to produce the
+ * final output string.
+ * <p>
+ * <strong>Note:</strong>
+ * In ICU 4.0 MessageFormat supports named arguments.  If a named argument
+ * is used, all arguments must be named.  Names start with a character in 
+ * <code>UCHAR_ID_START</code> and continue with characters in 
+ * <code>UCHARID_CONTINUE</code>, in particular they do not start with a digit.
+ * If named arguments are used, {@link #usesNamedArguments()} will return true.
+ * <p>
+ * The other new methods supporting named arguments are 
+ * {@link #getFormatNames(UErrorCode& status)},
+ * {@link #getFormat(const UnicodeString& formatName, UErrorCode& status)}
+ * {@link #setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status)},
+ * {@link #adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status)},
+ * {@link #format(const Formattable* arguments, const UnicodeString *argumentNames, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection, UErrorCode& success)},
+ * {@link #format(const UnicodeString* argumentNames, const Formattable* arguments, int32_t count, UnicodeString& appendTo,UErrorCode& status)}.
+ * These methods are all compatible with patterns that do not used named arguments-- 
+ * in these cases the keys in the input or output use <code>UnicodeString</code>s 
+ * that name the argument indices, e.g. "0", "1", "2"... etc.
+ * <p>
+ * When named arguments are used, certain methods on MessageFormat that take or
+ * return arrays do not perform any action, since it is not possible to
+ * identify positions in an array using a name.  UErrorCode is set to
+ * U_ARGUMENT_TYPE_MISMATCH if there is a status/success field in the method.
+ * These methods are 
+ * {@link #adoptFormats(Format** newFormats, int32_t count)},
+ * {@link #setFormats(const Format** newFormats,int32_t count)},
+ * {@link #adoptFormat(int32_t n, Format *newFormat)},
+ * {@link #getFormats(int32_t& cnt)},
+ * {@link #format(const Formattable* source,int32_t cnt,UnicodeString& appendTo, FieldPosition& ignore, UErrorCode& success)},
+ * {@link #format(const UnicodeString& pattern,const Formattable* arguments,int32_t cnt,UnicodeString& appendTo,UErrorCode& success)},
+ * {@link #format(const Formattable& source, UnicodeString& appendTo,FieldPosition& ignore, UErrorCode& success)},
+ * {@link #format(const Formattable* arguments, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection,UErrorCode& success)},
+ * {@link #parse(const UnicodeString& source, ParsePosition& pos,int32_t& count)},
+ * {@link #parse(const UnicodeString& source, int32_t& cnt, UErrorCode& status)}
+ * <p>
+ *
+ * <P>
+ * During parsing, an input string is matched against the string
+ * template of the MessageFormat to produce an array of Formattable
+ * objects.  Plain text of the template string is matched directly
+ * against intput text.  At each position in the template string where
+ * a subformat is located, the subformat is called to parse the
+ * corresponding segment of input text to produce an output argument.
+ * In this way, an array of arguments is created which together
+ * constitute the parse result.
+ * <P>
+ * Parsing may fail or produce unexpected results in a number of
+ * circumstances.
+ * <UL>
+ * <LI>If one of the arguments does not occur in the pattern, it
+ * will be returned as a default Formattable.
+ * <LI>If the format of an argument is loses information, such as with
+ * a choice format where a large number formats to "many", then the
+ * parse may not correspond to the originally formatted argument.
+ * <LI>MessageFormat does not handle ChoiceFormat recursion during
+ * parsing; such parses will fail.
+ * <LI>Parsing will not always find a match (or the correct match) if
+ * some part of the parse is ambiguous.  For example, if the pattern
+ * "{1},{2}" is used with the string arguments {"a,b", "c"}, it will
+ * format as "a,b,c".  When the result is parsed, it will return {"a",
+ * "b,c"}.
+ * <LI>If a single argument is formatted more than once in the string,
+ * then the rightmost subformat in the pattern string will produce the
+ * parse result; prior subformats with the same argument index will
+ * have no effect.
+ * </UL>
+ * Here are some examples of usage:
+ * <P>
+ * Example 1:
+ * <pre>
+ * \code
+ *     UErrorCode success = U_ZERO_ERROR;
+ *     GregorianCalendar cal(success);
+ *     Formattable arguments[] = {
+ *         7L,
+ *         Formattable( (Date) cal.getTime(success), Formattable::kIsDate),
+ *         "a disturbance in the Force"
+ *     };
+ *
+ *     UnicodeString result;
+ *     MessageFormat::format(
+ *          "At {1,time} on {1,date}, there was {2} on planet {0,number}.",
+ *          arguments, 3, result, success );
+ *
+ *     cout << "result: " << result << endl;
+ *     //<output>: At 4:34:20 PM on 23-Mar-98, there was a disturbance
+ *     //             in the Force on planet 7.
+ * \endcode
+ * </pre>
+ * Typically, the message format will come from resources, and the
+ * arguments will be dynamically set at runtime.
+ * <P>
+ * Example 2:
+ * <pre>
+ *  \code
+ *     success = U_ZERO_ERROR;
+ *     Formattable testArgs[] = {3L, "MyDisk"};
+ *
+ *     MessageFormat form(
+ *         "The disk \"{1}\" contains {0} file(s).", success );
+ *
+ *     UnicodeString string;
+ *     FieldPosition fpos = 0;
+ *     cout << "format: " << form.format(testArgs, 2, string, fpos, success ) << endl;
+ *
+ *     // output, with different testArgs:
+ *     // output: The disk "MyDisk" contains 0 file(s).
+ *     // output: The disk "MyDisk" contains 1 file(s).
+ *     // output: The disk "MyDisk" contains 1,273 file(s).
+ *  \endcode
+ *  </pre>
+ *
+ *  The pattern is of the following form.  Legend:
+ *  <pre>
+ * \code
+ *       {optional item}
+ *       (group that may be repeated)*
+ * \endcode
+ *  </pre>
+ *  Do not confuse optional items with items inside quotes braces, such
+ *  as this: "{".  Quoted braces are literals.
+ *  <pre>
+ *  \code
+ *       messageFormatPattern := string ( "{" messageFormatElement "}" string )*
+ *
+ *       messageFormatElement := argumentIndex | argumentName { "," elementFormat }
+ *
+ *       elementFormat := "time" { "," datetimeStyle }
+ *                      | "date" { "," datetimeStyle }
+ *                      | "number" { "," numberStyle }
+ *                      | "choice" "," choiceStyle
+ *
+ *       datetimeStyle := "short"
+ *                      | "medium"
+ *                      | "long"
+ *                      | "full"
+ *                      | dateFormatPattern
+ *
+ *       numberStyle :=   "currency"
+ *                      | "percent"
+ *                      | "integer"
+ *                      | numberFormatPattern
+ *
+ *       choiceStyle :=   choiceFormatPattern
+ * 
+ *       pluralStyle := pluralFormatPattern
+ * \endcode
+ * </pre>
+ * If there is no elementFormat, then the argument must be a string,
+ * which is substituted. If there is no dateTimeStyle or numberStyle,
+ * then the default format is used (e.g.  NumberFormat::createInstance(),
+ * DateFormat::createTimeInstance(DateFormat::kDefault, ...) or DateFormat::createDateInstance(DateFormat::kDefault, ...). For
+ * a ChoiceFormat, the pattern must always be specified, since there
+ * is no default.
+ * <P>
+ * In strings, single quotes can be used to quote syntax characters.
+ * A literal single quote is represented by '', both within and outside
+ * of single-quoted segments.  Inside a
+ * messageFormatElement, quotes are <EM>not</EM> removed. For example,
+ * {1,number,$'#',##} will produce a number format with the pound-sign
+ * quoted, with a result such as: "$#31,45".
+ * <P>
+ * If a pattern is used, then unquoted braces in the pattern, if any,
+ * must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab
+ * {0'}' de" and "ab } de" are not.
+ * <p>
+ * <dl><dt><b>Warning:</b><dd>The rules for using quotes within message
+ * format patterns unfortunately have shown to be somewhat confusing.
+ * In particular, it isn't always obvious to localizers whether single
+ * quotes need to be doubled or not. Make sure to inform localizers about
+ * the rules, and tell them (for example, by using comments in resource
+ * bundle source files) which strings will be processed by MessageFormat.
+ * Note that localizers may need to use single quotes in translated
+ * strings where the original version doesn't have them.
+ * <br>Note also that the simplest way to avoid the problem is to
+ * use the real apostrophe (single quote) character U+2019 (') for
+ * human-readable text, and to use the ASCII apostrophe (U+0027 ' )
+ * only in program syntax, like quoting in MessageFormat.
+ * See the annotations for U+0027 Apostrophe in The Unicode Standard.</p>
+ * </dl>
+ * <P>
+ * The argumentIndex is a non-negative integer, which corresponds to the
+ * index of the arguments presented in an array to be formatted.  The
+ * first argument has argumentIndex 0.
+ * <P>
+ * It is acceptable to have unused arguments in the array.  With missing
+ * arguments or arguments that are not of the right class for the
+ * specified format, a failing UErrorCode result is set.
+ * <P>
+ * For more sophisticated patterns, you can use a ChoiceFormat to get
+ * output:
+ * <pre>
+ * \code
+ *     UErrorCode success = U_ZERO_ERROR;
+ *     MessageFormat* form("The disk \"{1}\" contains {0}.", success);
+ *     double filelimits[] = {0,1,2};
+ *     UnicodeString filepart[] = {"no files","one file","{0,number} files"};
+ *     ChoiceFormat* fileform = new ChoiceFormat(filelimits, filepart, 3);
+ *     form.setFormat(1, *fileform); // NOT zero, see below
+ *
+ *     Formattable testArgs[] = {1273L, "MyDisk"};
+ *
+ *     UnicodeString string;
+ *     FieldPosition fpos = 0;
+ *     cout << form.format(testArgs, 2, string, fpos, success) << endl;
+ *
+ *     // output, with different testArgs
+ *     // output: The disk "MyDisk" contains no files.
+ *     // output: The disk "MyDisk" contains one file.
+ *     // output: The disk "MyDisk" contains 1,273 files.
+ * \endcode
+ * </pre>
+ * You can either do this programmatically, as in the above example,
+ * or by using a pattern (see ChoiceFormat for more information) as in:
+ * <pre>
+ * \code
+ *    form.applyPattern(
+ *      "There {0,choice,0#are no files|1#is one file|1<are {0,number,integer} files}.");
+ * \endcode
+ * </pre>
+ * <P>
+ * <EM>Note:</EM> As we see above, the string produced by a ChoiceFormat in
+ * MessageFormat is treated specially; occurences of '{' are used to
+ * indicated subformats, and cause recursion.  If you create both a
+ * MessageFormat and ChoiceFormat programmatically (instead of using
+ * the string patterns), then be careful not to produce a format that
+ * recurses on itself, which will cause an infinite loop.
+ * <P>
+ * <EM>Note:</EM> Subformats are numbered by their order in the pattern.
+ * This is <EM>not</EM> the same as the argumentIndex.
+ * <pre>
+ * \code
+ *    For example: with "abc{2}def{3}ghi{0}...",
+ *
+ *    format0 affects the first variable {2}
+ *    format1 affects the second variable {3}
+ *    format2 affects the second variable {0}
+ * \endcode
+ * </pre>
+ *
+ * <p><em>User subclasses are not supported.</em> While clients may write
+ * subclasses, such code will not necessarily work and will not be
+ * guaranteed to work stably from release to release.
+ */
+class U_I18N_API MessageFormat : public Format {
+public:
+    /**
+     * Enum type for kMaxFormat.
+     * @obsolete ICU 3.0.  The 10-argument limit was removed as of ICU 2.6,
+     * rendering this enum type obsolete.
+     */
+    enum EFormatNumber {
+        /**
+         * The maximum number of arguments.
+         * @obsolete ICU 3.0.  The 10-argument limit was removed as of ICU 2.6,
+         * rendering this constant obsolete.
+         */
+        kMaxFormat = 10
+    };
+
+    /**
+     * Constructs a new MessageFormat using the given pattern and the
+     * default locale.
+     *
+     * @param pattern   Pattern used to construct object.
+     * @param status    Input/output error code.  If the
+     *                  pattern cannot be parsed, set to failure code.
+     * @stable ICU 2.0
+     */
+    MessageFormat(const UnicodeString& pattern,
+                  UErrorCode &status);
+
+    /**
+     * Constructs a new MessageFormat using the given pattern and locale.
+     * @param pattern   Pattern used to construct object.
+     * @param newLocale The locale to use for formatting dates and numbers.
+     * @param status    Input/output error code.  If the
+     *                  pattern cannot be parsed, set to failure code.
+     * @stable ICU 2.0
+     */
+    MessageFormat(const UnicodeString& pattern,
+                  const Locale& newLocale,
+                        UErrorCode& status);
+    /**
+     * Constructs a new MessageFormat using the given pattern and locale.
+     * @param pattern   Pattern used to construct object.
+     * @param newLocale The locale to use for formatting dates and numbers.
+     * @param parseError Struct to recieve information on position 
+     *                   of error within the pattern.
+     * @param status    Input/output error code.  If the
+     *                  pattern cannot be parsed, set to failure code.
+     * @stable ICU 2.0
+     */
+    MessageFormat(const UnicodeString& pattern,
+                  const Locale& newLocale,
+                  UParseError& parseError,
+                  UErrorCode& status);
+    /**
+     * Constructs a new MessageFormat from an existing one.
+     * @stable ICU 2.0
+     */
+    MessageFormat(const MessageFormat&);
+
+    /**
+     * Assignment operator.
+     * @stable ICU 2.0
+     */
+    const MessageFormat& operator=(const MessageFormat&);
+
+    /**
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~MessageFormat();
+
+    /**
+     * Clones this Format object polymorphically.  The caller owns the
+     * result and should delete it when done.
+     * @stable ICU 2.0
+     */
+    virtual Format* clone(void) const;
+
+    /**
+     * Returns true if the given Format objects are semantically equal.
+     * Objects of different subclasses are considered unequal.
+     * @param other  the object to be compared with.
+     * @return       true if the given Format objects are semantically equal.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const Format& other) const;
+
+    /**
+     * Sets the locale. This locale is used for fetching default number or date
+     * format information.
+     * @param theLocale    the new locale value to be set.
+     * @stable ICU 2.0
+     */
+    virtual void setLocale(const Locale& theLocale);
+
+    /**
+     * Gets the locale. This locale is used for fetching default number or date
+     * format information.
+     * @return    the locale of the object.
+     * @stable ICU 2.0
+     */
+    virtual const Locale& getLocale(void) const;
+
+    /**
+     * Applies the given pattern string to this message format.
+     *
+     * @param pattern   The pattern to be applied.
+     * @param status    Input/output error code.  If the
+     *                  pattern cannot be parsed, set to failure code.
+     * @stable ICU 2.0
+     */
+    virtual void applyPattern(const UnicodeString& pattern,
+                              UErrorCode& status);
+    /**
+     * Applies the given pattern string to this message format.
+     *
+     * @param pattern    The pattern to be applied.
+     * @param parseError Struct to recieve information on position 
+     *                   of error within pattern.
+     * @param status    Input/output error code.  If the
+     *                  pattern cannot be parsed, set to failure code.
+     * @stable ICU 2.0
+     */
+    virtual void applyPattern(const UnicodeString& pattern,
+                             UParseError& parseError,
+                             UErrorCode& status);
+
+    /**
+     * Returns a pattern that can be used to recreate this object.
+     *
+     * @param appendTo  Output parameter to receive the pattern.
+     *                  Result is appended to existing contents.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& toPattern(UnicodeString& appendTo) const;
+
+    /**
+     * Sets subformats.
+     * See the class description about format numbering.
+     * The caller should not delete the Format objects after this call.
+     * <EM>The array formatsToAdopt is not itself adopted.</EM> Its
+     * ownership is retained by the caller. If the call fails because
+     * memory cannot be allocated, then the formats will be deleted
+     * by this method, and this object will remain unchanged.
+     * 
+     * @stable ICU 2.0
+     * @param formatsToAdopt    the format to be adopted.
+     * @param count             the size of the array.
+     */
+    virtual void adoptFormats(Format** formatsToAdopt, int32_t count);
+
+    /**
+     * Sets subformats.
+     * See the class description about format numbering.
+     * Each item in the array is cloned into the internal array.
+     * If the call fails because memory cannot be allocated, then this
+     * object will remain unchanged.
+     * 
+     * @stable ICU 2.0
+     * @param newFormats the new format to be set.
+     * @param cnt        the size of the array.
+     */
+    virtual void setFormats(const Format** newFormats, int32_t cnt);
+
+
+    /**
+     * Sets one subformat.
+     * See the class description about format numbering.
+     * The caller should not delete the Format object after this call.
+     * If the number is over the number of formats already set,
+     * the item will be deleted and ignored.
+     * @stable ICU 2.0
+     * @param formatNumber     index of the subformat.
+     * @param formatToAdopt    the format to be adopted.
+     */
+    virtual void adoptFormat(int32_t formatNumber, Format* formatToAdopt);
+
+    /**
+     * Sets one subformat.
+     * See the class description about format numbering.
+     * If the number is over the number of formats already set,
+     * the item will be ignored.
+     * @param formatNumber     index of the subformat.
+     * @param format    the format to be set.
+     * @stable ICU 2.0
+     */
+    virtual void setFormat(int32_t formatNumber, const Format& format);
+
+    /**
+     * Gets format names. This function returns formatNames in StringEnumerations
+     * which can be used with getFormat() and setFormat() to export formattable 
+     * array from current MessageFormat to another.  It is caller's resposibility 
+     * to delete the returned formatNames.
+     * @param status  output param set to success/failure code.
+     * @draft ICU 4.0
+     */
+    virtual StringEnumeration* getFormatNames(UErrorCode& status);
+    
+    /**
+     * Gets subformat pointer for given format name.   
+     * This function supports both named and numbered
+     * arguments-- if numbered, the formatName is the
+     * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
+     * The returned Format object should not be deleted by the caller,
+     * nor should the ponter of other object .  The pointer and its 
+     * contents remain valid only until the next call to any method
+     * of this class is made with this object. 
+     * @param formatName the name or number specifying a format
+     * @param status  output param set to success/failure code.
+     * @draft ICU 4.0
+     */
+    virtual Format* getFormat(const UnicodeString& formatName, UErrorCode& status);
+    
+    /**
+     * Sets one subformat for given format name.
+     * See the class description about format name. 
+     * This function supports both named and numbered
+     * arguments-- if numbered, the formatName is the
+     * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
+     * If there is no matched formatName or wrong type,
+     * the item will be ignored.
+     * @param formatName  Name of the subformat.
+     * @param format      the format to be set.
+     * @param status  output param set to success/failure code.
+     * @draft ICU 4.0
+     */
+    virtual void setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status);
+    
+    /**
+     * Sets one subformat for given format name.
+     * See the class description about format name. 
+     * This function supports both named and numbered
+     * arguments-- if numbered, the formatName is the
+     * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
+     * If there is no matched formatName or wrong type,
+     * the item will be ignored.
+     * The caller should not delete the Format object after this call.
+     * @param formatName  Name of the subformat.
+     * @param formatToAdopt  Format to be adopted.
+     * @param status      output param set to success/failure code.
+     * @draft ICU 4.0
+     */
+    virtual void adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status);
+
+
+    /**
+     * Gets an array of subformats of this object.  The returned array
+     * should not be deleted by the caller, nor should the pointers
+     * within the array.  The array and its contents remain valid only
+     * until the next call to any method of this class is made with
+     * this object.  See the class description about format numbering.
+     * @param count output parameter to receive the size of the array
+     * @return an array of count Format* objects, or NULL if out of
+     * memory.  Any or all of the array elements may be NULL.
+     * @stable ICU 2.0
+     */
+    virtual const Format** getFormats(int32_t& count) const;
+
+    /**
+     * Formats the given array of arguments into a user-readable string.
+     * Does not take ownership of the Formattable* array or its contents.
+     *
+     * @param source    An array of objects to be formatted.
+     * @param count     The number of elements of 'source'.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param ignore    Not used; inherited from base class API.
+     * @param status    Input/output error code.  If the
+     *                  pattern cannot be parsed, set to failure code.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(  const Formattable* source,
+                            int32_t count,
+                            UnicodeString& appendTo,
+                            FieldPosition& ignore,
+                            UErrorCode& status) const;
+
+    /**
+     * Formats the given array of arguments into a user-readable string
+     * using the given pattern.
+     *
+     * @param pattern   The pattern.
+     * @param arguments An array of objects to be formatted.
+     * @param count     The number of elements of 'source'.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param status    Input/output error code.  If the
+     *                  pattern cannot be parsed, set to failure code.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    static UnicodeString& format(const UnicodeString& pattern,
+                                 const Formattable* arguments,
+                                 int32_t count,
+                                 UnicodeString& appendTo,
+                                 UErrorCode& status);
+
+    /**
+     * Formats the given array of arguments into a user-readable
+     * string.  The array must be stored within a single Formattable
+     * object of type kArray. If the Formattable object type is not of
+     * type kArray, then returns a failing UErrorCode.
+     *
+     * @param obj       A Formattable of type kArray containing
+     *                  arguments to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @param status    Input/output error code.  If the
+     *                  pattern cannot be parsed, set to failure code.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& format(const Formattable& obj,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos,
+                                  UErrorCode& status) const;
+
+    /**
+     * Formats the given array of arguments into a user-readable
+     * string.  The array must be stored within a single Formattable
+     * object of type kArray. If the Formattable object type is not of
+     * type kArray, then returns a failing UErrorCode.
+     *
+     * @param obj       The object to format
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param status    Input/output error code.  If the
+     *                  pattern cannot be parsed, set to failure code.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(const Formattable& obj,
+                          UnicodeString& appendTo,
+                          UErrorCode& status) const;
+    
+
+    /**
+     * Formats the given array of arguments into a user-defined argument name
+     * array. This function supports both named and numbered
+     * arguments-- if numbered, the formatName is the
+     * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
+     *
+     * @param argumentNames argument name array
+     * @param arguments An array of objects to be formatted.
+     * @param count     The number of elements of 'argumentNames' and 
+     *                  arguments.  The number of argumentNames and arguments
+     *                  must be the same.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param status    Input/output error code.  If the
+     *                  pattern cannot be parsed, set to failure code.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 4.0
+     */
+    UnicodeString& format(const UnicodeString* argumentNames,
+                          const Formattable* arguments,
+                          int32_t count,
+                          UnicodeString& appendTo,
+                          UErrorCode& status) const;
+    /**
+     * Parses the given string into an array of output arguments.
+     *
+     * @param source    String to be parsed.
+     * @param pos       On input, starting position for parse. On output,
+     *                  final position after parse.  Unchanged if parse
+     *                  fails.
+     * @param count     Output parameter to receive the number of arguments
+     *                  parsed.
+     * @return an array of parsed arguments.  The caller owns both
+     * the array and its contents.
+     * @stable ICU 2.0
+     */
+    virtual Formattable* parse( const UnicodeString& source,
+                                ParsePosition& pos,
+                                int32_t& count) const;
+
+    /**
+     * Parses the given string into an array of output arguments.
+     *
+     * @param source    String to be parsed.
+     * @param count     Output param to receive size of returned array.
+     * @param status    Input/output error code.  If the
+     *                  pattern cannot be parsed, set to failure code. 
+     *                  If the MessageFormat is named argument, the status is 
+     *                  set to U_ARGUMENT_TYPE_MISMATCH.
+     * @return an array of parsed arguments.  The caller owns both
+     * the array and its contents. Return NULL if status is not U_ZERO_ERROR.
+     * 
+     * @stable ICU 2.0
+     */
+    virtual Formattable* parse( const UnicodeString& source,
+                                int32_t& count,
+                                UErrorCode& status) const;
+
+    /**
+     * Parses the given string into an array of output arguments
+     * stored within a single Formattable of type kArray.
+     *
+     * @param source    The string to be parsed into an object.
+     * @param result    Formattable to be set to the parse result.
+     *                  If parse fails, return contents are undefined.
+     * @param pos       On input, starting position for parse. On output,
+     *                  final position after parse.  Unchanged if parse
+     *                  fails.
+     * @stable ICU 2.0
+     */
+    virtual void parseObject(const UnicodeString& source,
+                             Formattable& result,
+                             ParsePosition& pos) const;
+
+    /**
+     * Convert an 'apostrophe-friendly' pattern into a standard
+     * pattern.  Standard patterns treat all apostrophes as
+     * quotes, which is problematic in some languages, e.g. 
+     * French, where apostrophe is commonly used.  This utility
+     * assumes that only an unpaired apostrophe immediately before
+     * a brace is a true quote.  Other unpaired apostrophes are paired,
+     * and the resulting standard pattern string is returned.
+     *
+     * <p><b>Note</b> it is not guaranteed that the returned pattern
+     * is indeed a valid pattern.  The only effect is to convert
+     * between patterns having different quoting semantics.
+     *
+     * @param pattern the 'apostrophe-friendly' patttern to convert
+     * @param status    Input/output error code.  If the pattern
+     *                  cannot be parsed, the failure code is set.
+     * @return the standard equivalent of the original pattern
+     * @stable ICU 3.4
+     */
+    static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern, 
+        UErrorCode& status);
+    
+    /**
+     * Returns true if this MessageFormat uses named arguments,
+     * and false otherwise.  See class description.
+     *
+     * @return true if named arguments are used.
+     * @draft ICU 4.0
+     */
+    UBool usesNamedArguments() const;
+    
+    /**
+     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
+     * This method is to implement a simple version of RTTI, since not all
+     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
+     * clone() methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+    /**
+     * Return the class ID for this class.  This is useful only for
+     * comparing to a return value from getDynamicClassID().  For example:
+     * <pre>
+     * .   Base* polymorphic_pointer = createPolymorphicObject();
+     * .   if (polymorphic_pointer->getDynamicClassID() ==
+     * .      Derived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+    
+private:
+
+    Locale              fLocale;
+    UnicodeString       fPattern;
+    Format**            formatAliases; // see getFormats
+    int32_t             formatAliasesCapacity;
+    UProperty           idStart;
+    UProperty           idContinue;
+
+    MessageFormat(); // default constructor not implemented
+
+    /*
+     * A structure representing one subformat of this MessageFormat.
+     * Each subformat has a Format object, an offset into the plain
+     * pattern text fPattern, and an argument number.  The argument
+     * number corresponds to the array of arguments to be formatted.
+     * @internal
+     */
+    class Subformat;
+
+    /**
+     * A MessageFormat contains an array of subformats.  This array
+     * needs to grow dynamically if the MessageFormat is modified.
+     */
+    Subformat* subformats;
+    int32_t    subformatCount;
+    int32_t    subformatCapacity;
+
+    /**
+     * A MessageFormat formats an array of arguments.  Each argument
+     * has an expected type, based on the pattern.  For example, if
+     * the pattern contains the subformat "{3,number,integer}", then
+     * we expect argument 3 to have type Formattable::kLong.  This
+     * array needs to grow dynamically if the MessageFormat is
+     * modified.
+     */
+    Formattable::Type* argTypes;
+    int32_t            argTypeCount;
+    int32_t            argTypeCapacity;
+
+    /**
+      * Is true iff all argument names are non-negative numbers.
+      * 
+      */
+    UBool isArgNumeric;
+
+    // Variable-size array management
+    UBool allocateSubformats(int32_t capacity);
+    UBool allocateArgTypes(int32_t capacity);
+
+    /**
+     * Default Format objects used when no format is specified and a
+     * numeric or date argument is formatted.  These are volatile
+     * cache objects maintained only for performance.  They do not
+     * participate in operator=(), copy constructor(), nor
+     * operator==().
+     */
+    NumberFormat* defaultNumberFormat;
+    DateFormat*   defaultDateFormat;
+
+    /**
+     * Method to retrieve default formats (or NULL on failure).
+     * These are semantically const, but may modify *this.
+     */
+    const NumberFormat* getDefaultNumberFormat(UErrorCode&) const;
+    const DateFormat*   getDefaultDateFormat(UErrorCode&) const;
+
+    /**
+     * Finds the word s, in the keyword list and returns the located index.
+     * @param s the keyword to be searched for.
+     * @param list the list of keywords to be searched with.
+     * @return the index of the list which matches the keyword s.
+     */
+    static int32_t findKeyword( const UnicodeString& s,
+                                const UChar * const *list);
+
+    /**
+     * Formats the array of arguments and copies the result into the
+     * result buffer, updates the field position.
+     *
+     * @param arguments The formattable objects array.
+     * @param cnt       The array count.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param status    Field position status.
+     * @param recursionProtection
+     *                  Initially zero. Bits 0..9 are used to indicate
+     *                  that a parameter has already been seen, to
+     *                  avoid recursion.  Currently unused.
+     * @param success   The error code status.
+     * @return          Reference to 'appendTo' parameter.
+     */
+    UnicodeString&  format( const Formattable* arguments,
+                            int32_t cnt,
+                            UnicodeString& appendTo,
+                            FieldPosition& status,
+                            int32_t recursionProtection,
+                            UErrorCode& success) const;
+    
+    UnicodeString&  format( const Formattable* arguments, 
+                            const UnicodeString *argumentNames,
+                            int32_t cnt,
+                            UnicodeString& appendTo,
+                            FieldPosition& status,
+                            int32_t recursionProtection,
+                            UErrorCode& success) const;
+
+    void             makeFormat(int32_t offsetNumber,
+                                UnicodeString* segments,
+                                UParseError& parseError,
+                                UErrorCode& success);
+
+    /**
+     * Convenience method that ought to be in NumberFormat
+     */
+    NumberFormat* createIntegerFormat(const Locale& locale, UErrorCode& status) const;
+
+    /**
+     * Checks the range of the source text to quote the special
+     * characters, { and ' and copy to target buffer.
+     * @param source
+     * @param start the text offset to start the process of in the source string
+     * @param end the text offset to end the process of in the source string
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     */
+    static void copyAndFixQuotes(const UnicodeString& appendTo, int32_t start, int32_t end, UnicodeString& target);
+
+    /**
+     * Returns array of argument types in the parsed pattern 
+     * for use in C API.  Only for the use of umsg_vformat().  Not
+     * for public consumption.
+     * @param listCount  Output parameter to receive the size of array
+     * @return           The array of formattable types in the pattern
+     * @internal
+     */
+    const Formattable::Type* getArgTypeList(int32_t& listCount) const {
+        listCount = argTypeCount;
+        return argTypes; 
+    }
+    
+    /**
+     * Returns FALSE if the argument name is not legal.
+     * @param  argName   argument name.
+     * @return TRUE if the argument name is legal, otherwise return FALSE.
+     */
+    UBool isLegalArgName(const UnicodeString& argName) const;
+    
+    friend class MessageFormatAdapter; // getFormatTypeList() access
+};
+
+inline UnicodeString&
+MessageFormat::format(const Formattable& obj,
+                      UnicodeString& appendTo,
+                      UErrorCode& status) const {
+    return Format::format(obj, appendTo, status);
+}
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _MSGFMT
+//eof
+

Deleted: MacRuby/trunk/icu-1060/unicode/normlzr.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/normlzr.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/normlzr.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,823 +0,0 @@
-/*
- ********************************************************************
- * COPYRIGHT:
- * Copyright (c) 1996-2006, International Business Machines Corporation and
- * others. All Rights Reserved.
- ********************************************************************
- */
-
-#ifndef NORMLZR_H
-#define NORMLZR_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Unicode Normalization
- */
- 
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-#include "unicode/chariter.h"
-#include "unicode/unorm.h"
-
-
-struct UCharIterator;
-typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
-
-U_NAMESPACE_BEGIN
-/**
- * The Normalizer class supports the standard normalization forms described in
- * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
- * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
- *
- * The Normalizer class consists of two parts:
- * - static functions that normalize strings or test if strings are normalized
- * - a Normalizer object is an iterator that takes any kind of text and
- *   provides iteration over its normalized form
- *
- * The Normalizer class is not suitable for subclassing.
- *
- * The static functions are basically wrappers around the C implementation,
- * using UnicodeString instead of UChar*.
- * For basic information about normalization forms and details about the C API
- * please see the documentation in unorm.h.
- *
- * The iterator API with the Normalizer constructors and the non-static functions
- * uses a CharacterIterator as input. It is possible to pass a string which
- * is then internally wrapped in a CharacterIterator.
- * The input text is not normalized all at once, but incrementally where needed
- * (providing efficient random access).
- * This allows to pass in a large text but spend only a small amount of time
- * normalizing a small part of that text.
- * However, if the entire text is normalized, then the iterator will be
- * slower than normalizing the entire text at once and iterating over the result.
- * A possible use of the Normalizer iterator is also to report an index into the
- * original text that is close to where the normalized characters come from.
- *
- * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
- * The earlier implementation reported the getIndex() inconsistently,
- * and previous() could not be used after setIndex(), next(), first(), and current().
- *
- * Normalizer allows to start normalizing from anywhere in the input text by
- * calling setIndexOnly(), first(), or last().
- * Without calling any of these, the iterator will start at the beginning of the text.
- *
- * At any time, next() returns the next normalized code point (UChar32),
- * with post-increment semantics (like CharacterIterator::next32PostInc()).
- * previous() returns the previous normalized code point (UChar32),
- * with pre-decrement semantics (like CharacterIterator::previous32()).
- *
- * current() returns the current code point
- * (respectively the one at the newly set index) without moving
- * the getIndex(). Note that if the text at the current position
- * needs to be normalized, then these functions will do that.
- * (This is why current() is not const.)
- * It is more efficient to call setIndexOnly() instead, which does not
- * normalize.
- *
- * getIndex() always refers to the position in the input text where the normalized
- * code points are returned from. It does not always change with each returned
- * code point.
- * The code point that is returned from any of the functions
- * corresponds to text at or after getIndex(), according to the
- * function's iteration semantics (post-increment or pre-decrement).
- *
- * next() returns a code point from at or after the getIndex()
- * from before the next() call. After the next() call, the getIndex()
- * might have moved to where the next code point will be returned from
- * (from a next() or current() call).
- * This is semantically equivalent to array access with array[index++]
- * (post-increment semantics).
- *
- * previous() returns a code point from at or after the getIndex()
- * from after the previous() call.
- * This is semantically equivalent to array access with array[--index]
- * (pre-decrement semantics).
- *
- * Internally, the Normalizer iterator normalizes a small piece of text
- * starting at the getIndex() and ending at a following "safe" index.
- * The normalized results is stored in an internal string buffer, and
- * the code points are iterated from there.
- * With multiple iteration calls, this is repeated until the next piece
- * of text needs to be normalized, and the getIndex() needs to be moved.
- *
- * The following "safe" index, the internal buffer, and the secondary
- * iteration index into that buffer are not exposed on the API.
- * This also means that it is currently not practical to return to
- * a particular, arbitrary position in the text because one would need to
- * know, and be able to set, in addition to the getIndex(), at least also the
- * current index into the internal buffer.
- * It is currently only possible to observe when getIndex() changes
- * (with careful consideration of the iteration semantics),
- * at which time the internal index will be 0.
- * For example, if getIndex() is different after next() than before it,
- * then the internal index is 0 and one can return to this getIndex()
- * later with setIndexOnly().
- *
- * @author Laura Werner, Mark Davis, Markus Scherer
- * @stable ICU 2.0
- */
-class U_COMMON_API Normalizer : public UObject {
-public:
-  /**
-   * If DONE is returned from an iteration function that returns a code point,
-   * then there are no more normalization results available.
-   * @stable ICU 2.0
-   */
-  enum {
-      DONE=0xffff
-  };
-
-  // Constructors
-
-  /**
-   * Creates a new <code>Normalizer</code> object for iterating over the
-   * normalized form of a given string.
-   * <p>
-   * @param str   The string to be normalized.  The normalization
-   *              will start at the beginning of the string.
-   *
-   * @param mode  The normalization mode.
-   * @stable ICU 2.0
-   */
-  Normalizer(const UnicodeString& str, UNormalizationMode mode);
-
-  /**
-   * Creates a new <code>Normalizer</code> object for iterating over the
-   * normalized form of a given string.
-   * <p>
-   * @param str   The string to be normalized.  The normalization
-   *              will start at the beginning of the string.
-   *
-   * @param length Length of the string, or -1 if NUL-terminated.
-   * @param mode  The normalization mode.
-   * @stable ICU 2.0
-   */
-  Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
-
-  /**
-   * Creates a new <code>Normalizer</code> object for iterating over the
-   * normalized form of the given text.
-   * <p>
-   * @param iter  The input text to be normalized.  The normalization
-   *              will start at the beginning of the string.
-   *
-   * @param mode  The normalization mode.
-   * @stable ICU 2.0
-   */
-  Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
-
-  /**
-   * Copy constructor.
-   * @param copy The object to be copied.
-   * @stable ICU 2.0
-   */
-  Normalizer(const Normalizer& copy);
-
-  /**
-   * Destructor
-   * @stable ICU 2.0
-   */
-  virtual ~Normalizer();
-
-
-  //-------------------------------------------------------------------------
-  // Static utility methods
-  //-------------------------------------------------------------------------
-
-  /**
-   * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
-   * This is a wrapper for unorm_normalize(), using UnicodeString's.
-   *
-   * The <code>options</code> parameter specifies which optional
-   * <code>Normalizer</code> features are to be enabled for this operation.
-   *
-   * @param source    the input string to be normalized.
-   * @param mode      the normalization mode
-   * @param options   the optional features to be enabled (0 for no options)
-   * @param result    The normalized string (on output).
-   * @param status    The error code.
-   * @stable ICU 2.0
-   */
-  static void U_EXPORT2 normalize(const UnicodeString& source,
-                        UNormalizationMode mode, int32_t options,
-                        UnicodeString& result,
-                        UErrorCode &status);
-
-  /**
-   * Compose a <code>UnicodeString</code>.
-   * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
-   * This is a wrapper for unorm_normalize(), using UnicodeString's.
-   *
-   * The <code>options</code> parameter specifies which optional
-   * <code>Normalizer</code> features are to be enabled for this operation.
-   *
-   * @param source    the string to be composed.
-   * @param compat    Perform compatibility decomposition before composition.
-   *                  If this argument is <code>FALSE</code>, only canonical
-   *                  decomposition will be performed.
-   * @param options   the optional features to be enabled (0 for no options)
-   * @param result    The composed string (on output).
-   * @param status    The error code.
-   * @stable ICU 2.0
-   */
-  static void U_EXPORT2 compose(const UnicodeString& source,
-                      UBool compat, int32_t options,
-                      UnicodeString& result,
-                      UErrorCode &status);
-
-  /**
-   * Static method to decompose a <code>UnicodeString</code>.
-   * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
-   * This is a wrapper for unorm_normalize(), using UnicodeString's.
-   *
-   * The <code>options</code> parameter specifies which optional
-   * <code>Normalizer</code> features are to be enabled for this operation.
-   *
-   * @param source    the string to be decomposed.
-   * @param compat    Perform compatibility decomposition.
-   *                  If this argument is <code>FALSE</code>, only canonical
-   *                  decomposition will be performed.
-   * @param options   the optional features to be enabled (0 for no options)
-   * @param result    The decomposed string (on output).
-   * @param status    The error code.
-   * @stable ICU 2.0
-   */
-  static void U_EXPORT2 decompose(const UnicodeString& source,
-                        UBool compat, int32_t options,
-                        UnicodeString& result,
-                        UErrorCode &status);
-
-  /**
-   * Performing quick check on a string, to quickly determine if the string is
-   * in a particular normalization format.
-   * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
-   *
-   * Three types of result can be returned UNORM_YES, UNORM_NO or
-   * UNORM_MAYBE. Result UNORM_YES indicates that the argument
-   * string is in the desired normalized format, UNORM_NO determines that
-   * argument string is not in the desired normalized format. A
-   * UNORM_MAYBE result indicates that a more thorough check is required,
-   * the user may have to put the string in its normalized form and compare the
-   * results.
-   * @param source       string for determining if it is in a normalized format
-   * @param mode         normalization format
-   * @param status A reference to a UErrorCode to receive any errors
-   * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
-   *
-   * @see isNormalized
-   * @stable ICU 2.0
-   */
-  static inline UNormalizationCheckResult
-  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
-
-  /**
-   * Performing quick check on a string; same as the other version of quickCheck
-   * but takes an extra options parameter like most normalization functions.
-   *
-   * @param source       string for determining if it is in a normalized format
-   * @param mode         normalization format
-   * @param options      the optional features to be enabled (0 for no options)
-   * @param status A reference to a UErrorCode to receive any errors
-   * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
-   *
-   * @see isNormalized
-   * @stable ICU 2.6
-   */
-  static inline UNormalizationCheckResult
-  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
-
-  /**
-   * Test if a string is in a given normalization form.
-   * This is semantically equivalent to source.equals(normalize(source, mode)) .
-   *
-   * Unlike unorm_quickCheck(), this function returns a definitive result,
-   * never a "maybe".
-   * For NFD, NFKD, and FCD, both functions work exactly the same.
-   * For NFC and NFKC where quickCheck may return "maybe", this function will
-   * perform further tests to arrive at a TRUE/FALSE result.
-   *
-   * @param src        String that is to be tested if it is in a normalization format.
-   * @param mode       Which normalization form to test for.
-   * @param errorCode  ICU error code in/out parameter.
-   *                   Must fulfill U_SUCCESS before the function call.
-   * @return Boolean value indicating whether the source string is in the
-   *         "mode" normalization form.
-   *
-   * @see quickCheck
-   * @stable ICU 2.2
-   */
-  static inline UBool
-  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
-
-  /**
-   * Test if a string is in a given normalization form; same as the other version of isNormalized
-   * but takes an extra options parameter like most normalization functions.
-   *
-   * @param src        String that is to be tested if it is in a normalization format.
-   * @param mode       Which normalization form to test for.
-   * @param options      the optional features to be enabled (0 for no options)
-   * @param errorCode  ICU error code in/out parameter.
-   *                   Must fulfill U_SUCCESS before the function call.
-   * @return Boolean value indicating whether the source string is in the
-   *         "mode" normalization form.
-   *
-   * @see quickCheck
-   * @stable ICU 2.6
-   */
-  static inline UBool
-  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
-
-  /**
-   * Concatenate normalized strings, making sure that the result is normalized as well.
-   *
-   * If both the left and the right strings are in
-   * the normalization form according to "mode/options",
-   * then the result will be
-   *
-   * \code
-   *     dest=normalize(left+right, mode, options)
-   * \endcode
-   *
-   * For details see unorm_concatenate in unorm.h.
-   *
-   * @param left Left source string.
-   * @param right Right source string.
-   * @param result The output string.
-   * @param mode The normalization mode.
-   * @param options A bit set of normalization options.
-   * @param errorCode ICU error code in/out parameter.
-   *                   Must fulfill U_SUCCESS before the function call.
-   * @return result
-   *
-   * @see unorm_concatenate
-   * @see normalize
-   * @see unorm_next
-   * @see unorm_previous
-   *
-   * @stable ICU 2.1
-   */
-  static UnicodeString &
-  U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right,
-              UnicodeString &result,
-              UNormalizationMode mode, int32_t options,
-              UErrorCode &errorCode);
-
-  /**
-   * Compare two strings for canonical equivalence.
-   * Further options include case-insensitive comparison and
-   * code point order (as opposed to code unit order).
-   *
-   * Canonical equivalence between two strings is defined as their normalized
-   * forms (NFD or NFC) being identical.
-   * This function compares strings incrementally instead of normalizing
-   * (and optionally case-folding) both strings entirely,
-   * improving performance significantly.
-   *
-   * Bulk normalization is only necessary if the strings do not fulfill the FCD
-   * conditions. Only in this case, and only if the strings are relatively long,
-   * is memory allocated temporarily.
-   * For FCD strings and short non-FCD strings there is no memory allocation.
-   *
-   * Semantically, this is equivalent to
-   *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
-   * where code point order and foldCase are all optional.
-   *
-   * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
-   * the case folding must be performed first, then the normalization.
-   *
-   * @param s1 First source string.
-   * @param s2 Second source string.
-   *
-   * @param options A bit set of options:
-   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
-   *     Case-sensitive comparison in code unit order, and the input strings
-   *     are quick-checked for FCD.
-   *
-   *   - UNORM_INPUT_IS_FCD
-   *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
-   *     If not set, the function will quickCheck for FCD
-   *     and normalize if necessary.
-   *
-   *   - U_COMPARE_CODE_POINT_ORDER
-   *     Set to choose code point order instead of code unit order
-   *     (see u_strCompare for details).
-   *
-   *   - U_COMPARE_IGNORE_CASE
-   *     Set to compare strings case-insensitively using case folding,
-   *     instead of case-sensitively.
-   *     If set, then the following case folding options are used.
-   *
-   *   - Options as used with case-insensitive comparisons, currently:
-   *
-   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
-   *    (see u_strCaseCompare for details)
-   *
-   *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
-   *
-   * @param errorCode ICU error code in/out parameter.
-   *                  Must fulfill U_SUCCESS before the function call.
-   * @return <0 or 0 or >0 as usual for string comparisons
-   *
-   * @see unorm_compare
-   * @see normalize
-   * @see UNORM_FCD
-   * @see u_strCompare
-   * @see u_strCaseCompare
-   *
-   * @stable ICU 2.2
-   */
-  static inline int32_t
-  compare(const UnicodeString &s1, const UnicodeString &s2,
-          uint32_t options,
-          UErrorCode &errorCode);
-
-  //-------------------------------------------------------------------------
-  // Iteration API
-  //-------------------------------------------------------------------------
-
-  /**
-   * Return the current character in the normalized text.
-   * current() may need to normalize some text at getIndex().
-   * The getIndex() is not changed.
-   *
-   * @return the current normalized code point
-   * @stable ICU 2.0
-   */
-  UChar32              current(void);
-
-  /**
-   * Return the first character in the normalized text.
-   * This is equivalent to setIndexOnly(startIndex()) followed by next().
-   * (Post-increment semantics.)
-   *
-   * @return the first normalized code point
-   * @stable ICU 2.0
-   */
-  UChar32              first(void);
-
-  /**
-   * Return the last character in the normalized text.
-   * This is equivalent to setIndexOnly(endIndex()) followed by previous().
-   * (Pre-decrement semantics.)
-   *
-   * @return the last normalized code point
-   * @stable ICU 2.0
-   */
-  UChar32              last(void);
-
-  /**
-   * Return the next character in the normalized text.
-   * (Post-increment semantics.)
-   * If the end of the text has already been reached, DONE is returned.
-   * The DONE value could be confused with a U+FFFF non-character code point
-   * in the text. If this is possible, you can test getIndex()<endIndex()
-   * before calling next(), or (getIndex()<endIndex() || last()!=DONE)
-   * after calling next(). (Calling last() will change the iterator state!)
-   *
-   * The C API unorm_next() is more efficient and does not have this ambiguity.
-   *
-   * @return the next normalized code point
-   * @stable ICU 2.0
-   */
-  UChar32              next(void);
-
-  /**
-   * Return the previous character in the normalized text and decrement.
-   * (Pre-decrement semantics.)
-   * If the beginning of the text has already been reached, DONE is returned.
-   * The DONE value could be confused with a U+FFFF non-character code point
-   * in the text. If this is possible, you can test
-   * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
-   * the iterator state!)
-   *
-   * The C API unorm_previous() is more efficient and does not have this ambiguity.
-   *
-   * @return the previous normalized code point
-   * @stable ICU 2.0
-   */
-  UChar32              previous(void);
-
-  /**
-   * Set the iteration position in the input text that is being normalized,
-   * without any immediate normalization.
-   * After setIndexOnly(), getIndex() will return the same index that is
-   * specified here.
-   *
-   * @param index the desired index in the input text.
-   * @stable ICU 2.0
-   */
-  void                 setIndexOnly(int32_t index);
-
-  /**
-   * Reset the index to the beginning of the text.
-   * This is equivalent to setIndexOnly(startIndex)).
-   * @stable ICU 2.0
-   */
-  void                reset(void);
-
-  /**
-   * Retrieve the current iteration position in the input text that is
-   * being normalized.
-   *
-   * A following call to next() will return a normalized code point from
-   * the input text at or after this index.
-   *
-   * After a call to previous(), getIndex() will point at or before the
-   * position in the input text where the normalized code point
-   * was returned from with previous().
-   *
-   * @return the current index in the input text
-   * @stable ICU 2.0
-   */
-  int32_t            getIndex(void) const;
-
-  /**
-   * Retrieve the index of the start of the input text. This is the begin index
-   * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
-   * over which this <code>Normalizer</code> is iterating.
-   *
-   * @return the smallest index in the input text where the Normalizer operates
-   * @stable ICU 2.0
-   */
-  int32_t            startIndex(void) const;
-
-  /**
-   * Retrieve the index of the end of the input text. This is the end index
-   * of the <code>CharacterIterator</code> or the length of the string
-   * over which this <code>Normalizer</code> is iterating.
-   * This end index is exclusive, i.e., the Normalizer operates only on characters
-   * before this index.
-   *
-   * @return the first index in the input text where the Normalizer does not operate
-   * @stable ICU 2.0
-   */
-  int32_t            endIndex(void) const;
-
-  /**
-   * Returns TRUE when both iterators refer to the same character in the same
-   * input text.
-   *
-   * @param that a Normalizer object to compare this one to
-   * @return comparison result
-   * @stable ICU 2.0
-   */
-  UBool        operator==(const Normalizer& that) const;
-
-  /**
-   * Returns FALSE when both iterators refer to the same character in the same
-   * input text.
-   *
-   * @param that a Normalizer object to compare this one to
-   * @return comparison result
-   * @stable ICU 2.0
-   */
-  inline UBool        operator!=(const Normalizer& that) const;
-
-  /**
-   * Returns a pointer to a new Normalizer that is a clone of this one.
-   * The caller is responsible for deleting the new clone.
-   * @return a pointer to a new Normalizer
-   * @stable ICU 2.0
-   */
-  Normalizer*        clone(void) const;
-
-  /**
-   * Generates a hash code for this iterator.
-   *
-   * @return the hash code
-   * @stable ICU 2.0
-   */
-  int32_t                hashCode(void) const;
-
-  //-------------------------------------------------------------------------
-  // Property access methods
-  //-------------------------------------------------------------------------
-
-  /**
-   * Set the normalization mode for this object.
-   * <p>
-   * <b>Note:</b>If the normalization mode is changed while iterating
-   * over a string, calls to {@link #next() } and {@link #previous() } may
-   * return previously buffers characters in the old normalization mode
-   * until the iteration is able to re-sync at the next base character.
-   * It is safest to call {@link #setIndexOnly }, {@link #reset() },
-   * {@link #setText }, {@link #first() },
-   * {@link #last() }, etc. after calling <code>setMode</code>.
-   * <p>
-   * @param newMode the new mode for this <code>Normalizer</code>.
-   * @see #getUMode
-   * @stable ICU 2.0
-   */
-  void setMode(UNormalizationMode newMode);
-
-  /**
-   * Return the normalization mode for this object.
-   *
-   * This is an unusual name because there used to be a getMode() that
-   * returned a different type.
-   *
-   * @return the mode for this <code>Normalizer</code>
-   * @see #setMode
-   * @stable ICU 2.0
-   */
-  UNormalizationMode getUMode(void) const;
-
-  /**
-   * Set options that affect this <code>Normalizer</code>'s operation.
-   * Options do not change the basic composition or decomposition operation
-   * that is being performed, but they control whether
-   * certain optional portions of the operation are done.
-   * Currently the only available option is obsolete.
-   *
-   * It is possible to specify multiple options that are all turned on or off.
-   *
-   * @param   option  the option(s) whose value is/are to be set.
-   * @param   value   the new setting for the option.  Use <code>TRUE</code> to
-   *                  turn the option(s) on and <code>FALSE</code> to turn it/them off.
-   *
-   * @see #getOption
-   * @stable ICU 2.0
-   */
-  void setOption(int32_t option,
-         UBool value);
-
-  /**
-   * Determine whether an option is turned on or off.
-   * If multiple options are specified, then the result is TRUE if any
-   * of them are set.
-   * <p>
-   * @param option the option(s) that are to be checked
-   * @return TRUE if any of the option(s) are set
-   * @see #setOption
-   * @stable ICU 2.0
-   */
-  UBool getOption(int32_t option) const;
-
-  /**
-   * Set the input text over which this <code>Normalizer</code> will iterate.
-   * The iteration position is set to the beginning.
-   *
-   * @param newText a string that replaces the current input text
-   * @param status a UErrorCode
-   * @stable ICU 2.0
-   */
-  void setText(const UnicodeString& newText,
-           UErrorCode &status);
-
-  /**
-   * Set the input text over which this <code>Normalizer</code> will iterate.
-   * The iteration position is set to the beginning.
-   *
-   * @param newText a CharacterIterator object that replaces the current input text
-   * @param status a UErrorCode
-   * @stable ICU 2.0
-   */
-  void setText(const CharacterIterator& newText,
-           UErrorCode &status);
-
-  /**
-   * Set the input text over which this <code>Normalizer</code> will iterate.
-   * The iteration position is set to the beginning.
-   *
-   * @param newText a string that replaces the current input text
-   * @param length the length of the string, or -1 if NUL-terminated
-   * @param status a UErrorCode
-   * @stable ICU 2.0
-   */
-  void setText(const UChar* newText,
-                    int32_t length,
-            UErrorCode &status);
-  /**
-   * Copies the input text into the UnicodeString argument.
-   *
-   * @param result Receives a copy of the text under iteration.
-   * @stable ICU 2.0
-   */
-  void            getText(UnicodeString&  result);
-
-  /**
-   * ICU "poor man's RTTI", returns a UClassID for this class.
-   * @returns a UClassID for this class.
-   * @stable ICU 2.2
-   */
-  static UClassID U_EXPORT2 getStaticClassID();
-
-  /**
-   * ICU "poor man's RTTI", returns a UClassID for the actual class.
-   * @return a UClassID for the actual class.
-   * @stable ICU 2.2
-   */
-  virtual UClassID getDynamicClassID() const;
-
-private:
-  //-------------------------------------------------------------------------
-  // Private functions
-  //-------------------------------------------------------------------------
-
-  Normalizer(); // default constructor not implemented
-  Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
-
-  // Private utility methods for iteration
-  // For documentation, see the source code
-  UBool nextNormalize();
-  UBool previousNormalize();
-
-  void    init(CharacterIterator *iter);
-  void    clearBuffer(void);
-
-  //-------------------------------------------------------------------------
-  // Private data
-  //-------------------------------------------------------------------------
-
-  UNormalizationMode  fUMode;
-  int32_t             fOptions;
-
-  // The input text and our position in it
-  UCharIterator       *text;
-
-  // The normalization buffer is the result of normalization
-  // of the source in [currentIndex..nextIndex[ .
-  int32_t         currentIndex, nextIndex;
-
-  // A buffer for holding intermediate results
-  UnicodeString       buffer;
-  int32_t         bufferPos;
-
-};
-
-//-------------------------------------------------------------------------
-// Inline implementations
-//-------------------------------------------------------------------------
-
-inline UBool
-Normalizer::operator!= (const Normalizer& other) const
-{ return ! operator==(other); }
-
-inline UNormalizationCheckResult
-Normalizer::quickCheck(const UnicodeString& source,
-                       UNormalizationMode mode,
-                       UErrorCode &status) {
-    if(U_FAILURE(status)) {
-        return UNORM_MAYBE;
-    }
-
-    return unorm_quickCheck(source.getBuffer(), source.length(),
-                            mode, &status);
-}
-
-inline UNormalizationCheckResult
-Normalizer::quickCheck(const UnicodeString& source,
-                       UNormalizationMode mode, int32_t options,
-                       UErrorCode &status) {
-    if(U_FAILURE(status)) {
-        return UNORM_MAYBE;
-    }
-
-    return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
-                                       mode, options, &status);
-}
-
-inline UBool
-Normalizer::isNormalized(const UnicodeString& source,
-                         UNormalizationMode mode,
-                         UErrorCode &status) {
-    if(U_FAILURE(status)) {
-        return FALSE;
-    }
-
-    return unorm_isNormalized(source.getBuffer(), source.length(),
-                              mode, &status);
-}
-
-inline UBool
-Normalizer::isNormalized(const UnicodeString& source,
-                         UNormalizationMode mode, int32_t options,
-                         UErrorCode &status) {
-    if(U_FAILURE(status)) {
-        return FALSE;
-    }
-
-    return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
-                                         mode, options, &status);
-}
-
-inline int32_t
-Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
-                    uint32_t options,
-                    UErrorCode &errorCode) {
-  // all argument checking is done in unorm_compare
-  return unorm_compare(s1.getBuffer(), s1.length(),
-                       s2.getBuffer(), s2.length(),
-                       options,
-                       &errorCode);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
-
-#endif // NORMLZR_H

Copied: MacRuby/trunk/icu-1060/unicode/normlzr.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/normlzr.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/normlzr.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/normlzr.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,823 @@
+/*
+ ********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1996-2006, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************
+ */
+
+#ifndef NORMLZR_H
+#define NORMLZR_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Unicode Normalization
+ */
+ 
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#include "unicode/unorm.h"
+
+
+struct UCharIterator;
+typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
+
+U_NAMESPACE_BEGIN
+/**
+ * The Normalizer class supports the standard normalization forms described in
+ * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
+ * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
+ *
+ * The Normalizer class consists of two parts:
+ * - static functions that normalize strings or test if strings are normalized
+ * - a Normalizer object is an iterator that takes any kind of text and
+ *   provides iteration over its normalized form
+ *
+ * The Normalizer class is not suitable for subclassing.
+ *
+ * The static functions are basically wrappers around the C implementation,
+ * using UnicodeString instead of UChar*.
+ * For basic information about normalization forms and details about the C API
+ * please see the documentation in unorm.h.
+ *
+ * The iterator API with the Normalizer constructors and the non-static functions
+ * uses a CharacterIterator as input. It is possible to pass a string which
+ * is then internally wrapped in a CharacterIterator.
+ * The input text is not normalized all at once, but incrementally where needed
+ * (providing efficient random access).
+ * This allows to pass in a large text but spend only a small amount of time
+ * normalizing a small part of that text.
+ * However, if the entire text is normalized, then the iterator will be
+ * slower than normalizing the entire text at once and iterating over the result.
+ * A possible use of the Normalizer iterator is also to report an index into the
+ * original text that is close to where the normalized characters come from.
+ *
+ * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
+ * The earlier implementation reported the getIndex() inconsistently,
+ * and previous() could not be used after setIndex(), next(), first(), and current().
+ *
+ * Normalizer allows to start normalizing from anywhere in the input text by
+ * calling setIndexOnly(), first(), or last().
+ * Without calling any of these, the iterator will start at the beginning of the text.
+ *
+ * At any time, next() returns the next normalized code point (UChar32),
+ * with post-increment semantics (like CharacterIterator::next32PostInc()).
+ * previous() returns the previous normalized code point (UChar32),
+ * with pre-decrement semantics (like CharacterIterator::previous32()).
+ *
+ * current() returns the current code point
+ * (respectively the one at the newly set index) without moving
+ * the getIndex(). Note that if the text at the current position
+ * needs to be normalized, then these functions will do that.
+ * (This is why current() is not const.)
+ * It is more efficient to call setIndexOnly() instead, which does not
+ * normalize.
+ *
+ * getIndex() always refers to the position in the input text where the normalized
+ * code points are returned from. It does not always change with each returned
+ * code point.
+ * The code point that is returned from any of the functions
+ * corresponds to text at or after getIndex(), according to the
+ * function's iteration semantics (post-increment or pre-decrement).
+ *
+ * next() returns a code point from at or after the getIndex()
+ * from before the next() call. After the next() call, the getIndex()
+ * might have moved to where the next code point will be returned from
+ * (from a next() or current() call).
+ * This is semantically equivalent to array access with array[index++]
+ * (post-increment semantics).
+ *
+ * previous() returns a code point from at or after the getIndex()
+ * from after the previous() call.
+ * This is semantically equivalent to array access with array[--index]
+ * (pre-decrement semantics).
+ *
+ * Internally, the Normalizer iterator normalizes a small piece of text
+ * starting at the getIndex() and ending at a following "safe" index.
+ * The normalized results is stored in an internal string buffer, and
+ * the code points are iterated from there.
+ * With multiple iteration calls, this is repeated until the next piece
+ * of text needs to be normalized, and the getIndex() needs to be moved.
+ *
+ * The following "safe" index, the internal buffer, and the secondary
+ * iteration index into that buffer are not exposed on the API.
+ * This also means that it is currently not practical to return to
+ * a particular, arbitrary position in the text because one would need to
+ * know, and be able to set, in addition to the getIndex(), at least also the
+ * current index into the internal buffer.
+ * It is currently only possible to observe when getIndex() changes
+ * (with careful consideration of the iteration semantics),
+ * at which time the internal index will be 0.
+ * For example, if getIndex() is different after next() than before it,
+ * then the internal index is 0 and one can return to this getIndex()
+ * later with setIndexOnly().
+ *
+ * @author Laura Werner, Mark Davis, Markus Scherer
+ * @stable ICU 2.0
+ */
+class U_COMMON_API Normalizer : public UObject {
+public:
+  /**
+   * If DONE is returned from an iteration function that returns a code point,
+   * then there are no more normalization results available.
+   * @stable ICU 2.0
+   */
+  enum {
+      DONE=0xffff
+  };
+
+  // Constructors
+
+  /**
+   * Creates a new <code>Normalizer</code> object for iterating over the
+   * normalized form of a given string.
+   * <p>
+   * @param str   The string to be normalized.  The normalization
+   *              will start at the beginning of the string.
+   *
+   * @param mode  The normalization mode.
+   * @stable ICU 2.0
+   */
+  Normalizer(const UnicodeString& str, UNormalizationMode mode);
+
+  /**
+   * Creates a new <code>Normalizer</code> object for iterating over the
+   * normalized form of a given string.
+   * <p>
+   * @param str   The string to be normalized.  The normalization
+   *              will start at the beginning of the string.
+   *
+   * @param length Length of the string, or -1 if NUL-terminated.
+   * @param mode  The normalization mode.
+   * @stable ICU 2.0
+   */
+  Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
+
+  /**
+   * Creates a new <code>Normalizer</code> object for iterating over the
+   * normalized form of the given text.
+   * <p>
+   * @param iter  The input text to be normalized.  The normalization
+   *              will start at the beginning of the string.
+   *
+   * @param mode  The normalization mode.
+   * @stable ICU 2.0
+   */
+  Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
+
+  /**
+   * Copy constructor.
+   * @param copy The object to be copied.
+   * @stable ICU 2.0
+   */
+  Normalizer(const Normalizer& copy);
+
+  /**
+   * Destructor
+   * @stable ICU 2.0
+   */
+  virtual ~Normalizer();
+
+
+  //-------------------------------------------------------------------------
+  // Static utility methods
+  //-------------------------------------------------------------------------
+
+  /**
+   * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
+   * This is a wrapper for unorm_normalize(), using UnicodeString's.
+   *
+   * The <code>options</code> parameter specifies which optional
+   * <code>Normalizer</code> features are to be enabled for this operation.
+   *
+   * @param source    the input string to be normalized.
+   * @param mode      the normalization mode
+   * @param options   the optional features to be enabled (0 for no options)
+   * @param result    The normalized string (on output).
+   * @param status    The error code.
+   * @stable ICU 2.0
+   */
+  static void U_EXPORT2 normalize(const UnicodeString& source,
+                        UNormalizationMode mode, int32_t options,
+                        UnicodeString& result,
+                        UErrorCode &status);
+
+  /**
+   * Compose a <code>UnicodeString</code>.
+   * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
+   * This is a wrapper for unorm_normalize(), using UnicodeString's.
+   *
+   * The <code>options</code> parameter specifies which optional
+   * <code>Normalizer</code> features are to be enabled for this operation.
+   *
+   * @param source    the string to be composed.
+   * @param compat    Perform compatibility decomposition before composition.
+   *                  If this argument is <code>FALSE</code>, only canonical
+   *                  decomposition will be performed.
+   * @param options   the optional features to be enabled (0 for no options)
+   * @param result    The composed string (on output).
+   * @param status    The error code.
+   * @stable ICU 2.0
+   */
+  static void U_EXPORT2 compose(const UnicodeString& source,
+                      UBool compat, int32_t options,
+                      UnicodeString& result,
+                      UErrorCode &status);
+
+  /**
+   * Static method to decompose a <code>UnicodeString</code>.
+   * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
+   * This is a wrapper for unorm_normalize(), using UnicodeString's.
+   *
+   * The <code>options</code> parameter specifies which optional
+   * <code>Normalizer</code> features are to be enabled for this operation.
+   *
+   * @param source    the string to be decomposed.
+   * @param compat    Perform compatibility decomposition.
+   *                  If this argument is <code>FALSE</code>, only canonical
+   *                  decomposition will be performed.
+   * @param options   the optional features to be enabled (0 for no options)
+   * @param result    The decomposed string (on output).
+   * @param status    The error code.
+   * @stable ICU 2.0
+   */
+  static void U_EXPORT2 decompose(const UnicodeString& source,
+                        UBool compat, int32_t options,
+                        UnicodeString& result,
+                        UErrorCode &status);
+
+  /**
+   * Performing quick check on a string, to quickly determine if the string is
+   * in a particular normalization format.
+   * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
+   *
+   * Three types of result can be returned UNORM_YES, UNORM_NO or
+   * UNORM_MAYBE. Result UNORM_YES indicates that the argument
+   * string is in the desired normalized format, UNORM_NO determines that
+   * argument string is not in the desired normalized format. A
+   * UNORM_MAYBE result indicates that a more thorough check is required,
+   * the user may have to put the string in its normalized form and compare the
+   * results.
+   * @param source       string for determining if it is in a normalized format
+   * @param mode         normalization format
+   * @param status A reference to a UErrorCode to receive any errors
+   * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+   *
+   * @see isNormalized
+   * @stable ICU 2.0
+   */
+  static inline UNormalizationCheckResult
+  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
+
+  /**
+   * Performing quick check on a string; same as the other version of quickCheck
+   * but takes an extra options parameter like most normalization functions.
+   *
+   * @param source       string for determining if it is in a normalized format
+   * @param mode         normalization format
+   * @param options      the optional features to be enabled (0 for no options)
+   * @param status A reference to a UErrorCode to receive any errors
+   * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+   *
+   * @see isNormalized
+   * @stable ICU 2.6
+   */
+  static inline UNormalizationCheckResult
+  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
+
+  /**
+   * Test if a string is in a given normalization form.
+   * This is semantically equivalent to source.equals(normalize(source, mode)) .
+   *
+   * Unlike unorm_quickCheck(), this function returns a definitive result,
+   * never a "maybe".
+   * For NFD, NFKD, and FCD, both functions work exactly the same.
+   * For NFC and NFKC where quickCheck may return "maybe", this function will
+   * perform further tests to arrive at a TRUE/FALSE result.
+   *
+   * @param src        String that is to be tested if it is in a normalization format.
+   * @param mode       Which normalization form to test for.
+   * @param errorCode  ICU error code in/out parameter.
+   *                   Must fulfill U_SUCCESS before the function call.
+   * @return Boolean value indicating whether the source string is in the
+   *         "mode" normalization form.
+   *
+   * @see quickCheck
+   * @stable ICU 2.2
+   */
+  static inline UBool
+  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
+
+  /**
+   * Test if a string is in a given normalization form; same as the other version of isNormalized
+   * but takes an extra options parameter like most normalization functions.
+   *
+   * @param src        String that is to be tested if it is in a normalization format.
+   * @param mode       Which normalization form to test for.
+   * @param options      the optional features to be enabled (0 for no options)
+   * @param errorCode  ICU error code in/out parameter.
+   *                   Must fulfill U_SUCCESS before the function call.
+   * @return Boolean value indicating whether the source string is in the
+   *         "mode" normalization form.
+   *
+   * @see quickCheck
+   * @stable ICU 2.6
+   */
+  static inline UBool
+  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
+
+  /**
+   * Concatenate normalized strings, making sure that the result is normalized as well.
+   *
+   * If both the left and the right strings are in
+   * the normalization form according to "mode/options",
+   * then the result will be
+   *
+   * \code
+   *     dest=normalize(left+right, mode, options)
+   * \endcode
+   *
+   * For details see unorm_concatenate in unorm.h.
+   *
+   * @param left Left source string.
+   * @param right Right source string.
+   * @param result The output string.
+   * @param mode The normalization mode.
+   * @param options A bit set of normalization options.
+   * @param errorCode ICU error code in/out parameter.
+   *                   Must fulfill U_SUCCESS before the function call.
+   * @return result
+   *
+   * @see unorm_concatenate
+   * @see normalize
+   * @see unorm_next
+   * @see unorm_previous
+   *
+   * @stable ICU 2.1
+   */
+  static UnicodeString &
+  U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right,
+              UnicodeString &result,
+              UNormalizationMode mode, int32_t options,
+              UErrorCode &errorCode);
+
+  /**
+   * Compare two strings for canonical equivalence.
+   * Further options include case-insensitive comparison and
+   * code point order (as opposed to code unit order).
+   *
+   * Canonical equivalence between two strings is defined as their normalized
+   * forms (NFD or NFC) being identical.
+   * This function compares strings incrementally instead of normalizing
+   * (and optionally case-folding) both strings entirely,
+   * improving performance significantly.
+   *
+   * Bulk normalization is only necessary if the strings do not fulfill the FCD
+   * conditions. Only in this case, and only if the strings are relatively long,
+   * is memory allocated temporarily.
+   * For FCD strings and short non-FCD strings there is no memory allocation.
+   *
+   * Semantically, this is equivalent to
+   *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
+   * where code point order and foldCase are all optional.
+   *
+   * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
+   * the case folding must be performed first, then the normalization.
+   *
+   * @param s1 First source string.
+   * @param s2 Second source string.
+   *
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Case-sensitive comparison in code unit order, and the input strings
+   *     are quick-checked for FCD.
+   *
+   *   - UNORM_INPUT_IS_FCD
+   *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
+   *     If not set, the function will quickCheck for FCD
+   *     and normalize if necessary.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_COMPARE_IGNORE_CASE
+   *     Set to compare strings case-insensitively using case folding,
+   *     instead of case-sensitively.
+   *     If set, then the following case folding options are used.
+   *
+   *   - Options as used with case-insensitive comparisons, currently:
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *    (see u_strCaseCompare for details)
+   *
+   *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
+   *
+   * @param errorCode ICU error code in/out parameter.
+   *                  Must fulfill U_SUCCESS before the function call.
+   * @return <0 or 0 or >0 as usual for string comparisons
+   *
+   * @see unorm_compare
+   * @see normalize
+   * @see UNORM_FCD
+   * @see u_strCompare
+   * @see u_strCaseCompare
+   *
+   * @stable ICU 2.2
+   */
+  static inline int32_t
+  compare(const UnicodeString &s1, const UnicodeString &s2,
+          uint32_t options,
+          UErrorCode &errorCode);
+
+  //-------------------------------------------------------------------------
+  // Iteration API
+  //-------------------------------------------------------------------------
+
+  /**
+   * Return the current character in the normalized text.
+   * current() may need to normalize some text at getIndex().
+   * The getIndex() is not changed.
+   *
+   * @return the current normalized code point
+   * @stable ICU 2.0
+   */
+  UChar32              current(void);
+
+  /**
+   * Return the first character in the normalized text.
+   * This is equivalent to setIndexOnly(startIndex()) followed by next().
+   * (Post-increment semantics.)
+   *
+   * @return the first normalized code point
+   * @stable ICU 2.0
+   */
+  UChar32              first(void);
+
+  /**
+   * Return the last character in the normalized text.
+   * This is equivalent to setIndexOnly(endIndex()) followed by previous().
+   * (Pre-decrement semantics.)
+   *
+   * @return the last normalized code point
+   * @stable ICU 2.0
+   */
+  UChar32              last(void);
+
+  /**
+   * Return the next character in the normalized text.
+   * (Post-increment semantics.)
+   * If the end of the text has already been reached, DONE is returned.
+   * The DONE value could be confused with a U+FFFF non-character code point
+   * in the text. If this is possible, you can test getIndex()<endIndex()
+   * before calling next(), or (getIndex()<endIndex() || last()!=DONE)
+   * after calling next(). (Calling last() will change the iterator state!)
+   *
+   * The C API unorm_next() is more efficient and does not have this ambiguity.
+   *
+   * @return the next normalized code point
+   * @stable ICU 2.0
+   */
+  UChar32              next(void);
+
+  /**
+   * Return the previous character in the normalized text and decrement.
+   * (Pre-decrement semantics.)
+   * If the beginning of the text has already been reached, DONE is returned.
+   * The DONE value could be confused with a U+FFFF non-character code point
+   * in the text. If this is possible, you can test
+   * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
+   * the iterator state!)
+   *
+   * The C API unorm_previous() is more efficient and does not have this ambiguity.
+   *
+   * @return the previous normalized code point
+   * @stable ICU 2.0
+   */
+  UChar32              previous(void);
+
+  /**
+   * Set the iteration position in the input text that is being normalized,
+   * without any immediate normalization.
+   * After setIndexOnly(), getIndex() will return the same index that is
+   * specified here.
+   *
+   * @param index the desired index in the input text.
+   * @stable ICU 2.0
+   */
+  void                 setIndexOnly(int32_t index);
+
+  /**
+   * Reset the index to the beginning of the text.
+   * This is equivalent to setIndexOnly(startIndex)).
+   * @stable ICU 2.0
+   */
+  void                reset(void);
+
+  /**
+   * Retrieve the current iteration position in the input text that is
+   * being normalized.
+   *
+   * A following call to next() will return a normalized code point from
+   * the input text at or after this index.
+   *
+   * After a call to previous(), getIndex() will point at or before the
+   * position in the input text where the normalized code point
+   * was returned from with previous().
+   *
+   * @return the current index in the input text
+   * @stable ICU 2.0
+   */
+  int32_t            getIndex(void) const;
+
+  /**
+   * Retrieve the index of the start of the input text. This is the begin index
+   * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
+   * over which this <code>Normalizer</code> is iterating.
+   *
+   * @return the smallest index in the input text where the Normalizer operates
+   * @stable ICU 2.0
+   */
+  int32_t            startIndex(void) const;
+
+  /**
+   * Retrieve the index of the end of the input text. This is the end index
+   * of the <code>CharacterIterator</code> or the length of the string
+   * over which this <code>Normalizer</code> is iterating.
+   * This end index is exclusive, i.e., the Normalizer operates only on characters
+   * before this index.
+   *
+   * @return the first index in the input text where the Normalizer does not operate
+   * @stable ICU 2.0
+   */
+  int32_t            endIndex(void) const;
+
+  /**
+   * Returns TRUE when both iterators refer to the same character in the same
+   * input text.
+   *
+   * @param that a Normalizer object to compare this one to
+   * @return comparison result
+   * @stable ICU 2.0
+   */
+  UBool        operator==(const Normalizer& that) const;
+
+  /**
+   * Returns FALSE when both iterators refer to the same character in the same
+   * input text.
+   *
+   * @param that a Normalizer object to compare this one to
+   * @return comparison result
+   * @stable ICU 2.0
+   */
+  inline UBool        operator!=(const Normalizer& that) const;
+
+  /**
+   * Returns a pointer to a new Normalizer that is a clone of this one.
+   * The caller is responsible for deleting the new clone.
+   * @return a pointer to a new Normalizer
+   * @stable ICU 2.0
+   */
+  Normalizer*        clone(void) const;
+
+  /**
+   * Generates a hash code for this iterator.
+   *
+   * @return the hash code
+   * @stable ICU 2.0
+   */
+  int32_t                hashCode(void) const;
+
+  //-------------------------------------------------------------------------
+  // Property access methods
+  //-------------------------------------------------------------------------
+
+  /**
+   * Set the normalization mode for this object.
+   * <p>
+   * <b>Note:</b>If the normalization mode is changed while iterating
+   * over a string, calls to {@link #next() } and {@link #previous() } may
+   * return previously buffers characters in the old normalization mode
+   * until the iteration is able to re-sync at the next base character.
+   * It is safest to call {@link #setIndexOnly }, {@link #reset() },
+   * {@link #setText }, {@link #first() },
+   * {@link #last() }, etc. after calling <code>setMode</code>.
+   * <p>
+   * @param newMode the new mode for this <code>Normalizer</code>.
+   * @see #getUMode
+   * @stable ICU 2.0
+   */
+  void setMode(UNormalizationMode newMode);
+
+  /**
+   * Return the normalization mode for this object.
+   *
+   * This is an unusual name because there used to be a getMode() that
+   * returned a different type.
+   *
+   * @return the mode for this <code>Normalizer</code>
+   * @see #setMode
+   * @stable ICU 2.0
+   */
+  UNormalizationMode getUMode(void) const;
+
+  /**
+   * Set options that affect this <code>Normalizer</code>'s operation.
+   * Options do not change the basic composition or decomposition operation
+   * that is being performed, but they control whether
+   * certain optional portions of the operation are done.
+   * Currently the only available option is obsolete.
+   *
+   * It is possible to specify multiple options that are all turned on or off.
+   *
+   * @param   option  the option(s) whose value is/are to be set.
+   * @param   value   the new setting for the option.  Use <code>TRUE</code> to
+   *                  turn the option(s) on and <code>FALSE</code> to turn it/them off.
+   *
+   * @see #getOption
+   * @stable ICU 2.0
+   */
+  void setOption(int32_t option,
+         UBool value);
+
+  /**
+   * Determine whether an option is turned on or off.
+   * If multiple options are specified, then the result is TRUE if any
+   * of them are set.
+   * <p>
+   * @param option the option(s) that are to be checked
+   * @return TRUE if any of the option(s) are set
+   * @see #setOption
+   * @stable ICU 2.0
+   */
+  UBool getOption(int32_t option) const;
+
+  /**
+   * Set the input text over which this <code>Normalizer</code> will iterate.
+   * The iteration position is set to the beginning.
+   *
+   * @param newText a string that replaces the current input text
+   * @param status a UErrorCode
+   * @stable ICU 2.0
+   */
+  void setText(const UnicodeString& newText,
+           UErrorCode &status);
+
+  /**
+   * Set the input text over which this <code>Normalizer</code> will iterate.
+   * The iteration position is set to the beginning.
+   *
+   * @param newText a CharacterIterator object that replaces the current input text
+   * @param status a UErrorCode
+   * @stable ICU 2.0
+   */
+  void setText(const CharacterIterator& newText,
+           UErrorCode &status);
+
+  /**
+   * Set the input text over which this <code>Normalizer</code> will iterate.
+   * The iteration position is set to the beginning.
+   *
+   * @param newText a string that replaces the current input text
+   * @param length the length of the string, or -1 if NUL-terminated
+   * @param status a UErrorCode
+   * @stable ICU 2.0
+   */
+  void setText(const UChar* newText,
+                    int32_t length,
+            UErrorCode &status);
+  /**
+   * Copies the input text into the UnicodeString argument.
+   *
+   * @param result Receives a copy of the text under iteration.
+   * @stable ICU 2.0
+   */
+  void            getText(UnicodeString&  result);
+
+  /**
+   * ICU "poor man's RTTI", returns a UClassID for this class.
+   * @returns a UClassID for this class.
+   * @stable ICU 2.2
+   */
+  static UClassID U_EXPORT2 getStaticClassID();
+
+  /**
+   * ICU "poor man's RTTI", returns a UClassID for the actual class.
+   * @return a UClassID for the actual class.
+   * @stable ICU 2.2
+   */
+  virtual UClassID getDynamicClassID() const;
+
+private:
+  //-------------------------------------------------------------------------
+  // Private functions
+  //-------------------------------------------------------------------------
+
+  Normalizer(); // default constructor not implemented
+  Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
+
+  // Private utility methods for iteration
+  // For documentation, see the source code
+  UBool nextNormalize();
+  UBool previousNormalize();
+
+  void    init(CharacterIterator *iter);
+  void    clearBuffer(void);
+
+  //-------------------------------------------------------------------------
+  // Private data
+  //-------------------------------------------------------------------------
+
+  UNormalizationMode  fUMode;
+  int32_t             fOptions;
+
+  // The input text and our position in it
+  UCharIterator       *text;
+
+  // The normalization buffer is the result of normalization
+  // of the source in [currentIndex..nextIndex[ .
+  int32_t         currentIndex, nextIndex;
+
+  // A buffer for holding intermediate results
+  UnicodeString       buffer;
+  int32_t         bufferPos;
+
+};
+
+//-------------------------------------------------------------------------
+// Inline implementations
+//-------------------------------------------------------------------------
+
+inline UBool
+Normalizer::operator!= (const Normalizer& other) const
+{ return ! operator==(other); }
+
+inline UNormalizationCheckResult
+Normalizer::quickCheck(const UnicodeString& source,
+                       UNormalizationMode mode,
+                       UErrorCode &status) {
+    if(U_FAILURE(status)) {
+        return UNORM_MAYBE;
+    }
+
+    return unorm_quickCheck(source.getBuffer(), source.length(),
+                            mode, &status);
+}
+
+inline UNormalizationCheckResult
+Normalizer::quickCheck(const UnicodeString& source,
+                       UNormalizationMode mode, int32_t options,
+                       UErrorCode &status) {
+    if(U_FAILURE(status)) {
+        return UNORM_MAYBE;
+    }
+
+    return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
+                                       mode, options, &status);
+}
+
+inline UBool
+Normalizer::isNormalized(const UnicodeString& source,
+                         UNormalizationMode mode,
+                         UErrorCode &status) {
+    if(U_FAILURE(status)) {
+        return FALSE;
+    }
+
+    return unorm_isNormalized(source.getBuffer(), source.length(),
+                              mode, &status);
+}
+
+inline UBool
+Normalizer::isNormalized(const UnicodeString& source,
+                         UNormalizationMode mode, int32_t options,
+                         UErrorCode &status) {
+    if(U_FAILURE(status)) {
+        return FALSE;
+    }
+
+    return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
+                                         mode, options, &status);
+}
+
+inline int32_t
+Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
+                    uint32_t options,
+                    UErrorCode &errorCode) {
+  // all argument checking is done in unorm_compare
+  return unorm_compare(s1.getBuffer(), s1.length(),
+                       s2.getBuffer(), s2.length(),
+                       options,
+                       &errorCode);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif // NORMLZR_H

Deleted: MacRuby/trunk/icu-1060/unicode/numfmt.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/numfmt.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/numfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,886 +0,0 @@
-/*
-********************************************************************************
-* Copyright (C) 1997-2009, International Business Machines Corporation and others.
-* All Rights Reserved.
-********************************************************************************
-*
-* File NUMFMT.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   02/19/97    aliu        Converted from java.
-*   03/18/97    clhuang     Updated per C++ implementation.
-*   04/17/97    aliu        Changed DigitCount to int per code review.
-*    07/20/98    stephen        JDK 1.2 sync up. Added scientific support.
-*                            Changed naming conventions to match C++ guidelines
-*                            Derecated Java style constants (eg, INTEGER_FIELD)
-********************************************************************************
-*/
-
-#ifndef NUMFMT_H
-#define NUMFMT_H
-
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Abstract base class for all number formats.
- */
- 
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/unistr.h"
-#include "unicode/format.h"
-#include "unicode/unum.h" // UNumberFormatStyle
-#include "unicode/locid.h"
-
-U_NAMESPACE_BEGIN
-
-#if !UCONFIG_NO_SERVICE
-class NumberFormatFactory;
-class StringEnumeration;
-#endif
-
-/**
- *
- * Abstract base class for all number formats.  Provides interface for
- * formatting and parsing a number.  Also provides methods for
- * determining which locales have number formats, and what their names
- * are.
- * <P>
- * NumberFormat helps you to format and parse numbers for any locale.
- * Your code can be completely independent of the locale conventions
- * for decimal points, thousands-separators, or even the particular
- * decimal digits used, or whether the number format is even decimal.
- * <P>
- * To format a number for the current Locale, use one of the static
- * factory methods:
- * <pre>
- * \code
- *    double myNumber = 7.0;
- *    UnicodeString myString;
- *    UErrorCode success = U_ZERO_ERROR;
- *    NumberFormat* nf = NumberFormat::createInstance(success)
- *    nf->format(myNumber, myString);
- *    cout << " Example 1: " << myString << endl;
- * \endcode
- * </pre>
- * If you are formatting multiple numbers, it is more efficient to get
- * the format and use it multiple times so that the system doesn't
- * have to fetch the information about the local language and country
- * conventions multiple times.
- * <pre>
- * \code
- *     UnicodeString myString;
- *     UErrorCode success = U_ZERO_ERROR;
- *     nf = NumberFormat::createInstance( success );
- *     int32_t a[] = { 123, 3333, -1234567 };
- *     const int32_t a_len = sizeof(a) / sizeof(a[0]);
- *     myString.remove();
- *     for (int32_t i = 0; i < a_len; i++) {
- *         nf->format(a[i], myString);
- *         myString += " ; ";
- *     }
- *     cout << " Example 2: " << myString << endl;
- * \endcode
- * </pre>
- * To format a number for a different Locale, specify it in the
- * call to createInstance().
- * <pre>
- * \code
- *     nf = NumberFormat::createInstance( Locale::FRENCH, success );
- * \endcode
- * </pre>
- * You can use a NumberFormat to parse also.
- * <pre>
- * \code
- *    UErrorCode success;
- *    Formattable result(-999);  // initialized with error code
- *    nf->parse(myString, result, success);
- * \endcode
- * </pre>
- * Use createInstance to get the normal number format for that country.
- * There are other static factory methods available.  Use getCurrency
- * to get the currency number format for that country.  Use getPercent
- * to get a format for displaying percentages. With this format, a
- * fraction from 0.53 is displayed as 53%.
- * <P>
- * You can also control the display of numbers with such methods as
- * getMinimumFractionDigits.  If you want even more control over the
- * format or parsing, or want to give your users more control, you can
- * try casting the NumberFormat you get from the factory methods to a
- * DecimalNumberFormat. This will work for the vast majority of
- * countries; just remember to put it in a try block in case you
- * encounter an unusual one.
- * <P>
- * You can also use forms of the parse and format methods with
- * ParsePosition and FieldPosition to allow you to:
- * <ul type=round>
- *   <li>(a) progressively parse through pieces of a string.
- *   <li>(b) align the decimal point and other areas.
- * </ul>
- * For example, you can align numbers in two ways.
- * <P>
- * If you are using a monospaced font with spacing for alignment, you
- * can pass the FieldPosition in your format call, with field =
- * INTEGER_FIELD. On output, getEndIndex will be set to the offset
- * between the last character of the integer and the decimal. Add
- * (desiredSpaceCount - getEndIndex) spaces at the front of the
- * string.
- * <P>
- * If you are using proportional fonts, instead of padding with
- * spaces, measure the width of the string in pixels from the start to
- * getEndIndex.  Then move the pen by (desiredPixelWidth -
- * widthToAlignmentPoint) before drawing the text.  It also works
- * where there is no decimal, but possibly additional characters at
- * the end, e.g. with parentheses in negative numbers: "(12)" for -12.
- * <p>
- * <em>User subclasses are not supported.</em> While clients may write
- * subclasses, such code will not necessarily work and will not be
- * guaranteed to work stably from release to release.
- *
- * @stable ICU 2.0
- */
-class U_I18N_API NumberFormat : public Format {
-public:
-
-    /**
-     * Alignment Field constants used to construct a FieldPosition object.
-     * Signifies that the position of the integer part or fraction part of
-     * a formatted number should be returned.
-     *
-     * @see FieldPosition
-     * @stable ICU 2.0
-     */
-    enum EAlignmentFields {
-        kIntegerField,
-        kFractionField,
-
-
-    /**
-     * These constants are provided for backwards compatibility only.
-     * Please use the C++ style constants defined above.
-     * @stable ICU 2.0
-     */
-        INTEGER_FIELD        = kIntegerField,
-        FRACTION_FIELD        = kFractionField
-    };
-
-    /**
-     * Destructor.
-     * @stable ICU 2.0
-     */
-    virtual ~NumberFormat();
-
-    /**
-     * Return true if the given Format objects are semantically equal.
-     * Objects of different subclasses are considered unequal.
-     * @return    true if the given Format objects are semantically equal.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const Format& other) const;
-
-    /**
-     * Format an object to produce a string.  This method handles
-     * Formattable objects with numeric types. If the Formattable
-     * object type is not a numeric type, then it returns a failing
-     * UErrorCode.
-     *
-     * @param obj       The object to format.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @param status    Output param filled with success/failure status.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& format(const Formattable& obj,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos,
-                                  UErrorCode& status) const;
-
-    /**
-     * Parse a string to produce an object.  This methods handles
-     * parsing of numeric strings into Formattable objects with numeric
-     * types.
-     * <P>
-     * Before calling, set parse_pos.index to the offset you want to
-     * start parsing at in the source. After calling, parse_pos.index
-     * indicates the position after the successfully parsed text.  If
-     * an error occurs, parse_pos.index is unchanged.
-     * <P>
-     * When parsing, leading whitespace is discarded (with successful
-     * parse), while trailing whitespace is left as is.
-     * <P>
-     * See Format::parseObject() for more.
-     *
-     * @param source    The string to be parsed into an object.
-     * @param result    Formattable to be set to the parse result.
-     *                  If parse fails, return contents are undefined.
-     * @param parse_pos The position to start parsing at. Upon return
-     *                  this param is set to the position after the
-     *                  last character successfully parsed. If the
-     *                  source is not parsed successfully, this param
-     *                  will remain unchanged.
-     * @return          A newly created Formattable* object, or NULL
-     *                  on failure.  The caller owns this and should
-     *                  delete it when done.
-     * @stable ICU 2.0
-     */
-    virtual void parseObject(const UnicodeString& source,
-                             Formattable& result,
-                             ParsePosition& parse_pos) const;
-
-    /**
-     * Format a double number. These methods call the NumberFormat
-     * pure virtual format() methods with the default FieldPosition.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(  double number,
-                            UnicodeString& appendTo) const;
-
-    /**
-     * Format a long number. These methods call the NumberFormat
-     * pure virtual format() methods with the default FieldPosition.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(  int32_t number,
-                            UnicodeString& appendTo) const;
-
-    /**
-     * Format an int64 number. These methods call the NumberFormat
-     * pure virtual format() methods with the default FieldPosition.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.8
-     */
-    UnicodeString& format(  int64_t number,
-                            UnicodeString& appendTo) const;
-
-    /**
-     * Format a double number. Concrete subclasses must implement
-     * these pure virtual methods.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& format(double number,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos) const = 0;
-    /**
-     * Format a long number. Concrete subclasses must implement
-     * these pure virtual methods.
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-    */
-    virtual UnicodeString& format(int32_t number,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos) const = 0;
-
-    /**
-     * Format an int64 number. (Not abstract to retain compatibility
-     * with earlier releases, however subclasses should override this
-     * method as it just delegates to format(int32_t number...);
-     *
-     * @param number    The value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       On input: an alignment field, if desired.
-     *                  On output: the offsets of the alignment field.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.8
-    */
-    virtual UnicodeString& format(int64_t number,
-                                  UnicodeString& appendTo,
-                                  FieldPosition& pos) const;
-    /**
-     * Redeclared Format method.
-     * @param obj       The object to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param status    Output parameter set to a failure error code
-     *                  when a failure occurs.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(const Formattable& obj,
-                          UnicodeString& appendTo,
-                          UErrorCode& status) const;
-
-   /**
-    * Return a long if possible (e.g. within range LONG_MAX,
-    * LONG_MAX], and with no decimals), otherwise a double.  If
-    * IntegerOnly is set, will stop at a decimal point (or equivalent;
-    * e.g. for rational numbers "1 2/3", will stop after the 1).
-    * <P>
-    * If no object can be parsed, index is unchanged, and NULL is
-    * returned.
-    * <P>
-    * This is a pure virtual which concrete subclasses must implement.
-    *
-    * @param text           The text to be parsed.
-    * @param result         Formattable to be set to the parse result.
-    *                       If parse fails, return contents are undefined.
-    * @param parsePosition  The position to start parsing at on input.
-    *                       On output, moved to after the last successfully
-    *                       parse character. On parse failure, does not change.
-    * @return               A Formattable object of numeric type.  The caller
-    *                       owns this an must delete it.  NULL on failure.
-    * @stable ICU 2.0
-    */
-    virtual void parse(const UnicodeString& text,
-                       Formattable& result,
-                       ParsePosition& parsePosition) const = 0;
-
-    /**
-     * Parse a string as a numeric value, and return a Formattable
-     * numeric object. This method parses integers only if IntegerOnly
-     * is set.
-     *
-     * @param text          The text to be parsed.
-     * @param result        Formattable to be set to the parse result.
-     *                      If parse fails, return contents are undefined.
-     * @param status        Output parameter set to a failure error code
-     *                      when a failure occurs.
-     * @return              A Formattable object of numeric type.  The caller
-     *                      owns this an must delete it.  NULL on failure.
-     * @see                 NumberFormat::isParseIntegerOnly
-     * @stable ICU 2.0
-     */
-    virtual void parse( const UnicodeString& text,
-                        Formattable& result,
-                        UErrorCode& status) const;
-
-    /**
-     * Parses text from the given string as a currency amount.  Unlike
-     * the parse() method, this method will attempt to parse a generic
-     * currency name, searching for a match of this object's locale's
-     * currency display names, or for a 3-letter ISO currency code.
-     * This method will fail if this format is not a currency format,
-     * that is, if it does not contain the currency pattern symbol
-     * (U+00A4) in its prefix or suffix.
-     *
-     * @param text the string to parse
-     * @param result output parameter to receive result. This will have
-     * its currency set to the parsed ISO currency code.
-     * @param pos input-output position; on input, the position within
-     * text to match; must have 0 <= pos.getIndex() < text.length();
-     * on output, the position after the last matched character. If
-     * the parse fails, the position in unchanged upon output.
-     * @return a reference to result
-     * @internal
-     */
-    virtual Formattable& parseCurrency(const UnicodeString& text,
-                                       Formattable& result,
-                                       ParsePosition& pos) const;
-
-    /**
-     * Return true if this format will parse numbers as integers
-     * only.  For example in the English locale, with ParseIntegerOnly
-     * true, the string "1234." would be parsed as the integer value
-     * 1234 and parsing would stop at the "." character.  Of course,
-     * the exact format accepted by the parse operation is locale
-     * dependant and determined by sub-classes of NumberFormat.
-     * @return    true if this format will parse numbers as integers
-     *            only.
-     * @stable ICU 2.0
-     */
-    UBool isParseIntegerOnly(void) const;
-
-    /**
-     * Sets whether or not numbers should be parsed as integers only.
-     * @param value    set True, this format will parse numbers as integers
-     *                 only.
-     * @see isParseIntegerOnly
-     * @stable ICU 2.0
-     */
-    virtual void setParseIntegerOnly(UBool value);
-	
-    /**
-     * Return whether or not strict parsing is in effect.
-     *
-     * @return <code>TRUE</code> if strict parsing is in effect,
-     *         <code>FALSE</code> otherwise.
-     *  @internal
-     */
-    UBool isParseStrict(void) const;
-	
-    /**
-     * Set whether or not strict parsing should be used.
-     *
-     * @param value <code>TRUE</code> if strict parsing should be used,
-     *              <code>FALSE</code> otherwise.
-     *  @internal
-     */
-    virtual void setParseStrict(UBool value);
-	
-    /**
-     * Returns the default number format for the current default
-     * locale.  The default format is one of the styles provided by
-     * the other factory methods: getNumberInstance,
-     * getCurrencyInstance or getPercentInstance.  Exactly which one
-     * is locale dependant.
-     * @stable ICU 2.0
-     */
-    static NumberFormat* U_EXPORT2 createInstance(UErrorCode&);
-
-    /**
-     * Returns the default number format for the specified locale.
-     * The default format is one of the styles provided by the other
-     * factory methods: getNumberInstance, getCurrencyInstance or
-     * getPercentInstance.  Exactly which one is locale dependant.
-     * @param inLocale    the given locale.
-     * @stable ICU 2.0
-     */
-    static NumberFormat* U_EXPORT2 createInstance(const Locale& inLocale,
-                                        UErrorCode&);
-
-    /**
-     * Returns a currency format for the current default locale.
-     * @stable ICU 2.0
-     */
-    static NumberFormat* U_EXPORT2 createCurrencyInstance(UErrorCode&);
-
-    /**
-     * Returns a currency format for the specified locale.
-     * @param inLocale    the given locale.
-     * @stable ICU 2.0
-     */
-    static NumberFormat* U_EXPORT2 createCurrencyInstance(const Locale& inLocale,
-                                                UErrorCode&);
-
-    /**
-     * Returns a percentage format for the current default locale.
-     * @stable ICU 2.0
-     */
-    static NumberFormat* U_EXPORT2 createPercentInstance(UErrorCode&);
-
-    /**
-     * Returns a percentage format for the specified locale.
-     * @param inLocale    the given locale.
-     * @stable ICU 2.0
-     */
-    static NumberFormat* U_EXPORT2 createPercentInstance(const Locale& inLocale,
-                                               UErrorCode&);
-
-    /**
-     * Returns a scientific format for the current default locale.
-     * @stable ICU 2.0
-     */
-    static NumberFormat* U_EXPORT2 createScientificInstance(UErrorCode&);
-
-    /**
-     * Returns a scientific format for the specified locale.
-     * @param inLocale    the given locale.
-     * @stable ICU 2.0
-     */
-    static NumberFormat* U_EXPORT2 createScientificInstance(const Locale& inLocale,
-                                                UErrorCode&);
-
-    /**
-     * Get the set of Locales for which NumberFormats are installed.
-     * @param count    Output param to receive the size of the locales
-     * @stable ICU 2.0
-     */
-    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
-
-#if !UCONFIG_NO_SERVICE
-    /**
-     * Register a new NumberFormatFactory.  The factory will be adopted.
-     * @param toAdopt the NumberFormatFactory instance to be adopted
-     * @param status the in/out status code, no special meanings are assigned
-     * @return a registry key that can be used to unregister this factory
-     * @stable ICU 2.6
-     */
-    static URegistryKey U_EXPORT2 registerFactory(NumberFormatFactory* toAdopt, UErrorCode& status);
-
-    /**
-     * Unregister a previously-registered NumberFormatFactory using the key returned from the
-     * register call.  Key becomes invalid after a successful call and should not be used again.
-     * The NumberFormatFactory corresponding to the key will be deleted.
-     * @param key the registry key returned by a previous call to registerFactory
-     * @param status the in/out status code, no special meanings are assigned
-     * @return TRUE if the factory for the key was successfully unregistered
-     * @stable ICU 2.6
-     */
-    static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
-
-    /**
-     * Return a StringEnumeration over the locales available at the time of the call,
-     * including registered locales.
-     * @return a StringEnumeration over the locales available at the time of the call
-     * @stable ICU 2.6
-     */
-    static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
-#endif /* UCONFIG_NO_SERVICE */
-
-    /**
-     * Returns true if grouping is used in this format. For example,
-     * in the English locale, with grouping on, the number 1234567
-     * might be formatted as "1,234,567". The grouping separator as
-     * well as the size of each group is locale dependant and is
-     * determined by sub-classes of NumberFormat.
-     * @see setGroupingUsed
-     * @stable ICU 2.0
-     */
-    UBool isGroupingUsed(void) const;
-
-    /**
-     * Set whether or not grouping will be used in this format.
-     * @param newValue    True, grouping will be used in this format.
-     * @see getGroupingUsed
-     * @stable ICU 2.0
-     */
-    virtual void setGroupingUsed(UBool newValue);
-
-    /**
-     * Returns the maximum number of digits allowed in the integer portion of a
-     * number.
-     * @return     the maximum number of digits allowed in the integer portion of a
-     *             number.
-     * @see setMaximumIntegerDigits
-     * @stable ICU 2.0
-     */
-    int32_t getMaximumIntegerDigits(void) const;
-
-    /**
-     * Sets the maximum number of digits allowed in the integer portion of a
-     * number. maximumIntegerDigits must be >= minimumIntegerDigits.  If the
-     * new value for maximumIntegerDigits is less than the current value
-     * of minimumIntegerDigits, then minimumIntegerDigits will also be set to
-     * the new value.
-     *
-     * @param newValue    the new value for the maximum number of digits
-     *                    allowed in the integer portion of a number.
-     * @see getMaximumIntegerDigits
-     * @stable ICU 2.0
-     */
-    virtual void setMaximumIntegerDigits(int32_t newValue);
-
-    /**
-     * Returns the minimum number of digits allowed in the integer portion of a
-     * number.
-     * @return    the minimum number of digits allowed in the integer portion of a
-     *            number.
-     * @see setMinimumIntegerDigits
-     * @stable ICU 2.0
-     */
-    int32_t getMinimumIntegerDigits(void) const;
-
-    /**
-     * Sets the minimum number of digits allowed in the integer portion of a
-     * number. minimumIntegerDigits must be &lt;= maximumIntegerDigits.  If the
-     * new value for minimumIntegerDigits exceeds the current value
-     * of maximumIntegerDigits, then maximumIntegerDigits will also be set to
-     * the new value.
-     * @param newValue    the new value to be set.
-     * @see getMinimumIntegerDigits
-     * @stable ICU 2.0
-     */
-    virtual void setMinimumIntegerDigits(int32_t newValue);
-
-    /**
-     * Returns the maximum number of digits allowed in the fraction portion of a
-     * number.
-     * @return    the maximum number of digits allowed in the fraction portion of a
-     *            number.
-     * @see setMaximumFractionDigits
-     * @stable ICU 2.0
-     */
-    int32_t getMaximumFractionDigits(void) const;
-
-    /**
-     * Sets the maximum number of digits allowed in the fraction portion of a
-     * number. maximumFractionDigits must be >= minimumFractionDigits.  If the
-     * new value for maximumFractionDigits is less than the current value
-     * of minimumFractionDigits, then minimumFractionDigits will also be set to
-     * the new value.
-     * @param newValue    the new value to be set.
-     * @see getMaximumFractionDigits
-     * @stable ICU 2.0
-     */
-    virtual void setMaximumFractionDigits(int32_t newValue);
-
-    /**
-     * Returns the minimum number of digits allowed in the fraction portion of a
-     * number.
-     * @return    the minimum number of digits allowed in the fraction portion of a
-     *            number.
-     * @see setMinimumFractionDigits
-     * @stable ICU 2.0
-     */
-    int32_t getMinimumFractionDigits(void) const;
-
-    /**
-     * Sets the minimum number of digits allowed in the fraction portion of a
-     * number. minimumFractionDigits must be &lt;= maximumFractionDigits.   If the
-     * new value for minimumFractionDigits exceeds the current value
-     * of maximumFractionDigits, then maximumIntegerDigits will also be set to
-     * the new value
-     * @param newValue    the new value to be set.
-     * @see getMinimumFractionDigits
-     * @stable ICU 2.0
-     */
-    virtual void setMinimumFractionDigits(int32_t newValue);
-
-    /**
-     * Sets the currency used to display currency
-     * amounts.  This takes effect immediately, if this format is a
-     * currency format.  If this format is not a currency format, then
-     * the currency is used if and when this object becomes a
-     * currency format.
-     * @param theCurrency a 3-letter ISO code indicating new currency
-     * to use.  It need not be null-terminated.  May be the empty
-     * string or NULL to indicate no currency.
-     * @param ec input-output error code
-     * @stable ICU 3.0
-     */
-    virtual void setCurrency(const UChar* theCurrency, UErrorCode& ec);
-
-    /**
-     * Gets the currency used to display currency
-     * amounts.  This may be an empty string for some subclasses.
-     * @return a 3-letter null-terminated ISO code indicating
-     * the currency in use, or a pointer to the empty string.
-     * @stable ICU 2.6
-     */
-    const UChar* getCurrency() const;
-
-public:
-
-    /**
-     * Return the class ID for this class.  This is useful for
-     * comparing to a return value from getDynamicClassID(). Note that,
-     * because NumberFormat is an abstract base class, no fully constructed object
-     * will have the class ID returned by NumberFormat::getStaticClassID().
-     * @return The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
-     * This method is to implement a simple version of RTTI, since not all
-     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
-     * clone() methods call this method.
-     * <P>
-     * @return The class ID for this object. All objects of a
-     * given class have the same class ID.  Objects of
-     * other classes have different class IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const = 0;
-
-protected:
-
-    /**
-     * Default constructor for subclass use only.
-     * @stable ICU 2.0
-     */
-    NumberFormat();
-
-    /**
-     * Copy constructor.
-     * @stable ICU 2.0
-     */
-    NumberFormat(const NumberFormat&);
-
-    /**
-     * Assignment operator.
-     * @stable ICU 2.0
-     */
-    NumberFormat& operator=(const NumberFormat&);
-
-    /**
-     * Returns the currency in effect for this formatter.  Subclasses
-     * should override this method as needed.  Unlike getCurrency(),
-     * this method should never return "".
-     * @result output parameter for null-terminated result, which must
-     * have a capacity of at least 4
-     * @internal
-     */
-    virtual void getEffectiveCurrency(UChar* result, UErrorCode& ec) const;
-
-private:
-
-    enum EStyles {
-        kNumberStyle,
-        kCurrencyStyle,
-        kPercentStyle,
-        kScientificStyle,
-        kStyleCount // ALWAYS LAST ENUM: number of styles
-    };
-
-    /**
-     * Creates the specified decimal format style of the desired locale.
-     * Hook for service registration, uses makeInstance directly if no services
-     * registered.
-     * @param desiredLocale    the given locale.
-     * @param choice           the given style.
-     * @param success          Output param filled with success/failure status.
-     * @return                 A new NumberFormat instance.
-     */
-    static NumberFormat* U_EXPORT2 createInstance(const Locale& desiredLocale, EStyles choice, UErrorCode& success);
-
-    /**
-     * Creates the specified decimal format style of the desired locale.
-     * @param desiredLocale    the given locale.
-     * @param choice           the given style.
-     * @param success          Output param filled with success/failure status.
-     * @return                 A new NumberFormat instance.
-     */
-    static NumberFormat* makeInstance(const Locale& desiredLocale, EStyles choice, UErrorCode& success);
-
-    UBool      fGroupingUsed;
-    int32_t    fMaxIntegerDigits;
-    int32_t    fMinIntegerDigits;
-    int32_t    fMaxFractionDigits;
-    int32_t    fMinFractionDigits;
-    UBool      fParseIntegerOnly;
-    UBool      fParseStrict;
-
-    // ISO currency code
-    UChar      fCurrency[4];
-
-    friend class ICUNumberFormatFactory; // access to makeInstance, EStyles
-    friend class ICUNumberFormatService;
-};
-
-#if !UCONFIG_NO_SERVICE
-/**
- * A NumberFormatFactory is used to register new number formats.  The factory
- * should be able to create any of the predefined formats for each locale it
- * supports.  When registered, the locales it supports extend or override the
- * locale already supported by ICU.
- *
- * @stable ICU 2.6
- */
-class U_I18N_API NumberFormatFactory : public UObject {
-public:
-
-    /**
-     * Destructor
-     * @stable ICU 3.0
-     */
-    virtual ~NumberFormatFactory();
-
-    /**
-     * Return true if this factory will be visible.  Default is true.
-     * If not visible, the locales supported by this factory will not
-     * be listed by getAvailableLocales.
-     * @stable ICU 2.6
-     */
-    virtual UBool visible(void) const = 0;
-
-    /**
-     * Return the locale names directly supported by this factory.  The number of names
-     * is returned in count;
-     * @stable ICU 2.6
-     */
-    virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) const = 0;
-
-    /**
-     * Return a number format of the appropriate type.  If the locale
-     * is not supported, return null.  If the locale is supported, but
-     * the type is not provided by this service, return null.  Otherwise
-     * return an appropriate instance of NumberFormat.
-     * @stable ICU 2.6
-     */
-    virtual NumberFormat* createFormat(const Locale& loc, UNumberFormatStyle formatType) = 0;
-};
-
-/**
- * A NumberFormatFactory that supports a single locale.  It can be visible or invisible.
- * @stable ICU 2.6
- */
-class U_I18N_API SimpleNumberFormatFactory : public NumberFormatFactory {
-protected:
-    /**
-     * True if the locale supported by this factory is visible.
-     * @stable ICU 2.6
-     */
-    const UBool _visible;
-
-    /**
-     * The locale supported by this factory, as a UnicodeString.
-     * @stable ICU 2.6
-     */
-    UnicodeString _id;
-
-public:
-    /**
-     * @stable ICU 2.6
-     */
-    SimpleNumberFormatFactory(const Locale& locale, UBool visible = TRUE);
-
-    /**
-     * @stable ICU 3.0
-     */
-    virtual ~SimpleNumberFormatFactory();
-
-    /**
-     * @stable ICU 2.6
-     */
-    virtual UBool visible(void) const;
-
-    /**
-     * @stable ICU 2.6
-     */
-    virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) const;
-};
-#endif /* #if !UCONFIG_NO_SERVICE */
-
-// -------------------------------------
-
-inline UBool
-NumberFormat::isParseIntegerOnly() const
-{
-    return fParseIntegerOnly;
-}
-
-inline UBool
-NumberFormat::isParseStrict() const
-{
-	return fParseStrict;
-}
-
-inline UnicodeString&
-NumberFormat::format(const Formattable& obj,
-                     UnicodeString& appendTo,
-                     UErrorCode& status) const {
-    return Format::format(obj, appendTo, status);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _NUMFMT
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/numfmt.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/numfmt.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/numfmt.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/numfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,886 @@
+/*
+********************************************************************************
+* Copyright (C) 1997-2009, International Business Machines Corporation and others.
+* All Rights Reserved.
+********************************************************************************
+*
+* File NUMFMT.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   02/19/97    aliu        Converted from java.
+*   03/18/97    clhuang     Updated per C++ implementation.
+*   04/17/97    aliu        Changed DigitCount to int per code review.
+*    07/20/98    stephen        JDK 1.2 sync up. Added scientific support.
+*                            Changed naming conventions to match C++ guidelines
+*                            Derecated Java style constants (eg, INTEGER_FIELD)
+********************************************************************************
+*/
+
+#ifndef NUMFMT_H
+#define NUMFMT_H
+
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Abstract base class for all number formats.
+ */
+ 
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/unistr.h"
+#include "unicode/format.h"
+#include "unicode/unum.h" // UNumberFormatStyle
+#include "unicode/locid.h"
+
+U_NAMESPACE_BEGIN
+
+#if !UCONFIG_NO_SERVICE
+class NumberFormatFactory;
+class StringEnumeration;
+#endif
+
+/**
+ *
+ * Abstract base class for all number formats.  Provides interface for
+ * formatting and parsing a number.  Also provides methods for
+ * determining which locales have number formats, and what their names
+ * are.
+ * <P>
+ * NumberFormat helps you to format and parse numbers for any locale.
+ * Your code can be completely independent of the locale conventions
+ * for decimal points, thousands-separators, or even the particular
+ * decimal digits used, or whether the number format is even decimal.
+ * <P>
+ * To format a number for the current Locale, use one of the static
+ * factory methods:
+ * <pre>
+ * \code
+ *    double myNumber = 7.0;
+ *    UnicodeString myString;
+ *    UErrorCode success = U_ZERO_ERROR;
+ *    NumberFormat* nf = NumberFormat::createInstance(success)
+ *    nf->format(myNumber, myString);
+ *    cout << " Example 1: " << myString << endl;
+ * \endcode
+ * </pre>
+ * If you are formatting multiple numbers, it is more efficient to get
+ * the format and use it multiple times so that the system doesn't
+ * have to fetch the information about the local language and country
+ * conventions multiple times.
+ * <pre>
+ * \code
+ *     UnicodeString myString;
+ *     UErrorCode success = U_ZERO_ERROR;
+ *     nf = NumberFormat::createInstance( success );
+ *     int32_t a[] = { 123, 3333, -1234567 };
+ *     const int32_t a_len = sizeof(a) / sizeof(a[0]);
+ *     myString.remove();
+ *     for (int32_t i = 0; i < a_len; i++) {
+ *         nf->format(a[i], myString);
+ *         myString += " ; ";
+ *     }
+ *     cout << " Example 2: " << myString << endl;
+ * \endcode
+ * </pre>
+ * To format a number for a different Locale, specify it in the
+ * call to createInstance().
+ * <pre>
+ * \code
+ *     nf = NumberFormat::createInstance( Locale::FRENCH, success );
+ * \endcode
+ * </pre>
+ * You can use a NumberFormat to parse also.
+ * <pre>
+ * \code
+ *    UErrorCode success;
+ *    Formattable result(-999);  // initialized with error code
+ *    nf->parse(myString, result, success);
+ * \endcode
+ * </pre>
+ * Use createInstance to get the normal number format for that country.
+ * There are other static factory methods available.  Use getCurrency
+ * to get the currency number format for that country.  Use getPercent
+ * to get a format for displaying percentages. With this format, a
+ * fraction from 0.53 is displayed as 53%.
+ * <P>
+ * You can also control the display of numbers with such methods as
+ * getMinimumFractionDigits.  If you want even more control over the
+ * format or parsing, or want to give your users more control, you can
+ * try casting the NumberFormat you get from the factory methods to a
+ * DecimalNumberFormat. This will work for the vast majority of
+ * countries; just remember to put it in a try block in case you
+ * encounter an unusual one.
+ * <P>
+ * You can also use forms of the parse and format methods with
+ * ParsePosition and FieldPosition to allow you to:
+ * <ul type=round>
+ *   <li>(a) progressively parse through pieces of a string.
+ *   <li>(b) align the decimal point and other areas.
+ * </ul>
+ * For example, you can align numbers in two ways.
+ * <P>
+ * If you are using a monospaced font with spacing for alignment, you
+ * can pass the FieldPosition in your format call, with field =
+ * INTEGER_FIELD. On output, getEndIndex will be set to the offset
+ * between the last character of the integer and the decimal. Add
+ * (desiredSpaceCount - getEndIndex) spaces at the front of the
+ * string.
+ * <P>
+ * If you are using proportional fonts, instead of padding with
+ * spaces, measure the width of the string in pixels from the start to
+ * getEndIndex.  Then move the pen by (desiredPixelWidth -
+ * widthToAlignmentPoint) before drawing the text.  It also works
+ * where there is no decimal, but possibly additional characters at
+ * the end, e.g. with parentheses in negative numbers: "(12)" for -12.
+ * <p>
+ * <em>User subclasses are not supported.</em> While clients may write
+ * subclasses, such code will not necessarily work and will not be
+ * guaranteed to work stably from release to release.
+ *
+ * @stable ICU 2.0
+ */
+class U_I18N_API NumberFormat : public Format {
+public:
+
+    /**
+     * Alignment Field constants used to construct a FieldPosition object.
+     * Signifies that the position of the integer part or fraction part of
+     * a formatted number should be returned.
+     *
+     * @see FieldPosition
+     * @stable ICU 2.0
+     */
+    enum EAlignmentFields {
+        kIntegerField,
+        kFractionField,
+
+
+    /**
+     * These constants are provided for backwards compatibility only.
+     * Please use the C++ style constants defined above.
+     * @stable ICU 2.0
+     */
+        INTEGER_FIELD        = kIntegerField,
+        FRACTION_FIELD        = kFractionField
+    };
+
+    /**
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~NumberFormat();
+
+    /**
+     * Return true if the given Format objects are semantically equal.
+     * Objects of different subclasses are considered unequal.
+     * @return    true if the given Format objects are semantically equal.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const Format& other) const;
+
+    /**
+     * Format an object to produce a string.  This method handles
+     * Formattable objects with numeric types. If the Formattable
+     * object type is not a numeric type, then it returns a failing
+     * UErrorCode.
+     *
+     * @param obj       The object to format.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @param status    Output param filled with success/failure status.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& format(const Formattable& obj,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos,
+                                  UErrorCode& status) const;
+
+    /**
+     * Parse a string to produce an object.  This methods handles
+     * parsing of numeric strings into Formattable objects with numeric
+     * types.
+     * <P>
+     * Before calling, set parse_pos.index to the offset you want to
+     * start parsing at in the source. After calling, parse_pos.index
+     * indicates the position after the successfully parsed text.  If
+     * an error occurs, parse_pos.index is unchanged.
+     * <P>
+     * When parsing, leading whitespace is discarded (with successful
+     * parse), while trailing whitespace is left as is.
+     * <P>
+     * See Format::parseObject() for more.
+     *
+     * @param source    The string to be parsed into an object.
+     * @param result    Formattable to be set to the parse result.
+     *                  If parse fails, return contents are undefined.
+     * @param parse_pos The position to start parsing at. Upon return
+     *                  this param is set to the position after the
+     *                  last character successfully parsed. If the
+     *                  source is not parsed successfully, this param
+     *                  will remain unchanged.
+     * @return          A newly created Formattable* object, or NULL
+     *                  on failure.  The caller owns this and should
+     *                  delete it when done.
+     * @stable ICU 2.0
+     */
+    virtual void parseObject(const UnicodeString& source,
+                             Formattable& result,
+                             ParsePosition& parse_pos) const;
+
+    /**
+     * Format a double number. These methods call the NumberFormat
+     * pure virtual format() methods with the default FieldPosition.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(  double number,
+                            UnicodeString& appendTo) const;
+
+    /**
+     * Format a long number. These methods call the NumberFormat
+     * pure virtual format() methods with the default FieldPosition.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(  int32_t number,
+                            UnicodeString& appendTo) const;
+
+    /**
+     * Format an int64 number. These methods call the NumberFormat
+     * pure virtual format() methods with the default FieldPosition.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.8
+     */
+    UnicodeString& format(  int64_t number,
+                            UnicodeString& appendTo) const;
+
+    /**
+     * Format a double number. Concrete subclasses must implement
+     * these pure virtual methods.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& format(double number,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos) const = 0;
+    /**
+     * Format a long number. Concrete subclasses must implement
+     * these pure virtual methods.
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+    */
+    virtual UnicodeString& format(int32_t number,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos) const = 0;
+
+    /**
+     * Format an int64 number. (Not abstract to retain compatibility
+     * with earlier releases, however subclasses should override this
+     * method as it just delegates to format(int32_t number...);
+     *
+     * @param number    The value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       On input: an alignment field, if desired.
+     *                  On output: the offsets of the alignment field.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.8
+    */
+    virtual UnicodeString& format(int64_t number,
+                                  UnicodeString& appendTo,
+                                  FieldPosition& pos) const;
+    /**
+     * Redeclared Format method.
+     * @param obj       The object to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param status    Output parameter set to a failure error code
+     *                  when a failure occurs.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(const Formattable& obj,
+                          UnicodeString& appendTo,
+                          UErrorCode& status) const;
+
+   /**
+    * Return a long if possible (e.g. within range LONG_MAX,
+    * LONG_MAX], and with no decimals), otherwise a double.  If
+    * IntegerOnly is set, will stop at a decimal point (or equivalent;
+    * e.g. for rational numbers "1 2/3", will stop after the 1).
+    * <P>
+    * If no object can be parsed, index is unchanged, and NULL is
+    * returned.
+    * <P>
+    * This is a pure virtual which concrete subclasses must implement.
+    *
+    * @param text           The text to be parsed.
+    * @param result         Formattable to be set to the parse result.
+    *                       If parse fails, return contents are undefined.
+    * @param parsePosition  The position to start parsing at on input.
+    *                       On output, moved to after the last successfully
+    *                       parse character. On parse failure, does not change.
+    * @return               A Formattable object of numeric type.  The caller
+    *                       owns this an must delete it.  NULL on failure.
+    * @stable ICU 2.0
+    */
+    virtual void parse(const UnicodeString& text,
+                       Formattable& result,
+                       ParsePosition& parsePosition) const = 0;
+
+    /**
+     * Parse a string as a numeric value, and return a Formattable
+     * numeric object. This method parses integers only if IntegerOnly
+     * is set.
+     *
+     * @param text          The text to be parsed.
+     * @param result        Formattable to be set to the parse result.
+     *                      If parse fails, return contents are undefined.
+     * @param status        Output parameter set to a failure error code
+     *                      when a failure occurs.
+     * @return              A Formattable object of numeric type.  The caller
+     *                      owns this an must delete it.  NULL on failure.
+     * @see                 NumberFormat::isParseIntegerOnly
+     * @stable ICU 2.0
+     */
+    virtual void parse( const UnicodeString& text,
+                        Formattable& result,
+                        UErrorCode& status) const;
+
+    /**
+     * Parses text from the given string as a currency amount.  Unlike
+     * the parse() method, this method will attempt to parse a generic
+     * currency name, searching for a match of this object's locale's
+     * currency display names, or for a 3-letter ISO currency code.
+     * This method will fail if this format is not a currency format,
+     * that is, if it does not contain the currency pattern symbol
+     * (U+00A4) in its prefix or suffix.
+     *
+     * @param text the string to parse
+     * @param result output parameter to receive result. This will have
+     * its currency set to the parsed ISO currency code.
+     * @param pos input-output position; on input, the position within
+     * text to match; must have 0 <= pos.getIndex() < text.length();
+     * on output, the position after the last matched character. If
+     * the parse fails, the position in unchanged upon output.
+     * @return a reference to result
+     * @internal
+     */
+    virtual Formattable& parseCurrency(const UnicodeString& text,
+                                       Formattable& result,
+                                       ParsePosition& pos) const;
+
+    /**
+     * Return true if this format will parse numbers as integers
+     * only.  For example in the English locale, with ParseIntegerOnly
+     * true, the string "1234." would be parsed as the integer value
+     * 1234 and parsing would stop at the "." character.  Of course,
+     * the exact format accepted by the parse operation is locale
+     * dependant and determined by sub-classes of NumberFormat.
+     * @return    true if this format will parse numbers as integers
+     *            only.
+     * @stable ICU 2.0
+     */
+    UBool isParseIntegerOnly(void) const;
+
+    /**
+     * Sets whether or not numbers should be parsed as integers only.
+     * @param value    set True, this format will parse numbers as integers
+     *                 only.
+     * @see isParseIntegerOnly
+     * @stable ICU 2.0
+     */
+    virtual void setParseIntegerOnly(UBool value);
+	
+    /**
+     * Return whether or not strict parsing is in effect.
+     *
+     * @return <code>TRUE</code> if strict parsing is in effect,
+     *         <code>FALSE</code> otherwise.
+     *  @internal
+     */
+    UBool isParseStrict(void) const;
+	
+    /**
+     * Set whether or not strict parsing should be used.
+     *
+     * @param value <code>TRUE</code> if strict parsing should be used,
+     *              <code>FALSE</code> otherwise.
+     *  @internal
+     */
+    virtual void setParseStrict(UBool value);
+	
+    /**
+     * Returns the default number format for the current default
+     * locale.  The default format is one of the styles provided by
+     * the other factory methods: getNumberInstance,
+     * getCurrencyInstance or getPercentInstance.  Exactly which one
+     * is locale dependant.
+     * @stable ICU 2.0
+     */
+    static NumberFormat* U_EXPORT2 createInstance(UErrorCode&);
+
+    /**
+     * Returns the default number format for the specified locale.
+     * The default format is one of the styles provided by the other
+     * factory methods: getNumberInstance, getCurrencyInstance or
+     * getPercentInstance.  Exactly which one is locale dependant.
+     * @param inLocale    the given locale.
+     * @stable ICU 2.0
+     */
+    static NumberFormat* U_EXPORT2 createInstance(const Locale& inLocale,
+                                        UErrorCode&);
+
+    /**
+     * Returns a currency format for the current default locale.
+     * @stable ICU 2.0
+     */
+    static NumberFormat* U_EXPORT2 createCurrencyInstance(UErrorCode&);
+
+    /**
+     * Returns a currency format for the specified locale.
+     * @param inLocale    the given locale.
+     * @stable ICU 2.0
+     */
+    static NumberFormat* U_EXPORT2 createCurrencyInstance(const Locale& inLocale,
+                                                UErrorCode&);
+
+    /**
+     * Returns a percentage format for the current default locale.
+     * @stable ICU 2.0
+     */
+    static NumberFormat* U_EXPORT2 createPercentInstance(UErrorCode&);
+
+    /**
+     * Returns a percentage format for the specified locale.
+     * @param inLocale    the given locale.
+     * @stable ICU 2.0
+     */
+    static NumberFormat* U_EXPORT2 createPercentInstance(const Locale& inLocale,
+                                               UErrorCode&);
+
+    /**
+     * Returns a scientific format for the current default locale.
+     * @stable ICU 2.0
+     */
+    static NumberFormat* U_EXPORT2 createScientificInstance(UErrorCode&);
+
+    /**
+     * Returns a scientific format for the specified locale.
+     * @param inLocale    the given locale.
+     * @stable ICU 2.0
+     */
+    static NumberFormat* U_EXPORT2 createScientificInstance(const Locale& inLocale,
+                                                UErrorCode&);
+
+    /**
+     * Get the set of Locales for which NumberFormats are installed.
+     * @param count    Output param to receive the size of the locales
+     * @stable ICU 2.0
+     */
+    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+
+#if !UCONFIG_NO_SERVICE
+    /**
+     * Register a new NumberFormatFactory.  The factory will be adopted.
+     * @param toAdopt the NumberFormatFactory instance to be adopted
+     * @param status the in/out status code, no special meanings are assigned
+     * @return a registry key that can be used to unregister this factory
+     * @stable ICU 2.6
+     */
+    static URegistryKey U_EXPORT2 registerFactory(NumberFormatFactory* toAdopt, UErrorCode& status);
+
+    /**
+     * Unregister a previously-registered NumberFormatFactory using the key returned from the
+     * register call.  Key becomes invalid after a successful call and should not be used again.
+     * The NumberFormatFactory corresponding to the key will be deleted.
+     * @param key the registry key returned by a previous call to registerFactory
+     * @param status the in/out status code, no special meanings are assigned
+     * @return TRUE if the factory for the key was successfully unregistered
+     * @stable ICU 2.6
+     */
+    static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
+
+    /**
+     * Return a StringEnumeration over the locales available at the time of the call,
+     * including registered locales.
+     * @return a StringEnumeration over the locales available at the time of the call
+     * @stable ICU 2.6
+     */
+    static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
+#endif /* UCONFIG_NO_SERVICE */
+
+    /**
+     * Returns true if grouping is used in this format. For example,
+     * in the English locale, with grouping on, the number 1234567
+     * might be formatted as "1,234,567". The grouping separator as
+     * well as the size of each group is locale dependant and is
+     * determined by sub-classes of NumberFormat.
+     * @see setGroupingUsed
+     * @stable ICU 2.0
+     */
+    UBool isGroupingUsed(void) const;
+
+    /**
+     * Set whether or not grouping will be used in this format.
+     * @param newValue    True, grouping will be used in this format.
+     * @see getGroupingUsed
+     * @stable ICU 2.0
+     */
+    virtual void setGroupingUsed(UBool newValue);
+
+    /**
+     * Returns the maximum number of digits allowed in the integer portion of a
+     * number.
+     * @return     the maximum number of digits allowed in the integer portion of a
+     *             number.
+     * @see setMaximumIntegerDigits
+     * @stable ICU 2.0
+     */
+    int32_t getMaximumIntegerDigits(void) const;
+
+    /**
+     * Sets the maximum number of digits allowed in the integer portion of a
+     * number. maximumIntegerDigits must be >= minimumIntegerDigits.  If the
+     * new value for maximumIntegerDigits is less than the current value
+     * of minimumIntegerDigits, then minimumIntegerDigits will also be set to
+     * the new value.
+     *
+     * @param newValue    the new value for the maximum number of digits
+     *                    allowed in the integer portion of a number.
+     * @see getMaximumIntegerDigits
+     * @stable ICU 2.0
+     */
+    virtual void setMaximumIntegerDigits(int32_t newValue);
+
+    /**
+     * Returns the minimum number of digits allowed in the integer portion of a
+     * number.
+     * @return    the minimum number of digits allowed in the integer portion of a
+     *            number.
+     * @see setMinimumIntegerDigits
+     * @stable ICU 2.0
+     */
+    int32_t getMinimumIntegerDigits(void) const;
+
+    /**
+     * Sets the minimum number of digits allowed in the integer portion of a
+     * number. minimumIntegerDigits must be &lt;= maximumIntegerDigits.  If the
+     * new value for minimumIntegerDigits exceeds the current value
+     * of maximumIntegerDigits, then maximumIntegerDigits will also be set to
+     * the new value.
+     * @param newValue    the new value to be set.
+     * @see getMinimumIntegerDigits
+     * @stable ICU 2.0
+     */
+    virtual void setMinimumIntegerDigits(int32_t newValue);
+
+    /**
+     * Returns the maximum number of digits allowed in the fraction portion of a
+     * number.
+     * @return    the maximum number of digits allowed in the fraction portion of a
+     *            number.
+     * @see setMaximumFractionDigits
+     * @stable ICU 2.0
+     */
+    int32_t getMaximumFractionDigits(void) const;
+
+    /**
+     * Sets the maximum number of digits allowed in the fraction portion of a
+     * number. maximumFractionDigits must be >= minimumFractionDigits.  If the
+     * new value for maximumFractionDigits is less than the current value
+     * of minimumFractionDigits, then minimumFractionDigits will also be set to
+     * the new value.
+     * @param newValue    the new value to be set.
+     * @see getMaximumFractionDigits
+     * @stable ICU 2.0
+     */
+    virtual void setMaximumFractionDigits(int32_t newValue);
+
+    /**
+     * Returns the minimum number of digits allowed in the fraction portion of a
+     * number.
+     * @return    the minimum number of digits allowed in the fraction portion of a
+     *            number.
+     * @see setMinimumFractionDigits
+     * @stable ICU 2.0
+     */
+    int32_t getMinimumFractionDigits(void) const;
+
+    /**
+     * Sets the minimum number of digits allowed in the fraction portion of a
+     * number. minimumFractionDigits must be &lt;= maximumFractionDigits.   If the
+     * new value for minimumFractionDigits exceeds the current value
+     * of maximumFractionDigits, then maximumIntegerDigits will also be set to
+     * the new value
+     * @param newValue    the new value to be set.
+     * @see getMinimumFractionDigits
+     * @stable ICU 2.0
+     */
+    virtual void setMinimumFractionDigits(int32_t newValue);
+
+    /**
+     * Sets the currency used to display currency
+     * amounts.  This takes effect immediately, if this format is a
+     * currency format.  If this format is not a currency format, then
+     * the currency is used if and when this object becomes a
+     * currency format.
+     * @param theCurrency a 3-letter ISO code indicating new currency
+     * to use.  It need not be null-terminated.  May be the empty
+     * string or NULL to indicate no currency.
+     * @param ec input-output error code
+     * @stable ICU 3.0
+     */
+    virtual void setCurrency(const UChar* theCurrency, UErrorCode& ec);
+
+    /**
+     * Gets the currency used to display currency
+     * amounts.  This may be an empty string for some subclasses.
+     * @return a 3-letter null-terminated ISO code indicating
+     * the currency in use, or a pointer to the empty string.
+     * @stable ICU 2.6
+     */
+    const UChar* getCurrency() const;
+
+public:
+
+    /**
+     * Return the class ID for this class.  This is useful for
+     * comparing to a return value from getDynamicClassID(). Note that,
+     * because NumberFormat is an abstract base class, no fully constructed object
+     * will have the class ID returned by NumberFormat::getStaticClassID().
+     * @return The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
+     * This method is to implement a simple version of RTTI, since not all
+     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
+     * clone() methods call this method.
+     * <P>
+     * @return The class ID for this object. All objects of a
+     * given class have the same class ID.  Objects of
+     * other classes have different class IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const = 0;
+
+protected:
+
+    /**
+     * Default constructor for subclass use only.
+     * @stable ICU 2.0
+     */
+    NumberFormat();
+
+    /**
+     * Copy constructor.
+     * @stable ICU 2.0
+     */
+    NumberFormat(const NumberFormat&);
+
+    /**
+     * Assignment operator.
+     * @stable ICU 2.0
+     */
+    NumberFormat& operator=(const NumberFormat&);
+
+    /**
+     * Returns the currency in effect for this formatter.  Subclasses
+     * should override this method as needed.  Unlike getCurrency(),
+     * this method should never return "".
+     * @result output parameter for null-terminated result, which must
+     * have a capacity of at least 4
+     * @internal
+     */
+    virtual void getEffectiveCurrency(UChar* result, UErrorCode& ec) const;
+
+private:
+
+    enum EStyles {
+        kNumberStyle,
+        kCurrencyStyle,
+        kPercentStyle,
+        kScientificStyle,
+        kStyleCount // ALWAYS LAST ENUM: number of styles
+    };
+
+    /**
+     * Creates the specified decimal format style of the desired locale.
+     * Hook for service registration, uses makeInstance directly if no services
+     * registered.
+     * @param desiredLocale    the given locale.
+     * @param choice           the given style.
+     * @param success          Output param filled with success/failure status.
+     * @return                 A new NumberFormat instance.
+     */
+    static NumberFormat* U_EXPORT2 createInstance(const Locale& desiredLocale, EStyles choice, UErrorCode& success);
+
+    /**
+     * Creates the specified decimal format style of the desired locale.
+     * @param desiredLocale    the given locale.
+     * @param choice           the given style.
+     * @param success          Output param filled with success/failure status.
+     * @return                 A new NumberFormat instance.
+     */
+    static NumberFormat* makeInstance(const Locale& desiredLocale, EStyles choice, UErrorCode& success);
+
+    UBool      fGroupingUsed;
+    int32_t    fMaxIntegerDigits;
+    int32_t    fMinIntegerDigits;
+    int32_t    fMaxFractionDigits;
+    int32_t    fMinFractionDigits;
+    UBool      fParseIntegerOnly;
+    UBool      fParseStrict;
+
+    // ISO currency code
+    UChar      fCurrency[4];
+
+    friend class ICUNumberFormatFactory; // access to makeInstance, EStyles
+    friend class ICUNumberFormatService;
+};
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * A NumberFormatFactory is used to register new number formats.  The factory
+ * should be able to create any of the predefined formats for each locale it
+ * supports.  When registered, the locales it supports extend or override the
+ * locale already supported by ICU.
+ *
+ * @stable ICU 2.6
+ */
+class U_I18N_API NumberFormatFactory : public UObject {
+public:
+
+    /**
+     * Destructor
+     * @stable ICU 3.0
+     */
+    virtual ~NumberFormatFactory();
+
+    /**
+     * Return true if this factory will be visible.  Default is true.
+     * If not visible, the locales supported by this factory will not
+     * be listed by getAvailableLocales.
+     * @stable ICU 2.6
+     */
+    virtual UBool visible(void) const = 0;
+
+    /**
+     * Return the locale names directly supported by this factory.  The number of names
+     * is returned in count;
+     * @stable ICU 2.6
+     */
+    virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) const = 0;
+
+    /**
+     * Return a number format of the appropriate type.  If the locale
+     * is not supported, return null.  If the locale is supported, but
+     * the type is not provided by this service, return null.  Otherwise
+     * return an appropriate instance of NumberFormat.
+     * @stable ICU 2.6
+     */
+    virtual NumberFormat* createFormat(const Locale& loc, UNumberFormatStyle formatType) = 0;
+};
+
+/**
+ * A NumberFormatFactory that supports a single locale.  It can be visible or invisible.
+ * @stable ICU 2.6
+ */
+class U_I18N_API SimpleNumberFormatFactory : public NumberFormatFactory {
+protected:
+    /**
+     * True if the locale supported by this factory is visible.
+     * @stable ICU 2.6
+     */
+    const UBool _visible;
+
+    /**
+     * The locale supported by this factory, as a UnicodeString.
+     * @stable ICU 2.6
+     */
+    UnicodeString _id;
+
+public:
+    /**
+     * @stable ICU 2.6
+     */
+    SimpleNumberFormatFactory(const Locale& locale, UBool visible = TRUE);
+
+    /**
+     * @stable ICU 3.0
+     */
+    virtual ~SimpleNumberFormatFactory();
+
+    /**
+     * @stable ICU 2.6
+     */
+    virtual UBool visible(void) const;
+
+    /**
+     * @stable ICU 2.6
+     */
+    virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) const;
+};
+#endif /* #if !UCONFIG_NO_SERVICE */
+
+// -------------------------------------
+
+inline UBool
+NumberFormat::isParseIntegerOnly() const
+{
+    return fParseIntegerOnly;
+}
+
+inline UBool
+NumberFormat::isParseStrict() const
+{
+	return fParseStrict;
+}
+
+inline UnicodeString&
+NumberFormat::format(const Formattable& obj,
+                     UnicodeString& appendTo,
+                     UErrorCode& status) const {
+    return Format::format(obj, appendTo, status);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _NUMFMT
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/parseerr.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/parseerr.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/parseerr.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,92 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1999-2005, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*   Date        Name        Description
-*   03/14/00    aliu        Creation.
-*   06/27/00    aliu        Change from C++ class to C struct
-**********************************************************************
-*/
-#ifndef PARSEERR_H
-#define PARSEERR_H
-
-#include "unicode/utypes.h"
-
-
-/**
- * \file
- * \brief C API: Parse Error Information
- */
-/**
- * The capacity of the context strings in UParseError.
- * @stable ICU 2.0
- */ 
-enum { U_PARSE_CONTEXT_LEN = 16 };
-
-/**
- * A UParseError struct is used to returned detailed information about
- * parsing errors.  It is used by ICU parsing engines that parse long
- * rules, patterns, or programs, where the text being parsed is long
- * enough that more information than a UErrorCode is needed to
- * localize the error.
- *
- * <p>The line, offset, and context fields are optional; parsing
- * engines may choose not to use to use them.
- *
- * <p>The preContext and postContext strings include some part of the
- * context surrounding the error.  If the source text is "let for=7"
- * and "for" is the error (e.g., because it is a reserved word), then
- * some examples of what a parser might produce are the following:
- *
- * <pre>
- * preContext   postContext
- * ""           ""            The parser does not support context
- * "let "       "=7"          Pre- and post-context only
- * "let "       "for=7"       Pre- and post-context and error text
- * ""           "for"         Error text only
- * </pre>
- *
- * <p>Examples of engines which use UParseError (or may use it in the
- * future) are Transliterator, RuleBasedBreakIterator, and
- * RegexPattern.
- * 
- * @stable ICU 2.0
- */
-typedef struct UParseError {
-
-    /**
-     * The line on which the error occured.  If the parser uses this
-     * field, it sets it to the line number of the source text line on
-     * which the error appears, which will be be a value >= 1.  If the
-     * parse does not support line numbers, the value will be <= 0.
-     * @stable ICU 2.0
-     */
-    int32_t        line;
-
-    /**
-     * The character offset to the error.  If the line field is >= 1,
-     * then this is the offset from the start of the line.  Otherwise,
-     * this is the offset from the start of the text.  If the parser
-     * does not support this field, it will have a value < 0.
-     * @stable ICU 2.0
-     */
-    int32_t        offset;
-
-    /**
-     * Textual context before the error.  Null-terminated.  The empty
-     * string if not supported by parser.
-     * @stable ICU 2.0   
-     */
-    UChar          preContext[U_PARSE_CONTEXT_LEN];
-
-    /**
-     * The error itself and/or textual context after the error.
-     * Null-terminated.  The empty string if not supported by parser.
-     * @stable ICU 2.0   
-     */
-    UChar          postContext[U_PARSE_CONTEXT_LEN];
-
-} UParseError;
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/parseerr.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/parseerr.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/parseerr.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/parseerr.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,92 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   03/14/00    aliu        Creation.
+*   06/27/00    aliu        Change from C++ class to C struct
+**********************************************************************
+*/
+#ifndef PARSEERR_H
+#define PARSEERR_H
+
+#include "unicode/utypes.h"
+
+
+/**
+ * \file
+ * \brief C API: Parse Error Information
+ */
+/**
+ * The capacity of the context strings in UParseError.
+ * @stable ICU 2.0
+ */ 
+enum { U_PARSE_CONTEXT_LEN = 16 };
+
+/**
+ * A UParseError struct is used to returned detailed information about
+ * parsing errors.  It is used by ICU parsing engines that parse long
+ * rules, patterns, or programs, where the text being parsed is long
+ * enough that more information than a UErrorCode is needed to
+ * localize the error.
+ *
+ * <p>The line, offset, and context fields are optional; parsing
+ * engines may choose not to use to use them.
+ *
+ * <p>The preContext and postContext strings include some part of the
+ * context surrounding the error.  If the source text is "let for=7"
+ * and "for" is the error (e.g., because it is a reserved word), then
+ * some examples of what a parser might produce are the following:
+ *
+ * <pre>
+ * preContext   postContext
+ * ""           ""            The parser does not support context
+ * "let "       "=7"          Pre- and post-context only
+ * "let "       "for=7"       Pre- and post-context and error text
+ * ""           "for"         Error text only
+ * </pre>
+ *
+ * <p>Examples of engines which use UParseError (or may use it in the
+ * future) are Transliterator, RuleBasedBreakIterator, and
+ * RegexPattern.
+ * 
+ * @stable ICU 2.0
+ */
+typedef struct UParseError {
+
+    /**
+     * The line on which the error occured.  If the parser uses this
+     * field, it sets it to the line number of the source text line on
+     * which the error appears, which will be be a value >= 1.  If the
+     * parse does not support line numbers, the value will be <= 0.
+     * @stable ICU 2.0
+     */
+    int32_t        line;
+
+    /**
+     * The character offset to the error.  If the line field is >= 1,
+     * then this is the offset from the start of the line.  Otherwise,
+     * this is the offset from the start of the text.  If the parser
+     * does not support this field, it will have a value < 0.
+     * @stable ICU 2.0
+     */
+    int32_t        offset;
+
+    /**
+     * Textual context before the error.  Null-terminated.  The empty
+     * string if not supported by parser.
+     * @stable ICU 2.0   
+     */
+    UChar          preContext[U_PARSE_CONTEXT_LEN];
+
+    /**
+     * The error itself and/or textual context after the error.
+     * Null-terminated.  The empty string if not supported by parser.
+     * @stable ICU 2.0   
+     */
+    UChar          postContext[U_PARSE_CONTEXT_LEN];
+
+} UParseError;
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/parsepos.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/parsepos.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/parsepos.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,230 +0,0 @@
-/*
-* Copyright (C) 1997-2005, International Business Machines Corporation and others. All Rights Reserved.
-*******************************************************************************
-*
-* File PARSEPOS.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   07/09/97    helena      Converted from java.
-*   07/17/98    stephen     Added errorIndex support.
-*   05/11/99    stephen     Cleaned up.
-*******************************************************************************
-*/
-
-#ifndef PARSEPOS_H
-#define PARSEPOS_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-
- 
-U_NAMESPACE_BEGIN
-
-/**
- * \file
- * \brief C++ API: Canonical Iterator
- */
-/** 
- * <code>ParsePosition</code> is a simple class used by <code>Format</code>
- * and its subclasses to keep track of the current position during parsing.
- * The <code>parseObject</code> method in the various <code>Format</code>
- * classes requires a <code>ParsePosition</code> object as an argument.
- *
- * <p>
- * By design, as you parse through a string with different formats,
- * you can use the same <code>ParsePosition</code>, since the index parameter
- * records the current position.
- *
- * The ParsePosition class is not suitable for subclassing.
- *
- * @version     1.3 10/30/97
- * @author      Mark Davis, Helena Shih
- * @see         java.text.Format
- */
-
-class U_COMMON_API ParsePosition : public UObject {
-public:
-    /**
-     * Default constructor, the index starts with 0 as default.
-     * @stable ICU 2.0
-     */
-    ParsePosition()
-        : UObject(),
-        index(0),
-        errorIndex(-1)
-      {}
-
-    /**
-     * Create a new ParsePosition with the given initial index.
-     * @param newIndex the new text offset.
-     * @stable ICU 2.0
-     */
-    ParsePosition(int32_t newIndex)
-        : UObject(),
-        index(newIndex),
-        errorIndex(-1)
-      {}
-
-    /**
-     * Copy constructor
-     * @param copy the object to be copied from.
-     * @stable ICU 2.0
-     */
-    ParsePosition(const ParsePosition& copy)
-        : UObject(copy),
-        index(copy.index),
-        errorIndex(copy.errorIndex)
-      {}
-
-    /**
-     * Destructor
-     * @stable ICU 2.0
-     */
-    virtual ~ParsePosition();
-
-    /**
-     * Assignment operator
-     * @stable ICU 2.0
-     */
-    ParsePosition&      operator=(const ParsePosition& copy);
-
-    /**
-     * Equality operator.
-     * @return TRUE if the two parse positions are equal, FALSE otherwise.
-     * @stable ICU 2.0
-     */
-    UBool              operator==(const ParsePosition& that) const;
-
-    /**
-     * Equality operator.
-     * @return TRUE if the two parse positions are not equal, FALSE otherwise.
-     * @stable ICU 2.0
-     */
-    UBool              operator!=(const ParsePosition& that) const;
-
-    /**
-     * Clone this object.
-     * Clones can be used concurrently in multiple threads.
-     * If an error occurs, then NULL is returned.
-     * The caller must delete the clone.
-     *
-     * @return a clone of this object
-     *
-     * @see getDynamicClassID
-     * @stable ICU 2.8
-     */
-    ParsePosition *clone() const;
-
-    /**
-     * Retrieve the current parse position.  On input to a parse method, this
-     * is the index of the character at which parsing will begin; on output, it
-     * is the index of the character following the last character parsed.
-     * @return the current index.
-     * @stable ICU 2.0
-     */
-    int32_t getIndex(void) const;
-
-    /**
-     * Set the current parse position.
-     * @param index the new index.
-     * @stable ICU 2.0
-     */
-    void setIndex(int32_t index);
-
-    /**
-     * Set the index at which a parse error occurred.  Formatters
-     * should set this before returning an error code from their
-     * parseObject method.  The default value is -1 if this is not
-     * set.
-     * @stable ICU 2.0
-     */
-    void setErrorIndex(int32_t ei);
-
-    /**
-     * Retrieve the index at which an error occurred, or -1 if the
-     * error index has not been set.
-     * @stable ICU 2.0
-     */
-    int32_t getErrorIndex(void) const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.2
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.2
-     */
-    virtual UClassID getDynamicClassID() const;
-
-private:
-    /**
-     * Input: the place you start parsing.
-     * <br>Output: position where the parse stopped.
-     * This is designed to be used serially,
-     * with each call setting index up for the next one.
-     */
-    int32_t index;
-
-    /**
-     * The index at which a parse error occurred.
-     */
-    int32_t errorIndex;
-
-};
-
-inline ParsePosition&
-ParsePosition::operator=(const ParsePosition& copy)
-{
-  index = copy.index;
-  errorIndex = copy.errorIndex;
-  return *this;
-}
-
-inline UBool
-ParsePosition::operator==(const ParsePosition& copy) const
-{
-  if(index != copy.index || errorIndex != copy.errorIndex)
-  return FALSE;
-  else
-  return TRUE;
-}
-
-inline UBool
-ParsePosition::operator!=(const ParsePosition& copy) const
-{
-  return !operator==(copy);
-}
-
-inline int32_t
-ParsePosition::getIndex() const
-{
-  return index;
-}
-
-inline void
-ParsePosition::setIndex(int32_t offset)
-{
-  this->index = offset;
-}
-
-inline int32_t
-ParsePosition::getErrorIndex() const
-{
-  return errorIndex;
-}
-
-inline void
-ParsePosition::setErrorIndex(int32_t ei)
-{
-  this->errorIndex = ei;
-}
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/parsepos.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/parsepos.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/parsepos.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/parsepos.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,230 @@
+/*
+* Copyright (C) 1997-2005, International Business Machines Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* File PARSEPOS.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   07/09/97    helena      Converted from java.
+*   07/17/98    stephen     Added errorIndex support.
+*   05/11/99    stephen     Cleaned up.
+*******************************************************************************
+*/
+
+#ifndef PARSEPOS_H
+#define PARSEPOS_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+
+ 
+U_NAMESPACE_BEGIN
+
+/**
+ * \file
+ * \brief C++ API: Canonical Iterator
+ */
+/** 
+ * <code>ParsePosition</code> is a simple class used by <code>Format</code>
+ * and its subclasses to keep track of the current position during parsing.
+ * The <code>parseObject</code> method in the various <code>Format</code>
+ * classes requires a <code>ParsePosition</code> object as an argument.
+ *
+ * <p>
+ * By design, as you parse through a string with different formats,
+ * you can use the same <code>ParsePosition</code>, since the index parameter
+ * records the current position.
+ *
+ * The ParsePosition class is not suitable for subclassing.
+ *
+ * @version     1.3 10/30/97
+ * @author      Mark Davis, Helena Shih
+ * @see         java.text.Format
+ */
+
+class U_COMMON_API ParsePosition : public UObject {
+public:
+    /**
+     * Default constructor, the index starts with 0 as default.
+     * @stable ICU 2.0
+     */
+    ParsePosition()
+        : UObject(),
+        index(0),
+        errorIndex(-1)
+      {}
+
+    /**
+     * Create a new ParsePosition with the given initial index.
+     * @param newIndex the new text offset.
+     * @stable ICU 2.0
+     */
+    ParsePosition(int32_t newIndex)
+        : UObject(),
+        index(newIndex),
+        errorIndex(-1)
+      {}
+
+    /**
+     * Copy constructor
+     * @param copy the object to be copied from.
+     * @stable ICU 2.0
+     */
+    ParsePosition(const ParsePosition& copy)
+        : UObject(copy),
+        index(copy.index),
+        errorIndex(copy.errorIndex)
+      {}
+
+    /**
+     * Destructor
+     * @stable ICU 2.0
+     */
+    virtual ~ParsePosition();
+
+    /**
+     * Assignment operator
+     * @stable ICU 2.0
+     */
+    ParsePosition&      operator=(const ParsePosition& copy);
+
+    /**
+     * Equality operator.
+     * @return TRUE if the two parse positions are equal, FALSE otherwise.
+     * @stable ICU 2.0
+     */
+    UBool              operator==(const ParsePosition& that) const;
+
+    /**
+     * Equality operator.
+     * @return TRUE if the two parse positions are not equal, FALSE otherwise.
+     * @stable ICU 2.0
+     */
+    UBool              operator!=(const ParsePosition& that) const;
+
+    /**
+     * Clone this object.
+     * Clones can be used concurrently in multiple threads.
+     * If an error occurs, then NULL is returned.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @stable ICU 2.8
+     */
+    ParsePosition *clone() const;
+
+    /**
+     * Retrieve the current parse position.  On input to a parse method, this
+     * is the index of the character at which parsing will begin; on output, it
+     * is the index of the character following the last character parsed.
+     * @return the current index.
+     * @stable ICU 2.0
+     */
+    int32_t getIndex(void) const;
+
+    /**
+     * Set the current parse position.
+     * @param index the new index.
+     * @stable ICU 2.0
+     */
+    void setIndex(int32_t index);
+
+    /**
+     * Set the index at which a parse error occurred.  Formatters
+     * should set this before returning an error code from their
+     * parseObject method.  The default value is -1 if this is not
+     * set.
+     * @stable ICU 2.0
+     */
+    void setErrorIndex(int32_t ei);
+
+    /**
+     * Retrieve the index at which an error occurred, or -1 if the
+     * error index has not been set.
+     * @stable ICU 2.0
+     */
+    int32_t getErrorIndex(void) const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const;
+
+private:
+    /**
+     * Input: the place you start parsing.
+     * <br>Output: position where the parse stopped.
+     * This is designed to be used serially,
+     * with each call setting index up for the next one.
+     */
+    int32_t index;
+
+    /**
+     * The index at which a parse error occurred.
+     */
+    int32_t errorIndex;
+
+};
+
+inline ParsePosition&
+ParsePosition::operator=(const ParsePosition& copy)
+{
+  index = copy.index;
+  errorIndex = copy.errorIndex;
+  return *this;
+}
+
+inline UBool
+ParsePosition::operator==(const ParsePosition& copy) const
+{
+  if(index != copy.index || errorIndex != copy.errorIndex)
+  return FALSE;
+  else
+  return TRUE;
+}
+
+inline UBool
+ParsePosition::operator!=(const ParsePosition& copy) const
+{
+  return !operator==(copy);
+}
+
+inline int32_t
+ParsePosition::getIndex() const
+{
+  return index;
+}
+
+inline void
+ParsePosition::setIndex(int32_t offset)
+{
+  this->index = offset;
+}
+
+inline int32_t
+ParsePosition::getErrorIndex() const
+{
+  return errorIndex;
+}
+
+inline void
+ParsePosition::setErrorIndex(int32_t ei)
+{
+  this->errorIndex = ei;
+}
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/platform.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/platform.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/platform.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,316 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 1997-2007, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-******************************************************************************
-*
-*  FILE NAME : platform.h
-*
-*   Date        Name        Description
-*   05/13/98    nos         Creation (content moved here from ptypes.h).
-*   03/02/99    stephen     Added AS400 support.
-*   03/30/99    stephen     Added Linux support.
-*   04/13/99    stephen     Reworked for autoconf.
-******************************************************************************
-*/
-
-/**
- * \file 
- * \brief Basic types for the platform 
- */
-
-/* Define the platform we're on. */
-#ifndef U_DARWIN
-#define U_DARWIN
-#endif
-
-/* Define whether inttypes.h is available */
-#ifndef U_HAVE_INTTYPES_H
-#define U_HAVE_INTTYPES_H 1
-#endif
-
-/*
- * Define what support for C++ streams is available.
- *     If U_IOSTREAM_SOURCE is set to 199711, then <iostream> is available
- * (1997711 is the date the ISO/IEC C++ FDIS was published), and then
- * one should qualify streams using the std namespace in ICU header
- * files.
- *     If U_IOSTREAM_SOURCE is set to 198506, then <iostream.h> is
- * available instead (198506 is the date when Stroustrup published
- * "An Extensible I/O Facility for C++" at the summer USENIX conference).
- *     If U_IOSTREAM_SOURCE is 0, then C++ streams are not available and
- * support for them will be silently suppressed in ICU.
- *
- */
-
-#ifndef U_IOSTREAM_SOURCE
-#define U_IOSTREAM_SOURCE 199711
-#endif
-
-/* Determines whether specific types are available */
-#ifndef U_HAVE_INT8_T
-#define U_HAVE_INT8_T 1
-#endif
-
-#ifndef U_HAVE_UINT8_T
-#define U_HAVE_UINT8_T 1
-#endif
-
-#ifndef U_HAVE_INT16_T
-#define U_HAVE_INT16_T 1
-#endif
-
-#ifndef U_HAVE_UINT16_T
-#define U_HAVE_UINT16_T 1
-#endif
-
-#ifndef U_HAVE_INT32_T
-#define U_HAVE_INT32_T 1
-#endif
-
-#ifndef U_HAVE_UINT32_T
-#define U_HAVE_UINT32_T 1
-#endif
-
-#ifndef U_HAVE_INT64_T
-#define U_HAVE_INT64_T 1
-#endif
-
-#ifndef U_HAVE_UINT64_T
-#define U_HAVE_UINT64_T 1
-#endif
-
-/*===========================================================================*/
-/* Generic data types                                                        */
-/*===========================================================================*/
-
-#include <sys/types.h>
-
-/* If your platform does not have the <inttypes.h> header, you may
-   need to edit the typedefs below. */
-#if U_HAVE_INTTYPES_H
-
-/* autoconf 2.13 sometimes can't properly find the data types in <inttypes.h> */
-/* os/390 needs <inttypes.h>, but it doesn't have int8_t, and it sometimes */
-/* doesn't have uint8_t depending on the OS version. */
-/* So we have this work around. */
-#ifdef OS390
-/* The features header is needed to get (u)int64_t sometimes. */
-#include <features.h>
-#if ! U_HAVE_INT8_T
-typedef signed char int8_t;
-#endif
-#if !defined(__uint8_t)
-#define __uint8_t 1
-typedef unsigned char uint8_t;
-#endif
-#endif /* OS390 */
-
-#include <inttypes.h>
-
-#else /* U_HAVE_INTTYPES_H */
-
-#if ! U_HAVE_INT8_T
-typedef signed char int8_t;
-#endif
-
-#if ! U_HAVE_UINT8_T
-typedef unsigned char uint8_t;
-#endif
-
-#if ! U_HAVE_INT16_T
-typedef signed short int16_t;
-#endif
-
-#if ! U_HAVE_UINT16_T
-typedef unsigned short uint16_t;
-#endif
-
-#if ! U_HAVE_INT32_T
-typedef signed int int32_t;
-#endif
-
-#if ! U_HAVE_UINT32_T
-typedef unsigned int uint32_t;
-#endif
-
-#if ! U_HAVE_INT64_T
-    typedef signed long long int64_t;
-/* else we may not have a 64-bit type */
-#endif
-
-#if ! U_HAVE_UINT64_T
-    typedef unsigned long long uint64_t;
-/* else we may not have a 64-bit type */
-#endif
-
-#endif
-
-/*===========================================================================*/
-/* Compiler and environment features                                         */
-/*===========================================================================*/
-
-/* Define whether namespace is supported */
-#ifndef U_HAVE_NAMESPACE
-#define U_HAVE_NAMESPACE 1
-#endif
-
-/* Determines the endianness of the platform
-   It's done this way in case multiple architectures are being built at once.
-   For example, Darwin supports fat binaries, which can be both PPC and x86 based. */
-#if defined(BYTE_ORDER) && defined(BIG_ENDIAN)
-#define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
-#else
-#define U_IS_BIG_ENDIAN 0
-#endif
-
-/* 1 or 0 to enable or disable threads.  If undefined, default is: enable threads. */
-#define ICU_USE_THREADS 1
-
-/* On strong memory model CPUs (e.g. x86 CPUs), we use a safe & quick double check lock. */
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-#define UMTX_STRONG_MEMORY_MODEL 1
-#endif
-
-#ifndef U_DEBUG
-#define U_DEBUG 0
-#endif
-
-#ifndef U_RELEASE
-#define U_RELEASE 1
-#endif
-
-/* Determine whether to disable renaming or not. This overrides the
-   setting in umachine.h which is for all platforms. */
-#ifndef U_DISABLE_RENAMING
-#define U_DISABLE_RENAMING 1
-#endif
-
-/* Determine whether to override new and delete. */
-#ifndef U_OVERRIDE_CXX_ALLOCATION
-#define U_OVERRIDE_CXX_ALLOCATION 1
-#endif
-/* Determine whether to override placement new and delete for STL. */
-#ifndef U_HAVE_PLACEMENT_NEW
-#define U_HAVE_PLACEMENT_NEW 1
-#endif
-
-/* Determine whether to enable tracing. */
-#ifndef U_ENABLE_TRACING
-#define U_ENABLE_TRACING 0
-#endif
-
-/* Do we allow ICU users to use the draft APIs by default? */
-#ifndef U_DEFAULT_SHOW_DRAFT
-#define U_DEFAULT_SHOW_DRAFT 1
-#endif
-
-/* Define the library suffix in a C syntax. */
-#define U_HAVE_LIB_SUFFIX 0
-#define U_LIB_SUFFIX_C_NAME 
-#define U_LIB_SUFFIX_C_NAME_STRING ""
-
-/*===========================================================================*/
-/* Character data types                                                      */
-/*===========================================================================*/
-
-#if ((defined(OS390) && (!defined(__CHARSET_LIB) || !__CHARSET_LIB))) || defined(OS400)
-#   define U_CHARSET_FAMILY 1
-#endif
-
-/*===========================================================================*/
-/* Information about wchar support                                           */
-/*===========================================================================*/
-
-#define U_HAVE_WCHAR_H      1
-#define U_SIZEOF_WCHAR_T    4
-
-#define U_HAVE_WCSCPY       1
-
-/**
- * \def U_DECLARE_UTF16
- * Do not use this macro. Use the UNICODE_STRING or U_STRING_DECL macros
- * instead.
- * @internal
- */
-#if 1 || defined(U_CHECK_UTF16_STRING)
-#if (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
-    || (defined(__HP_aCC) && __HP_aCC >= 035000) \
-    || (defined(__HP_cc) && __HP_cc >= 111106)
-#define U_DECLARE_UTF16(string) u ## string
-#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550)
-/* || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x580) */
-/* Sun's C compiler has issues with this notation, and it's unreliable. */
-#define U_DECLARE_UTF16(string) U ## string
-#elif U_SIZEOF_WCHAR_T == 2 \
-    && (U_CHARSET_FAMILY == 0 || ((defined(OS390) || defined(OS400)) && defined(__UCS2__)))
-#define U_DECLARE_UTF16(string) L ## string
-#endif
-#endif
-
-/*===========================================================================*/
-/* Information about POSIX support                                           */
-/*===========================================================================*/
-
-#define U_HAVE_NL_LANGINFO_CODESET  1
-#define U_NL_LANGINFO_CODESET       CODESET
-
-#if 1
-#define U_TZSET         tzset
-#endif
-#if 1
-#define U_TIMEZONE      timezone
-#endif
-#if 1
-#define U_TZNAME        tzname
-#endif
-
-#define U_HAVE_MMAP     1
-#define U_HAVE_POPEN    1
-
-/*===========================================================================*/
-/* Symbol import-export control                                              */
-/*===========================================================================*/
-
-#if 1
-#define U_EXPORT __attribute__((visibility("default")))
-#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
-   || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550) 
-#define U_EXPORT __global
-/*#elif defined(__HP_aCC) || defined(__HP_cc)
-#define U_EXPORT __declspec(dllexport)*/
-#else
-#define U_EXPORT
-#endif
-
-/* U_CALLCONV is releated to U_EXPORT2 */
-#define U_EXPORT2
-
-/* cygwin needs to export/import data */
-#ifdef U_CYGWIN
-#define U_IMPORT __declspec(dllimport)
-#else
-#define U_IMPORT 
-#endif
-
-/*===========================================================================*/
-/* Code alignment and C function inlining                                    */
-/*===========================================================================*/
-
-#ifndef U_INLINE
-#   ifdef __cplusplus
-#       define U_INLINE inline
-#   else
-#       define U_INLINE inline
-#   endif
-#endif
-
-#define U_ALIGN_CODE(n) 
-
-/*===========================================================================*/
-/* Programs used by ICU code                                                 */
-/*===========================================================================*/
-
-#define U_MAKE  "/usr/bin/gnumake"

Copied: MacRuby/trunk/icu-1060/unicode/platform.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/platform.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/platform.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/platform.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,316 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+*  FILE NAME : platform.h
+*
+*   Date        Name        Description
+*   05/13/98    nos         Creation (content moved here from ptypes.h).
+*   03/02/99    stephen     Added AS400 support.
+*   03/30/99    stephen     Added Linux support.
+*   04/13/99    stephen     Reworked for autoconf.
+******************************************************************************
+*/
+
+/**
+ * \file 
+ * \brief Basic types for the platform 
+ */
+
+/* Define the platform we're on. */
+#ifndef U_DARWIN
+#define U_DARWIN
+#endif
+
+/* Define whether inttypes.h is available */
+#ifndef U_HAVE_INTTYPES_H
+#define U_HAVE_INTTYPES_H 1
+#endif
+
+/*
+ * Define what support for C++ streams is available.
+ *     If U_IOSTREAM_SOURCE is set to 199711, then <iostream> is available
+ * (1997711 is the date the ISO/IEC C++ FDIS was published), and then
+ * one should qualify streams using the std namespace in ICU header
+ * files.
+ *     If U_IOSTREAM_SOURCE is set to 198506, then <iostream.h> is
+ * available instead (198506 is the date when Stroustrup published
+ * "An Extensible I/O Facility for C++" at the summer USENIX conference).
+ *     If U_IOSTREAM_SOURCE is 0, then C++ streams are not available and
+ * support for them will be silently suppressed in ICU.
+ *
+ */
+
+#ifndef U_IOSTREAM_SOURCE
+#define U_IOSTREAM_SOURCE 199711
+#endif
+
+/* Determines whether specific types are available */
+#ifndef U_HAVE_INT8_T
+#define U_HAVE_INT8_T 1
+#endif
+
+#ifndef U_HAVE_UINT8_T
+#define U_HAVE_UINT8_T 1
+#endif
+
+#ifndef U_HAVE_INT16_T
+#define U_HAVE_INT16_T 1
+#endif
+
+#ifndef U_HAVE_UINT16_T
+#define U_HAVE_UINT16_T 1
+#endif
+
+#ifndef U_HAVE_INT32_T
+#define U_HAVE_INT32_T 1
+#endif
+
+#ifndef U_HAVE_UINT32_T
+#define U_HAVE_UINT32_T 1
+#endif
+
+#ifndef U_HAVE_INT64_T
+#define U_HAVE_INT64_T 1
+#endif
+
+#ifndef U_HAVE_UINT64_T
+#define U_HAVE_UINT64_T 1
+#endif
+
+/*===========================================================================*/
+/* Generic data types                                                        */
+/*===========================================================================*/
+
+#include <sys/types.h>
+
+/* If your platform does not have the <inttypes.h> header, you may
+   need to edit the typedefs below. */
+#if U_HAVE_INTTYPES_H
+
+/* autoconf 2.13 sometimes can't properly find the data types in <inttypes.h> */
+/* os/390 needs <inttypes.h>, but it doesn't have int8_t, and it sometimes */
+/* doesn't have uint8_t depending on the OS version. */
+/* So we have this work around. */
+#ifdef OS390
+/* The features header is needed to get (u)int64_t sometimes. */
+#include <features.h>
+#if ! U_HAVE_INT8_T
+typedef signed char int8_t;
+#endif
+#if !defined(__uint8_t)
+#define __uint8_t 1
+typedef unsigned char uint8_t;
+#endif
+#endif /* OS390 */
+
+#include <inttypes.h>
+
+#else /* U_HAVE_INTTYPES_H */
+
+#if ! U_HAVE_INT8_T
+typedef signed char int8_t;
+#endif
+
+#if ! U_HAVE_UINT8_T
+typedef unsigned char uint8_t;
+#endif
+
+#if ! U_HAVE_INT16_T
+typedef signed short int16_t;
+#endif
+
+#if ! U_HAVE_UINT16_T
+typedef unsigned short uint16_t;
+#endif
+
+#if ! U_HAVE_INT32_T
+typedef signed int int32_t;
+#endif
+
+#if ! U_HAVE_UINT32_T
+typedef unsigned int uint32_t;
+#endif
+
+#if ! U_HAVE_INT64_T
+    typedef signed long long int64_t;
+/* else we may not have a 64-bit type */
+#endif
+
+#if ! U_HAVE_UINT64_T
+    typedef unsigned long long uint64_t;
+/* else we may not have a 64-bit type */
+#endif
+
+#endif
+
+/*===========================================================================*/
+/* Compiler and environment features                                         */
+/*===========================================================================*/
+
+/* Define whether namespace is supported */
+#ifndef U_HAVE_NAMESPACE
+#define U_HAVE_NAMESPACE 1
+#endif
+
+/* Determines the endianness of the platform
+   It's done this way in case multiple architectures are being built at once.
+   For example, Darwin supports fat binaries, which can be both PPC and x86 based. */
+#if defined(BYTE_ORDER) && defined(BIG_ENDIAN)
+#define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
+#else
+#define U_IS_BIG_ENDIAN 0
+#endif
+
+/* 1 or 0 to enable or disable threads.  If undefined, default is: enable threads. */
+#define ICU_USE_THREADS 1
+
+/* On strong memory model CPUs (e.g. x86 CPUs), we use a safe & quick double check lock. */
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+#define UMTX_STRONG_MEMORY_MODEL 1
+#endif
+
+#ifndef U_DEBUG
+#define U_DEBUG 0
+#endif
+
+#ifndef U_RELEASE
+#define U_RELEASE 1
+#endif
+
+/* Determine whether to disable renaming or not. This overrides the
+   setting in umachine.h which is for all platforms. */
+#ifndef U_DISABLE_RENAMING
+#define U_DISABLE_RENAMING 1
+#endif
+
+/* Determine whether to override new and delete. */
+#ifndef U_OVERRIDE_CXX_ALLOCATION
+#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+/* Determine whether to override placement new and delete for STL. */
+#ifndef U_HAVE_PLACEMENT_NEW
+#define U_HAVE_PLACEMENT_NEW 1
+#endif
+
+/* Determine whether to enable tracing. */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 0
+#endif
+
+/* Do we allow ICU users to use the draft APIs by default? */
+#ifndef U_DEFAULT_SHOW_DRAFT
+#define U_DEFAULT_SHOW_DRAFT 1
+#endif
+
+/* Define the library suffix in a C syntax. */
+#define U_HAVE_LIB_SUFFIX 0
+#define U_LIB_SUFFIX_C_NAME 
+#define U_LIB_SUFFIX_C_NAME_STRING ""
+
+/*===========================================================================*/
+/* Character data types                                                      */
+/*===========================================================================*/
+
+#if ((defined(OS390) && (!defined(__CHARSET_LIB) || !__CHARSET_LIB))) || defined(OS400)
+#   define U_CHARSET_FAMILY 1
+#endif
+
+/*===========================================================================*/
+/* Information about wchar support                                           */
+/*===========================================================================*/
+
+#define U_HAVE_WCHAR_H      1
+#define U_SIZEOF_WCHAR_T    4
+
+#define U_HAVE_WCSCPY       1
+
+/**
+ * \def U_DECLARE_UTF16
+ * Do not use this macro. Use the UNICODE_STRING or U_STRING_DECL macros
+ * instead.
+ * @internal
+ */
+#if 1 || defined(U_CHECK_UTF16_STRING)
+#if (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
+    || (defined(__HP_aCC) && __HP_aCC >= 035000) \
+    || (defined(__HP_cc) && __HP_cc >= 111106)
+#define U_DECLARE_UTF16(string) u ## string
+#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550)
+/* || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x580) */
+/* Sun's C compiler has issues with this notation, and it's unreliable. */
+#define U_DECLARE_UTF16(string) U ## string
+#elif U_SIZEOF_WCHAR_T == 2 \
+    && (U_CHARSET_FAMILY == 0 || ((defined(OS390) || defined(OS400)) && defined(__UCS2__)))
+#define U_DECLARE_UTF16(string) L ## string
+#endif
+#endif
+
+/*===========================================================================*/
+/* Information about POSIX support                                           */
+/*===========================================================================*/
+
+#define U_HAVE_NL_LANGINFO_CODESET  1
+#define U_NL_LANGINFO_CODESET       CODESET
+
+#if 1
+#define U_TZSET         tzset
+#endif
+#if 1
+#define U_TIMEZONE      timezone
+#endif
+#if 1
+#define U_TZNAME        tzname
+#endif
+
+#define U_HAVE_MMAP     1
+#define U_HAVE_POPEN    1
+
+/*===========================================================================*/
+/* Symbol import-export control                                              */
+/*===========================================================================*/
+
+#if 1
+#define U_EXPORT __attribute__((visibility("default")))
+#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
+   || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550) 
+#define U_EXPORT __global
+/*#elif defined(__HP_aCC) || defined(__HP_cc)
+#define U_EXPORT __declspec(dllexport)*/
+#else
+#define U_EXPORT
+#endif
+
+/* U_CALLCONV is releated to U_EXPORT2 */
+#define U_EXPORT2
+
+/* cygwin needs to export/import data */
+#ifdef U_CYGWIN
+#define U_IMPORT __declspec(dllimport)
+#else
+#define U_IMPORT 
+#endif
+
+/*===========================================================================*/
+/* Code alignment and C function inlining                                    */
+/*===========================================================================*/
+
+#ifndef U_INLINE
+#   ifdef __cplusplus
+#       define U_INLINE inline
+#   else
+#       define U_INLINE inline
+#   endif
+#endif
+
+#define U_ALIGN_CODE(n) 
+
+/*===========================================================================*/
+/* Programs used by ICU code                                                 */
+/*===========================================================================*/
+
+#define U_MAKE  "/usr/bin/gnumake"

Deleted: MacRuby/trunk/icu-1060/unicode/plurfmt.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/plurfmt.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/plurfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,541 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 2007-2008, International Business Machines Corporation and
-* others. All Rights Reserved.
-*******************************************************************************
-*
-
-* File PLURFMT.H
-*
-* Modification History:*
-*   Date        Name        Description
-*
-********************************************************************************
-*/
-
-#ifndef PLURFMT
-#define PLURFMT
-
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief C++ API: PluralFormat object
- */
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/numfmt.h"
-#include "unicode/plurrule.h"
-
-U_NAMESPACE_BEGIN
-
-class Hashtable;
-
-/**
- * <p>
- * <code>PluralFormat</code> supports the creation of internationalized
- * messages with plural inflection. It is based on <i>plural
- * selection</i>, i.e. the caller specifies messages for each
- * plural case that can appear in the users language and the
- * <code>PluralFormat</code> selects the appropriate message based on
- * the number.
- * </p>
- * <h4>The Problem of Plural Forms in Internationalized Messages</h4>
- * <p>
- * Different languages have different ways to inflect
- * plurals. Creating internationalized messages that include plural
- * forms is only feasible when the framework is able to handle plural
- * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code>
- * doesn't handle this well, because it attaches a number interval to
- * each message and selects the message whose interval contains a
- * given number. This can only handle a finite number of
- * intervals. But in some languages, like Polish, one plural case
- * applies to infinitely many intervals (e.g., paucal applies to
- * numbers ending with 2, 3, or 4 except those ending with 12, 13, or
- * 14). Thus <code>ChoiceFormat</code> is not adequate.
- * </p><p>
- * <code>PluralFormat</code> deals with this by breaking the problem
- * into two parts:
- * <ul>
- * <li>It uses <code>PluralRules</code> that can define more complex
- *     conditions for a plural case than just a single interval. These plural
- *     rules define both what plural cases exist in a language, and to
- *     which numbers these cases apply.
- * <li>It provides predefined plural rules for many locales. Thus, the programmer
- *     need not worry about the plural cases of a language. On the flip side,
- *     the localizer does not have to specify the plural cases; he can simply
- *     use the predefined keywords. The whole plural formatting of messages can
- *     be done using localized patterns from resource bundles.
- * </ul>
- * </p>
- * <h4>Usage of <code>PluralFormat</code></h4>
- * <p>
- * This discussion assumes that you use <code>PluralFormat</code> with
- * a predefined set of plural rules. You can create one using one of
- * the constructors that takes a <code>locale</code> object. To
- * specify the message pattern, you can either pass it to the
- * constructor or set it explicitly using the
- * <code>applyPattern()</code> method. The <code>format()</code>
- * method takes a number object and selects the message of the
- * matching plural case. This message will be returned.
- * </p>
- * <h5>Patterns and Their Interpretation</h5>
- * <p>
- * The pattern text defines the message output for each plural case of the
- * used locale. The pattern is a sequence of
- * <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white
- * space characters. Each clause assigns the message <code><i>message</i></code>
- * to the plural case identified by <code><i>caseKeyword</i></code>.
- * </p><p>
- * You always have to define a message text for the default plural case
- * "<code>other</code>" which is contained in every rule set. If the plural
- * rules of the <code>PluralFormat</code> object do not contain a plural case
- * identified by <code><i>caseKeyword</i></code>, U_DEFAULT_KEYWORD_MISSING
- * will be set to status.
- * If you do not specify a message text for a particular plural case, the
- * message text of the plural case "<code>other</code>" gets assigned to this
- * plural case. If you specify more than one message for the same plural case,
- * U_DUPLICATE_KEYWORD will be set to status.
- * <br/>
- * Spaces between <code><i>caseKeyword</i></code> and
- * <code><i>message</i></code>  will be ignored; spaces within
- * <code><i>message</i></code> will be preserved.
- * </p><p>
- * The message text for a particular plural case may contain other message
- * format patterns. <code>PluralFormat</code> preserves these so that you
- * can use the strings produced by <code>PluralFormat</code> with other
- * formatters. If you are using <code>PluralFormat</code> inside a
- * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
- * automatically evaluate the resulting format pattern.<br/>
- * Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
- * in message texts to define a nested format pattern.<br/>
- * The pound sign (<code>#</code>) will be interpreted as the number placeholder
- * in the message text, if it is not contained in curly braces (to preserve
- * <code>NumberFormat</code> patterns). <code>PluralFormat</code> will
- * replace each of those pound signs by the number passed to the
- * <code>format()</code> method. It will be formatted using a
- * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
- * need special number formatting, you have to explicitly specify a
- * <code>NumberFormat</code> for the <code>PluralFormat</code> to use.
- * </p>
- * Example
- * <pre>
- * UErrorCode status = U_ZERO_ERROR;
- * MessageFormat* msgFmt = new MessageFormat(UnicodeString("{0, plural,
- *   one{{0, number, C''est #,##0.0#  fichier}} other {Ce sont # fichiers}} dans la liste."),
- *   Locale("fr"), status);
- * if (U_FAILURE(status)) {
- *     return;
- * }
- * Formattable args1[] = {(int32_t)0};
- * Formattable args2[] = {(int32_t)3};
- * FieldPosition ignore(FieldPosition::DONT_CARE);
- * UnicodeString result;
- * msgFmt->format(args1, 1, result, ignore, status);
- * cout << result << endl;
- * result.remove();
- * msgFmt->format(args2, 1, result, ignore, status);
- * cout << result << endl;
- * </pre>
- * Produces the output:<br/>
- * <code>C'est 0,0 fichier dans la liste.</code><br/>
- * <code>Ce sont 3 fichiers dans la liste."</code>
- * <p>
- * <strong>Note:</strong><br/>
- *   Currently <code>PluralFormat</code>
- *   does not make use of quotes like <code>MessageFormat</code>.
- *   If you use plural format strings with <code>MessageFormat</code> and want
- *   to use a quote sign "<code>'</code>", you have to write "<code>''</code>".
- *   <code>MessageFormat</code> unquotes this pattern and  passes the unquoted
- *   pattern to <code>PluralFormat</code>. It's a bit trickier if you use
- *   nested formats that do quoting. In the example above, we wanted to insert
- *   "<code>'</code>" in the number format pattern. Since
- *   <code>NumberFormat</code> supports quotes, we had to insert
- *   "<code>''</code>". But since <code>MessageFormat</code> unquotes the
- *   pattern before it gets passed to <code>PluralFormat</code>, we have to
- *   double these quotes, i.e. write "<code>''''</code>".
- * </p>
- * <h4>Defining Custom Plural Rules</h4>
- * <p>If you need to use <code>PluralFormat</code> with custom rules, you can
- * create a <code>PluralRules</code> object and pass it to
- * <code>PluralFormat</code>'s constructor. If you also specify a locale in this
- * constructor, this locale will be used to format the number in the message
- * texts.
- * </p><p>
- * For more information about <code>PluralRules</code>, see
- * {@link PluralRules}.
- * </p>
- *
- * ported from Java
- * @draft ICU 4.0
- */
-
-class U_I18N_API PluralFormat : public Format {
-public:
-
-    /**
-     * Creates a new <code>PluralFormat</code> for the default locale.
-     * This locale will be used to get the set of plural rules and for standard
-     * number formatting.
-     * @param status  output param set to success/failure code on exit, which
-     *                must not indicate a failure before the function call.
-     * @draft ICU 4.0
-     */
-    PluralFormat(UErrorCode& status);
-
-    /**
-     * Creates a new <code>PluralFormat</code> for a given locale.
-     * @param locale the <code>PluralFormat</code> will be configured with
-     *               rules for this locale. This locale will also be used for
-     *               standard number formatting.
-     * @param status output param set to success/failure code on exit, which
-     *               must not indicate a failure before the function call.
-     * @draft ICU 4.0
-     */
-    PluralFormat(const Locale& locale, UErrorCode& status);
-
-    /**
-     * Creates a new <code>PluralFormat</code> for a given set of rules.
-     * The standard number formatting will be done using the default locale.
-     * @param rules   defines the behavior of the <code>PluralFormat</code>
-     *                object.
-     * @param status  output param set to success/failure code on exit, which
-     *                must not indicate a failure before the function call.
-     * @draft ICU 4.0
-     */
-    PluralFormat(const PluralRules& rules, UErrorCode& status);
-
-    /**
-     * Creates a new <code>PluralFormat</code> for a given set of rules.
-     * The standard number formatting will be done using the given locale.
-     * @param locale  the default number formatting will be done using this
-     *                locale.
-     * @param rules   defines the behavior of the <code>PluralFormat</code>
-     *                object.
-     * @param status  output param set to success/failure code on exit, which
-     *                must not indicate a failure before the function call.
-     * @draft ICU 4.0
-     */
-    PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status);
-
-    /**
-     * Creates a new <code>PluralFormat</code> for a given pattern string.
-     * The default locale will be used to get the set of plural rules and for
-     * standard number formatting.
-     * @param  pattern the pattern for this <code>PluralFormat</code>.
-     *                 errors are returned to status if the pattern is invalid.
-     * @param status   output param set to success/failure code on exit, which
-     *                 must not indicate a failure before the function call.
-     * @draft ICU 4.0
-     */
-    PluralFormat(const UnicodeString& pattern, UErrorCode& status);
-
-    /**
-     * Creates a new <code>PluralFormat</code> for a given pattern string and
-     * locale.
-     * The locale will be used to get the set of plural rules and for
-     * standard number formatting.
-     * @param locale   the <code>PluralFormat</code> will be configured with
-     *                 rules for this locale. This locale will also be used for
-     *                 standard number formatting.
-     * @param pattern  the pattern for this <code>PluralFormat</code>.
-     *                 errors are returned to status if the pattern is invalid.
-     * @param status   output param set to success/failure code on exit, which
-     *                 must not indicate a failure before the function call.
-     * @draft ICU 4.0
-     */
-    PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status);
-
-    /**
-     * Creates a new <code>PluralFormat</code> for a given set of rules, a
-     * pattern and a locale.
-     * @param rules    defines the behavior of the <code>PluralFormat</code>
-     *                 object.
-     * @param pattern  the pattern for this <code>PluralFormat</code>.
-     *                 errors are returned to status if the pattern is invalid.
-     * @param status   output param set to success/failure code on exit, which
-     *                 must not indicate a failure before the function call.
-     * @draft ICU 4.0
-     */
-    PluralFormat(const PluralRules& rules,
-                 const UnicodeString& pattern,
-                 UErrorCode& status);
-
-    /**
-     * Creates a new <code>PluralFormat</code> for a given set of rules, a
-     * pattern and a locale.
-     * @param locale  the <code>PluralFormat</code> will be configured with
-     *                rules for this locale. This locale will also be used for
-     *                standard number formatting.
-     * @param rules   defines the behavior of the <code>PluralFormat</code>
-     *                object.
-     * @param pattern the pattern for this <code>PluralFormat</code>.
-     *                errors are returned to status if the pattern is invalid.
-     * @param status  output param set to success/failure code on exit, which
-     *                must not indicate a failure before the function call.
-     * @draft ICU 4.0
-     */
-    PluralFormat(const Locale& locale,
-                 const PluralRules& rules,
-                 const UnicodeString& pattern,
-                 UErrorCode& status);
-
-    /**
-      * copy constructor.
-      * @draft ICU 4.0
-      */
-    PluralFormat(const PluralFormat& other);
-
-    /**
-     * Destructor.
-     * @draft ICU 4.0
-     */
-    virtual ~PluralFormat();
-
-    /**
-     * Sets the pattern used by this plural format.
-     * The method parses the pattern and creates a map of format strings
-     * for the plural rules.
-     * Patterns and their interpretation are specified in the class description.
-     *
-     * @param pattern the pattern for this plural format
-     *                errors are returned to status if the pattern is invalid.
-     * @param status  output param set to success/failure code on exit, which
-     *                must not indicate a failure before the function call.
-     * @draft ICU 4.0
-     */
-    void applyPattern(const UnicodeString& pattern, UErrorCode& status);
-
-    /**
-     * Formats a plural message for a given number.
-     *
-     * @param number  a number for which the plural message should be formatted
-     *                for. If no pattern has been applied to this
-     *                <code>PluralFormat</code> object yet, the formatted number
-     *                will be returned.
-     * @param status  output param set to success/failure code on exit, which
-     *                must not indicate a failure before the function call.
-     * @return        the string containing the formatted plural message.
-     * @draft ICU 4.0
-     */
-    UnicodeString format(int32_t number, UErrorCode& status) const;   
-    
-    /**
-     * Formats a plural message for a given number.
-     *
-     * @param number  a number for which the plural message should be formatted
-     *                for. If no pattern has been applied to this
-     *                <code>PluralFormat</code> object yet, the formatted number
-     *                will be returned.
-     * @param status  output param set to success/failure code on exit, which
-     *                must not indicate a failure before the function call.
-     * @return        the string containing the formatted plural message.
-     * @draft ICU 4.0
-     */
-    UnicodeString format(double number, UErrorCode& status) const;
-
-    /**
-     * Formats a plural message for a given number.
-     *
-     * @param number   a number for which the plural message should be formatted
-     *                 for. If no pattern has been applied to this
-     *                 <code>PluralFormat</code> object yet, the formatted number
-     *                 will be returned.
-     * @param appendTo output parameter to receive result.
-     *                 result is appended to existing contents.
-     * @param pos      On input: an alignment field, if desired.
-     *                 On output: the offsets of the alignment field.
-     * @param status   output param set to success/failure code on exit, which
-     *                 must not indicate a failure before the function call.
-     * @return         the string containing the formatted plural message.
-     * @draft ICU 4.0
-     */
-    UnicodeString& format(int32_t number,
-                          UnicodeString& appendTo,
-                          FieldPosition& pos,
-                          UErrorCode& status) const;
-    
-    /**
-     * Formats a plural message for a given number.
-     *
-     * @param number   a number for which the plural message should be formatted
-     *                 for. If no pattern has been applied to this
-     *                 <code>PluralFormat</code> object yet, the formatted number
-     *                 will be returned.
-     * @param appendTo output parameter to receive result.
-     *                 result is appended to existing contents.
-     * @param pos      On input: an alignment field, if desired.
-     *                 On output: the offsets of the alignment field.
-     * @param status   output param set to success/failure code on exit, which
-     *                 must not indicate a failure before the function call.
-     * @return         the string containing the formatted plural message.
-     * @draft ICU 4.0
-     */
-    UnicodeString& format(double number,
-                          UnicodeString& appendTo,
-                          FieldPosition& pos,
-                          UErrorCode& status) const;
-
-    /**
-     * Sets the locale used by this <code>PluraFormat</code> object.
-     * Note: Calling this method resets this <code>PluraFormat</code> object,
-     *     i.e., a pattern that was applied previously will be removed,
-     *     and the NumberFormat is set to the default number format for
-     *     the locale.  The resulting format behaves the same as one
-     *     constructed from {@link #PluralFormat(locale)}.
-     * @param locale  the <code>locale</code> to use to configure the formatter.
-     * @param status  output param set to success/failure code on exit, which
-     *                must not indicate a failure before the function call.
-     * @draft ICU 4.0
-     */
-    void setLocale(const Locale& locale, UErrorCode& status);
-
-    /**
-      * Sets the number format used by this formatter.  You only need to
-      * call this if you want a different number format than the default
-      * formatter for the locale.
-      * @param format  the number format to use.
-      * @param status  output param set to success/failure code on exit, which
-      *                must not indicate a failure before the function call.
-      * @draft ICU 4.0
-      */
-    void setNumberFormat(const NumberFormat* format, UErrorCode& status);
-
-    /**
-       * Assignment operator
-       *
-       * @param other    the PluralFormat object to copy from.
-       * @draft ICU 4.0
-       */
-    PluralFormat& operator=(const PluralFormat& other);
-
-    /**
-      * Return true if another object is semantically equal to this one.
-      *
-      * @param other    the PluralFormat object to be compared with.
-      * @return         true if other is semantically equal to this.
-      * @draft ICU 4.0
-      */
-    virtual UBool operator==(const Format& other) const;
-
-    /**
-     * Return true if another object is semantically unequal to this one.
-     *
-     * @param other    the PluralFormat object to be compared with.
-     * @return         true if other is semantically unequal to this.
-     * @draft ICU 4.0
-     */
-    virtual UBool operator!=(const Format& other) const;
-
-    /**
-     * Clones this Format object polymorphically.  The caller owns the
-     * result and should delete it when done.
-     * @draft ICU 4.0
-     */
-    virtual Format* clone(void) const;
-
-    /**
-    * Redeclared Format method.
-    *
-    * @param obj       The object to be formatted into a string.
-    * @param appendTo  output parameter to receive result.
-    *                  Result is appended to existing contents.
-    * @param pos       On input: an alignment field, if desired.
-    *                  On output: the offsets of the alignment field.
-    * @param status    output param filled with success/failure status.
-    * @return          Reference to 'appendTo' parameter.
-    * @draft ICU 4.0
-    */
-   UnicodeString& format(const Formattable& obj,
-                         UnicodeString& appendTo,
-                         FieldPosition& pos,
-                         UErrorCode& status) const;
-
-   /**
-    * Returns the pattern from applyPattern() or constructor().
-    *
-    * @param  appendTo  output parameter to receive result.
-     *                  Result is appended to existing contents.
-    * @return the UnicodeString with inserted pattern.
-    * @draft ICU 4.0
-    */
-   UnicodeString& toPattern(UnicodeString& appendTo);
-
-   /**
-    * This method is not yet supported by <code>PluralFormat</code>.
-    * <P>
-    * Before calling, set parse_pos.index to the offset you want to start
-    * parsing at in the source. After calling, parse_pos.index is the end of
-    * the text you parsed. If error occurs, index is unchanged.
-    * <P>
-    * When parsing, leading whitespace is discarded (with a successful parse),
-    * while trailing whitespace is left as is.
-    * <P>
-    * See Format::parseObject() for more.
-    *
-    * @param source    The string to be parsed into an object.
-    * @param result    Formattable to be set to the parse result.
-    *                  If parse fails, return contents are undefined.
-    * @param parse_pos The position to start parsing at. Upon return
-    *                  this param is set to the position after the
-    *                  last character successfully parsed. If the
-    *                  source is not parsed successfully, this param
-    *                  will remain unchanged.
-    * @draft ICU 4.0
-    */
-   virtual void parseObject(const UnicodeString& source,
-                            Formattable& result,
-                            ParsePosition& parse_pos) const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @draft ICU 4.0
-     *
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @draft ICU 4.0
-     */
-     virtual UClassID getDynamicClassID() const;
-
-private:
-    typedef enum fmtToken {
-        none,
-        tLetter,
-        tNumber,
-        tSpace,
-        tNumberSign,
-        tLeftBrace,
-        tRightBrace
-    }fmtToken;
-
-    Locale  locale;
-    PluralRules* pluralRules;
-    UnicodeString pattern;
-    Hashtable  *fParsedValuesHash;
-    NumberFormat*  numberFormat;
-    NumberFormat*  replacedNumberFormat;
-
-    PluralFormat();   // default constructor not implemented
-    void init(const PluralRules* rules, const Locale& curlocale, UErrorCode& status);
-    UBool inRange(UChar ch, fmtToken& type);
-    UBool checkSufficientDefinition();
-    void parsingFailure();
-    UnicodeString insertFormattedNumber(double number,
-                                        UnicodeString& message,
-                                        UnicodeString& appendTo,
-                                        FieldPosition& pos) const;
-    void copyHashtable(Hashtable *other, UErrorCode& status);
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _PLURFMT
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/plurfmt.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/plurfmt.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/plurfmt.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/plurfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,541 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+
+* File PLURFMT.H
+*
+* Modification History:*
+*   Date        Name        Description
+*
+********************************************************************************
+*/
+
+#ifndef PLURFMT
+#define PLURFMT
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: PluralFormat object
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/numfmt.h"
+#include "unicode/plurrule.h"
+
+U_NAMESPACE_BEGIN
+
+class Hashtable;
+
+/**
+ * <p>
+ * <code>PluralFormat</code> supports the creation of internationalized
+ * messages with plural inflection. It is based on <i>plural
+ * selection</i>, i.e. the caller specifies messages for each
+ * plural case that can appear in the users language and the
+ * <code>PluralFormat</code> selects the appropriate message based on
+ * the number.
+ * </p>
+ * <h4>The Problem of Plural Forms in Internationalized Messages</h4>
+ * <p>
+ * Different languages have different ways to inflect
+ * plurals. Creating internationalized messages that include plural
+ * forms is only feasible when the framework is able to handle plural
+ * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code>
+ * doesn't handle this well, because it attaches a number interval to
+ * each message and selects the message whose interval contains a
+ * given number. This can only handle a finite number of
+ * intervals. But in some languages, like Polish, one plural case
+ * applies to infinitely many intervals (e.g., paucal applies to
+ * numbers ending with 2, 3, or 4 except those ending with 12, 13, or
+ * 14). Thus <code>ChoiceFormat</code> is not adequate.
+ * </p><p>
+ * <code>PluralFormat</code> deals with this by breaking the problem
+ * into two parts:
+ * <ul>
+ * <li>It uses <code>PluralRules</code> that can define more complex
+ *     conditions for a plural case than just a single interval. These plural
+ *     rules define both what plural cases exist in a language, and to
+ *     which numbers these cases apply.
+ * <li>It provides predefined plural rules for many locales. Thus, the programmer
+ *     need not worry about the plural cases of a language. On the flip side,
+ *     the localizer does not have to specify the plural cases; he can simply
+ *     use the predefined keywords. The whole plural formatting of messages can
+ *     be done using localized patterns from resource bundles.
+ * </ul>
+ * </p>
+ * <h4>Usage of <code>PluralFormat</code></h4>
+ * <p>
+ * This discussion assumes that you use <code>PluralFormat</code> with
+ * a predefined set of plural rules. You can create one using one of
+ * the constructors that takes a <code>locale</code> object. To
+ * specify the message pattern, you can either pass it to the
+ * constructor or set it explicitly using the
+ * <code>applyPattern()</code> method. The <code>format()</code>
+ * method takes a number object and selects the message of the
+ * matching plural case. This message will be returned.
+ * </p>
+ * <h5>Patterns and Their Interpretation</h5>
+ * <p>
+ * The pattern text defines the message output for each plural case of the
+ * used locale. The pattern is a sequence of
+ * <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white
+ * space characters. Each clause assigns the message <code><i>message</i></code>
+ * to the plural case identified by <code><i>caseKeyword</i></code>.
+ * </p><p>
+ * You always have to define a message text for the default plural case
+ * "<code>other</code>" which is contained in every rule set. If the plural
+ * rules of the <code>PluralFormat</code> object do not contain a plural case
+ * identified by <code><i>caseKeyword</i></code>, U_DEFAULT_KEYWORD_MISSING
+ * will be set to status.
+ * If you do not specify a message text for a particular plural case, the
+ * message text of the plural case "<code>other</code>" gets assigned to this
+ * plural case. If you specify more than one message for the same plural case,
+ * U_DUPLICATE_KEYWORD will be set to status.
+ * <br/>
+ * Spaces between <code><i>caseKeyword</i></code> and
+ * <code><i>message</i></code>  will be ignored; spaces within
+ * <code><i>message</i></code> will be preserved.
+ * </p><p>
+ * The message text for a particular plural case may contain other message
+ * format patterns. <code>PluralFormat</code> preserves these so that you
+ * can use the strings produced by <code>PluralFormat</code> with other
+ * formatters. If you are using <code>PluralFormat</code> inside a
+ * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
+ * automatically evaluate the resulting format pattern.<br/>
+ * Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
+ * in message texts to define a nested format pattern.<br/>
+ * The pound sign (<code>#</code>) will be interpreted as the number placeholder
+ * in the message text, if it is not contained in curly braces (to preserve
+ * <code>NumberFormat</code> patterns). <code>PluralFormat</code> will
+ * replace each of those pound signs by the number passed to the
+ * <code>format()</code> method. It will be formatted using a
+ * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
+ * need special number formatting, you have to explicitly specify a
+ * <code>NumberFormat</code> for the <code>PluralFormat</code> to use.
+ * </p>
+ * Example
+ * <pre>
+ * UErrorCode status = U_ZERO_ERROR;
+ * MessageFormat* msgFmt = new MessageFormat(UnicodeString("{0, plural,
+ *   one{{0, number, C''est #,##0.0#  fichier}} other {Ce sont # fichiers}} dans la liste."),
+ *   Locale("fr"), status);
+ * if (U_FAILURE(status)) {
+ *     return;
+ * }
+ * Formattable args1[] = {(int32_t)0};
+ * Formattable args2[] = {(int32_t)3};
+ * FieldPosition ignore(FieldPosition::DONT_CARE);
+ * UnicodeString result;
+ * msgFmt->format(args1, 1, result, ignore, status);
+ * cout << result << endl;
+ * result.remove();
+ * msgFmt->format(args2, 1, result, ignore, status);
+ * cout << result << endl;
+ * </pre>
+ * Produces the output:<br/>
+ * <code>C'est 0,0 fichier dans la liste.</code><br/>
+ * <code>Ce sont 3 fichiers dans la liste."</code>
+ * <p>
+ * <strong>Note:</strong><br/>
+ *   Currently <code>PluralFormat</code>
+ *   does not make use of quotes like <code>MessageFormat</code>.
+ *   If you use plural format strings with <code>MessageFormat</code> and want
+ *   to use a quote sign "<code>'</code>", you have to write "<code>''</code>".
+ *   <code>MessageFormat</code> unquotes this pattern and  passes the unquoted
+ *   pattern to <code>PluralFormat</code>. It's a bit trickier if you use
+ *   nested formats that do quoting. In the example above, we wanted to insert
+ *   "<code>'</code>" in the number format pattern. Since
+ *   <code>NumberFormat</code> supports quotes, we had to insert
+ *   "<code>''</code>". But since <code>MessageFormat</code> unquotes the
+ *   pattern before it gets passed to <code>PluralFormat</code>, we have to
+ *   double these quotes, i.e. write "<code>''''</code>".
+ * </p>
+ * <h4>Defining Custom Plural Rules</h4>
+ * <p>If you need to use <code>PluralFormat</code> with custom rules, you can
+ * create a <code>PluralRules</code> object and pass it to
+ * <code>PluralFormat</code>'s constructor. If you also specify a locale in this
+ * constructor, this locale will be used to format the number in the message
+ * texts.
+ * </p><p>
+ * For more information about <code>PluralRules</code>, see
+ * {@link PluralRules}.
+ * </p>
+ *
+ * ported from Java
+ * @draft ICU 4.0
+ */
+
+class U_I18N_API PluralFormat : public Format {
+public:
+
+    /**
+     * Creates a new <code>PluralFormat</code> for the default locale.
+     * This locale will be used to get the set of plural rules and for standard
+     * number formatting.
+     * @param status  output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * @draft ICU 4.0
+     */
+    PluralFormat(UErrorCode& status);
+
+    /**
+     * Creates a new <code>PluralFormat</code> for a given locale.
+     * @param locale the <code>PluralFormat</code> will be configured with
+     *               rules for this locale. This locale will also be used for
+     *               standard number formatting.
+     * @param status output param set to success/failure code on exit, which
+     *               must not indicate a failure before the function call.
+     * @draft ICU 4.0
+     */
+    PluralFormat(const Locale& locale, UErrorCode& status);
+
+    /**
+     * Creates a new <code>PluralFormat</code> for a given set of rules.
+     * The standard number formatting will be done using the default locale.
+     * @param rules   defines the behavior of the <code>PluralFormat</code>
+     *                object.
+     * @param status  output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * @draft ICU 4.0
+     */
+    PluralFormat(const PluralRules& rules, UErrorCode& status);
+
+    /**
+     * Creates a new <code>PluralFormat</code> for a given set of rules.
+     * The standard number formatting will be done using the given locale.
+     * @param locale  the default number formatting will be done using this
+     *                locale.
+     * @param rules   defines the behavior of the <code>PluralFormat</code>
+     *                object.
+     * @param status  output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * @draft ICU 4.0
+     */
+    PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status);
+
+    /**
+     * Creates a new <code>PluralFormat</code> for a given pattern string.
+     * The default locale will be used to get the set of plural rules and for
+     * standard number formatting.
+     * @param  pattern the pattern for this <code>PluralFormat</code>.
+     *                 errors are returned to status if the pattern is invalid.
+     * @param status   output param set to success/failure code on exit, which
+     *                 must not indicate a failure before the function call.
+     * @draft ICU 4.0
+     */
+    PluralFormat(const UnicodeString& pattern, UErrorCode& status);
+
+    /**
+     * Creates a new <code>PluralFormat</code> for a given pattern string and
+     * locale.
+     * The locale will be used to get the set of plural rules and for
+     * standard number formatting.
+     * @param locale   the <code>PluralFormat</code> will be configured with
+     *                 rules for this locale. This locale will also be used for
+     *                 standard number formatting.
+     * @param pattern  the pattern for this <code>PluralFormat</code>.
+     *                 errors are returned to status if the pattern is invalid.
+     * @param status   output param set to success/failure code on exit, which
+     *                 must not indicate a failure before the function call.
+     * @draft ICU 4.0
+     */
+    PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status);
+
+    /**
+     * Creates a new <code>PluralFormat</code> for a given set of rules, a
+     * pattern and a locale.
+     * @param rules    defines the behavior of the <code>PluralFormat</code>
+     *                 object.
+     * @param pattern  the pattern for this <code>PluralFormat</code>.
+     *                 errors are returned to status if the pattern is invalid.
+     * @param status   output param set to success/failure code on exit, which
+     *                 must not indicate a failure before the function call.
+     * @draft ICU 4.0
+     */
+    PluralFormat(const PluralRules& rules,
+                 const UnicodeString& pattern,
+                 UErrorCode& status);
+
+    /**
+     * Creates a new <code>PluralFormat</code> for a given set of rules, a
+     * pattern and a locale.
+     * @param locale  the <code>PluralFormat</code> will be configured with
+     *                rules for this locale. This locale will also be used for
+     *                standard number formatting.
+     * @param rules   defines the behavior of the <code>PluralFormat</code>
+     *                object.
+     * @param pattern the pattern for this <code>PluralFormat</code>.
+     *                errors are returned to status if the pattern is invalid.
+     * @param status  output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * @draft ICU 4.0
+     */
+    PluralFormat(const Locale& locale,
+                 const PluralRules& rules,
+                 const UnicodeString& pattern,
+                 UErrorCode& status);
+
+    /**
+      * copy constructor.
+      * @draft ICU 4.0
+      */
+    PluralFormat(const PluralFormat& other);
+
+    /**
+     * Destructor.
+     * @draft ICU 4.0
+     */
+    virtual ~PluralFormat();
+
+    /**
+     * Sets the pattern used by this plural format.
+     * The method parses the pattern and creates a map of format strings
+     * for the plural rules.
+     * Patterns and their interpretation are specified in the class description.
+     *
+     * @param pattern the pattern for this plural format
+     *                errors are returned to status if the pattern is invalid.
+     * @param status  output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * @draft ICU 4.0
+     */
+    void applyPattern(const UnicodeString& pattern, UErrorCode& status);
+
+    /**
+     * Formats a plural message for a given number.
+     *
+     * @param number  a number for which the plural message should be formatted
+     *                for. If no pattern has been applied to this
+     *                <code>PluralFormat</code> object yet, the formatted number
+     *                will be returned.
+     * @param status  output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * @return        the string containing the formatted plural message.
+     * @draft ICU 4.0
+     */
+    UnicodeString format(int32_t number, UErrorCode& status) const;   
+    
+    /**
+     * Formats a plural message for a given number.
+     *
+     * @param number  a number for which the plural message should be formatted
+     *                for. If no pattern has been applied to this
+     *                <code>PluralFormat</code> object yet, the formatted number
+     *                will be returned.
+     * @param status  output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * @return        the string containing the formatted plural message.
+     * @draft ICU 4.0
+     */
+    UnicodeString format(double number, UErrorCode& status) const;
+
+    /**
+     * Formats a plural message for a given number.
+     *
+     * @param number   a number for which the plural message should be formatted
+     *                 for. If no pattern has been applied to this
+     *                 <code>PluralFormat</code> object yet, the formatted number
+     *                 will be returned.
+     * @param appendTo output parameter to receive result.
+     *                 result is appended to existing contents.
+     * @param pos      On input: an alignment field, if desired.
+     *                 On output: the offsets of the alignment field.
+     * @param status   output param set to success/failure code on exit, which
+     *                 must not indicate a failure before the function call.
+     * @return         the string containing the formatted plural message.
+     * @draft ICU 4.0
+     */
+    UnicodeString& format(int32_t number,
+                          UnicodeString& appendTo,
+                          FieldPosition& pos,
+                          UErrorCode& status) const;
+    
+    /**
+     * Formats a plural message for a given number.
+     *
+     * @param number   a number for which the plural message should be formatted
+     *                 for. If no pattern has been applied to this
+     *                 <code>PluralFormat</code> object yet, the formatted number
+     *                 will be returned.
+     * @param appendTo output parameter to receive result.
+     *                 result is appended to existing contents.
+     * @param pos      On input: an alignment field, if desired.
+     *                 On output: the offsets of the alignment field.
+     * @param status   output param set to success/failure code on exit, which
+     *                 must not indicate a failure before the function call.
+     * @return         the string containing the formatted plural message.
+     * @draft ICU 4.0
+     */
+    UnicodeString& format(double number,
+                          UnicodeString& appendTo,
+                          FieldPosition& pos,
+                          UErrorCode& status) const;
+
+    /**
+     * Sets the locale used by this <code>PluraFormat</code> object.
+     * Note: Calling this method resets this <code>PluraFormat</code> object,
+     *     i.e., a pattern that was applied previously will be removed,
+     *     and the NumberFormat is set to the default number format for
+     *     the locale.  The resulting format behaves the same as one
+     *     constructed from {@link #PluralFormat(locale)}.
+     * @param locale  the <code>locale</code> to use to configure the formatter.
+     * @param status  output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * @draft ICU 4.0
+     */
+    void setLocale(const Locale& locale, UErrorCode& status);
+
+    /**
+      * Sets the number format used by this formatter.  You only need to
+      * call this if you want a different number format than the default
+      * formatter for the locale.
+      * @param format  the number format to use.
+      * @param status  output param set to success/failure code on exit, which
+      *                must not indicate a failure before the function call.
+      * @draft ICU 4.0
+      */
+    void setNumberFormat(const NumberFormat* format, UErrorCode& status);
+
+    /**
+       * Assignment operator
+       *
+       * @param other    the PluralFormat object to copy from.
+       * @draft ICU 4.0
+       */
+    PluralFormat& operator=(const PluralFormat& other);
+
+    /**
+      * Return true if another object is semantically equal to this one.
+      *
+      * @param other    the PluralFormat object to be compared with.
+      * @return         true if other is semantically equal to this.
+      * @draft ICU 4.0
+      */
+    virtual UBool operator==(const Format& other) const;
+
+    /**
+     * Return true if another object is semantically unequal to this one.
+     *
+     * @param other    the PluralFormat object to be compared with.
+     * @return         true if other is semantically unequal to this.
+     * @draft ICU 4.0
+     */
+    virtual UBool operator!=(const Format& other) const;
+
+    /**
+     * Clones this Format object polymorphically.  The caller owns the
+     * result and should delete it when done.
+     * @draft ICU 4.0
+     */
+    virtual Format* clone(void) const;
+
+    /**
+    * Redeclared Format method.
+    *
+    * @param obj       The object to be formatted into a string.
+    * @param appendTo  output parameter to receive result.
+    *                  Result is appended to existing contents.
+    * @param pos       On input: an alignment field, if desired.
+    *                  On output: the offsets of the alignment field.
+    * @param status    output param filled with success/failure status.
+    * @return          Reference to 'appendTo' parameter.
+    * @draft ICU 4.0
+    */
+   UnicodeString& format(const Formattable& obj,
+                         UnicodeString& appendTo,
+                         FieldPosition& pos,
+                         UErrorCode& status) const;
+
+   /**
+    * Returns the pattern from applyPattern() or constructor().
+    *
+    * @param  appendTo  output parameter to receive result.
+     *                  Result is appended to existing contents.
+    * @return the UnicodeString with inserted pattern.
+    * @draft ICU 4.0
+    */
+   UnicodeString& toPattern(UnicodeString& appendTo);
+
+   /**
+    * This method is not yet supported by <code>PluralFormat</code>.
+    * <P>
+    * Before calling, set parse_pos.index to the offset you want to start
+    * parsing at in the source. After calling, parse_pos.index is the end of
+    * the text you parsed. If error occurs, index is unchanged.
+    * <P>
+    * When parsing, leading whitespace is discarded (with a successful parse),
+    * while trailing whitespace is left as is.
+    * <P>
+    * See Format::parseObject() for more.
+    *
+    * @param source    The string to be parsed into an object.
+    * @param result    Formattable to be set to the parse result.
+    *                  If parse fails, return contents are undefined.
+    * @param parse_pos The position to start parsing at. Upon return
+    *                  this param is set to the position after the
+    *                  last character successfully parsed. If the
+    *                  source is not parsed successfully, this param
+    *                  will remain unchanged.
+    * @draft ICU 4.0
+    */
+   virtual void parseObject(const UnicodeString& source,
+                            Formattable& result,
+                            ParsePosition& parse_pos) const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @draft ICU 4.0
+     *
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @draft ICU 4.0
+     */
+     virtual UClassID getDynamicClassID() const;
+
+private:
+    typedef enum fmtToken {
+        none,
+        tLetter,
+        tNumber,
+        tSpace,
+        tNumberSign,
+        tLeftBrace,
+        tRightBrace
+    }fmtToken;
+
+    Locale  locale;
+    PluralRules* pluralRules;
+    UnicodeString pattern;
+    Hashtable  *fParsedValuesHash;
+    NumberFormat*  numberFormat;
+    NumberFormat*  replacedNumberFormat;
+
+    PluralFormat();   // default constructor not implemented
+    void init(const PluralRules* rules, const Locale& curlocale, UErrorCode& status);
+    UBool inRange(UChar ch, fmtToken& type);
+    UBool checkSufficientDefinition();
+    void parsingFailure();
+    UnicodeString insertFormattedNumber(double number,
+                                        UnicodeString& message,
+                                        UnicodeString& appendTo,
+                                        FieldPosition& pos) const;
+    void copyHashtable(Hashtable *other, UErrorCode& status);
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _PLURFMT
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/plurrule.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/plurrule.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/plurrule.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,291 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 2008, International Business Machines Corporation and
-* others. All Rights Reserved.
-*******************************************************************************
-*
-*
-* File PLURRULE.H
-*
-* Modification History:*
-*   Date        Name        Description
-*
-********************************************************************************
-*/
-
-#ifndef PLURRULE
-#define PLURRULE
-
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief C++ API: PluralRules object
- */
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/format.h"
-
-U_NAMESPACE_BEGIN
-
-class Hashtable;
-class RuleChain;
-class RuleParser;
-
-/**
- * Defines rules for mapping positive long values onto a small set of
- * keywords. Rules are constructed from a text description, consisting
- * of a series of keywords and conditions.  The {@link #select} method
- * examines each condition in order and returns the keyword for the
- * first condition that matches the number.  If none match,
- * default rule(other) is returned.
- *
- * Examples:<pre>
- *   "one: n is 1; few: n in 2..4"</pre>
- *  This defines two rules, for 'one' and 'few'.  The condition for
- *  'one' is "n is 1" which means that the number must be equal to
- *  1 for this condition to pass.  The condition for 'few' is
- *  "n in 2..4" which means that the number must be between 2 and
- *  4 inclusive for this condition to pass.  All other numbers
- *  are assigned the keyword "other" by the default rule.
- *  </p><pre>
- *    "zero: n is 0; one: n is 1; zero: n mod 100 in 1..19"</pre>
- *  This illustrates that the same keyword can be defined multiple times.
- *  Each rule is examined in order, and the first keyword whose condition
- *  passes is the one returned.  Also notes that a modulus is applied
- *  to n in the last rule.  Thus its condition holds for 119, 219, 319...
- *  </p><pre>
- *    "one: n is 1; few: n mod 10 in 2..4 and n mod 100 not in 12..14"</pre>
- *  This illustrates conjunction and negation.  The condition for 'few'
- *  has two parts, both of which must be met: "n mod 10 in 2..4" and
- *  "n mod 100 not in 12..14".  The first part applies a modulus to n
- *  before the test as in the previous example.  The second part applies
- *  a different modulus and also uses negation, thus it matches all
- *  numbers _not_ in 12, 13, 14, 112, 113, 114, 212, 213, 214...
- *  </p>
- *  <p>
- * Syntax:<pre>
- * rules         = rule (';' rule)*
- * rule          = keyword ':' condition
- * keyword       = <identifier>
- * condition     = and_condition ('or' and_condition)*
- * and_condition = relation ('and' relation)*
- * relation      = is_relation | in_relation | within_relation | 'n' <EOL>
- * is_relation   = expr 'is' ('not')? value
- * in_relation   = expr ('not')? 'in' range
- * within_relation = expr ('not')? 'within' range
- * expr          = 'n' ('mod' value)?
- * value         = digit+
- * digit         = 0|1|2|3|4|5|6|7|8|9
- * range         = value'..'value
- * </pre></p>
- * <p>
- *  The difference between 'in' and 'within' is that 'in' only includes
- *  integers in the specified range, while 'within' includes all values.</p>
- *  <p>
- *  Keywords
- *  could be defined by users or from ICU locale data. There are 6
- *  predefined values in ICU - 'zero', 'one', 'two', 'few', 'many' and
- *  'other'. Callers need to check the value of keyword returned by
- *  {@link #select} method.
- *  </p>
- *
- * Examples:<pre>
- * UnicodeString keyword = pl->select(number);
- * if (keyword== UnicodeString("one") {
- *     ...
- * }
- * else if ( ... )
- * </pre>
- */
-class U_I18N_API PluralRules : public UObject {
-public:
-
-    /**
-     * Constructor.
-     * @param status  Output param set to success/failure code on exit, which
-     *                must not indicate a failure before the function call.
-     *
-     * @draft ICU 4.0
-     */
-    PluralRules(UErrorCode& status);
-
-    /**
-     * Copy constructor.
-     * @draft ICU 4.0
-     */
-    PluralRules(const PluralRules& other);
-
-    /**
-     * Destructor.
-     * @draft ICU 4.0
-     */
-    virtual ~PluralRules();
-
-    /**
-     * Clone
-     * @draft ICU 4.0
-     */
-    PluralRules* clone() const;
-
-    /**
-      * Assignment operator.
-      * @draft ICU 4.0
-      */
-    PluralRules& operator=(const PluralRules&);
-
-    /**
-     * Creates a PluralRules from a description if it is parsable, otherwise
-     * returns null.
-     *
-     * @param description rule description
-     * @param status      Output param set to success/failure code on exit, which
-     *                    must not indicate a failure before the function call.
-     * @return            new PluralRules pointer. NULL if there is an error.
-     * @draft ICU 4.0
-     */
-    static PluralRules* U_EXPORT2 createRules(const UnicodeString& description,
-                                              UErrorCode& status);
-
-    /**
-     * The default rules that accept any number.
-     *
-     * @param status  Output param set to success/failure code on exit, which
-     *                must not indicate a failure before the function call.
-     * @return        new PluralRules pointer. NULL if there is an error.
-     * @draft ICU 4.0
-     */
-    static PluralRules* U_EXPORT2 createDefaultRules(UErrorCode& status);
-
-    /**
-     * Provides access to the predefined <code>PluralRules</code> for a given
-     * locale.
-     *
-     * @param locale  The locale for which a <code>PluralRules</code> object is
-     *                returned.
-     * @param status  Output param set to success/failure code on exit, which
-     *                must not indicate a failure before the function call.
-     * @return        The predefined <code>PluralRules</code> object pointer for
-     *                this locale. If there's no predefined rules for this locale,
-     *                the rules for the closest parent in the locale hierarchy
-     *                that has one will  be returned.  The final fallback always
-     *                returns the default 'other' rules.
-     * @draft ICU 4.0
-     */
-    static PluralRules* U_EXPORT2 forLocale(const Locale& locale, UErrorCode& status);
-    
-    /**
-     * Given a number, returns the keyword of the first rule that applies to
-     * the number.  This function can be used with isKeyword* functions to
-     * determine the keyword for default plural rules.
-     *
-     * @param number  The number for which the rule has to be determined.
-     * @return        The keyword of the selected rule.
-     * @draft ICU 4.0
-     */
-    UnicodeString select(int32_t number) const;
-    
-    /**
-     * Given a number, returns the keyword of the first rule that applies to
-     * the number.  This function can be used with isKeyword* functions to
-     * determine the keyword for default plural rules.
-     *
-     * @param number  The number for which the rule has to be determined.
-     * @return        The keyword of the selected rule.
-     * @draft ICU 4.0
-     */
-    UnicodeString select(double number) const;
-
-    /**
-     * Returns a list of all rule keywords used in this <code>PluralRules</code>
-     * object.  The rule 'other' is always present by default.
-     *
-     * @param status Output param set to success/failure code on exit, which
-     *               must not indicate a failure before the function call.
-     * @return       StringEnumeration with the keywords.
-     *               The caller must delete the object.
-     * @draft ICU 4.0
-     */
-    StringEnumeration* getKeywords(UErrorCode& status) const;
-
-    /**
-     * Returns TRUE if the given keyword is defined in this
-     * <code>PluralRules</code> object.
-     *
-     * @param keyword  the input keyword.
-     * @return         TRUE if the input keyword is defined.
-     *                 Otherwise, return FALSE.
-     * @draft ICU 4.0
-     */
-    UBool isKeyword(const UnicodeString& keyword) const;
-
-
-    /**
-     * Returns keyword for default plural form.
-     *
-     * @return         keyword for default plural form.
-     * @internal 4.0
-     * @draft ICU 4.0
-     */
-    UnicodeString getKeywordOther() const;
-
-    /**
-     * Compares the equality of two PluralRules objects.
-     *
-     * @param other The other PluralRules object to be compared with.
-     * @return      True if the given PluralRules is the same as this
-     *              PluralRules; false otherwise.
-     * @draft ICU 4.0
-     */
-    virtual UBool operator==(const PluralRules& other) const;
-
-    /**
-     * Compares the inequality of two PluralRules objects.
-     *
-     * @param other The PluralRules object to be compared with.
-     * @return      True if the given PluralRules is not the same as this
-     *              PluralRules; false otherwise.
-     * @draft ICU 4.0
-     */
-    UBool operator!=(const PluralRules& other) const  {return !operator==(other);}
-
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @draft ICU 4.0
-     *
-    */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @draft ICU 4.0
-     */
-    virtual UClassID getDynamicClassID() const;
-
-
-private:
-    Hashtable       *fLocaleStringsHash;
-    UnicodeString   mLocaleName;
-    RuleChain       *mRules;
-    RuleParser      *mParser;
-
-    PluralRules();   // default constructor not implemented
-    int32_t getRepeatLimit() const;
-    void parseDescription(UnicodeString& ruleData, RuleChain& rules, UErrorCode &status);
-    void getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName);
-    void addRules(RuleChain& rules);
-    int32_t getNumberValue(const UnicodeString& token) const;
-    UnicodeString getRuleFromResource(const Locale& locale, UErrorCode& status);
-
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _PLURRULE
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/plurrule.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/plurrule.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/plurrule.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/plurrule.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,291 @@
+/*
+*******************************************************************************
+* Copyright (C) 2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+*
+* File PLURRULE.H
+*
+* Modification History:*
+*   Date        Name        Description
+*
+********************************************************************************
+*/
+
+#ifndef PLURRULE
+#define PLURRULE
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: PluralRules object
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/format.h"
+
+U_NAMESPACE_BEGIN
+
+class Hashtable;
+class RuleChain;
+class RuleParser;
+
+/**
+ * Defines rules for mapping positive long values onto a small set of
+ * keywords. Rules are constructed from a text description, consisting
+ * of a series of keywords and conditions.  The {@link #select} method
+ * examines each condition in order and returns the keyword for the
+ * first condition that matches the number.  If none match,
+ * default rule(other) is returned.
+ *
+ * Examples:<pre>
+ *   "one: n is 1; few: n in 2..4"</pre>
+ *  This defines two rules, for 'one' and 'few'.  The condition for
+ *  'one' is "n is 1" which means that the number must be equal to
+ *  1 for this condition to pass.  The condition for 'few' is
+ *  "n in 2..4" which means that the number must be between 2 and
+ *  4 inclusive for this condition to pass.  All other numbers
+ *  are assigned the keyword "other" by the default rule.
+ *  </p><pre>
+ *    "zero: n is 0; one: n is 1; zero: n mod 100 in 1..19"</pre>
+ *  This illustrates that the same keyword can be defined multiple times.
+ *  Each rule is examined in order, and the first keyword whose condition
+ *  passes is the one returned.  Also notes that a modulus is applied
+ *  to n in the last rule.  Thus its condition holds for 119, 219, 319...
+ *  </p><pre>
+ *    "one: n is 1; few: n mod 10 in 2..4 and n mod 100 not in 12..14"</pre>
+ *  This illustrates conjunction and negation.  The condition for 'few'
+ *  has two parts, both of which must be met: "n mod 10 in 2..4" and
+ *  "n mod 100 not in 12..14".  The first part applies a modulus to n
+ *  before the test as in the previous example.  The second part applies
+ *  a different modulus and also uses negation, thus it matches all
+ *  numbers _not_ in 12, 13, 14, 112, 113, 114, 212, 213, 214...
+ *  </p>
+ *  <p>
+ * Syntax:<pre>
+ * rules         = rule (';' rule)*
+ * rule          = keyword ':' condition
+ * keyword       = <identifier>
+ * condition     = and_condition ('or' and_condition)*
+ * and_condition = relation ('and' relation)*
+ * relation      = is_relation | in_relation | within_relation | 'n' <EOL>
+ * is_relation   = expr 'is' ('not')? value
+ * in_relation   = expr ('not')? 'in' range
+ * within_relation = expr ('not')? 'within' range
+ * expr          = 'n' ('mod' value)?
+ * value         = digit+
+ * digit         = 0|1|2|3|4|5|6|7|8|9
+ * range         = value'..'value
+ * </pre></p>
+ * <p>
+ *  The difference between 'in' and 'within' is that 'in' only includes
+ *  integers in the specified range, while 'within' includes all values.</p>
+ *  <p>
+ *  Keywords
+ *  could be defined by users or from ICU locale data. There are 6
+ *  predefined values in ICU - 'zero', 'one', 'two', 'few', 'many' and
+ *  'other'. Callers need to check the value of keyword returned by
+ *  {@link #select} method.
+ *  </p>
+ *
+ * Examples:<pre>
+ * UnicodeString keyword = pl->select(number);
+ * if (keyword== UnicodeString("one") {
+ *     ...
+ * }
+ * else if ( ... )
+ * </pre>
+ */
+class U_I18N_API PluralRules : public UObject {
+public:
+
+    /**
+     * Constructor.
+     * @param status  Output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     *
+     * @draft ICU 4.0
+     */
+    PluralRules(UErrorCode& status);
+
+    /**
+     * Copy constructor.
+     * @draft ICU 4.0
+     */
+    PluralRules(const PluralRules& other);
+
+    /**
+     * Destructor.
+     * @draft ICU 4.0
+     */
+    virtual ~PluralRules();
+
+    /**
+     * Clone
+     * @draft ICU 4.0
+     */
+    PluralRules* clone() const;
+
+    /**
+      * Assignment operator.
+      * @draft ICU 4.0
+      */
+    PluralRules& operator=(const PluralRules&);
+
+    /**
+     * Creates a PluralRules from a description if it is parsable, otherwise
+     * returns null.
+     *
+     * @param description rule description
+     * @param status      Output param set to success/failure code on exit, which
+     *                    must not indicate a failure before the function call.
+     * @return            new PluralRules pointer. NULL if there is an error.
+     * @draft ICU 4.0
+     */
+    static PluralRules* U_EXPORT2 createRules(const UnicodeString& description,
+                                              UErrorCode& status);
+
+    /**
+     * The default rules that accept any number.
+     *
+     * @param status  Output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * @return        new PluralRules pointer. NULL if there is an error.
+     * @draft ICU 4.0
+     */
+    static PluralRules* U_EXPORT2 createDefaultRules(UErrorCode& status);
+
+    /**
+     * Provides access to the predefined <code>PluralRules</code> for a given
+     * locale.
+     *
+     * @param locale  The locale for which a <code>PluralRules</code> object is
+     *                returned.
+     * @param status  Output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * @return        The predefined <code>PluralRules</code> object pointer for
+     *                this locale. If there's no predefined rules for this locale,
+     *                the rules for the closest parent in the locale hierarchy
+     *                that has one will  be returned.  The final fallback always
+     *                returns the default 'other' rules.
+     * @draft ICU 4.0
+     */
+    static PluralRules* U_EXPORT2 forLocale(const Locale& locale, UErrorCode& status);
+    
+    /**
+     * Given a number, returns the keyword of the first rule that applies to
+     * the number.  This function can be used with isKeyword* functions to
+     * determine the keyword for default plural rules.
+     *
+     * @param number  The number for which the rule has to be determined.
+     * @return        The keyword of the selected rule.
+     * @draft ICU 4.0
+     */
+    UnicodeString select(int32_t number) const;
+    
+    /**
+     * Given a number, returns the keyword of the first rule that applies to
+     * the number.  This function can be used with isKeyword* functions to
+     * determine the keyword for default plural rules.
+     *
+     * @param number  The number for which the rule has to be determined.
+     * @return        The keyword of the selected rule.
+     * @draft ICU 4.0
+     */
+    UnicodeString select(double number) const;
+
+    /**
+     * Returns a list of all rule keywords used in this <code>PluralRules</code>
+     * object.  The rule 'other' is always present by default.
+     *
+     * @param status Output param set to success/failure code on exit, which
+     *               must not indicate a failure before the function call.
+     * @return       StringEnumeration with the keywords.
+     *               The caller must delete the object.
+     * @draft ICU 4.0
+     */
+    StringEnumeration* getKeywords(UErrorCode& status) const;
+
+    /**
+     * Returns TRUE if the given keyword is defined in this
+     * <code>PluralRules</code> object.
+     *
+     * @param keyword  the input keyword.
+     * @return         TRUE if the input keyword is defined.
+     *                 Otherwise, return FALSE.
+     * @draft ICU 4.0
+     */
+    UBool isKeyword(const UnicodeString& keyword) const;
+
+
+    /**
+     * Returns keyword for default plural form.
+     *
+     * @return         keyword for default plural form.
+     * @internal 4.0
+     * @draft ICU 4.0
+     */
+    UnicodeString getKeywordOther() const;
+
+    /**
+     * Compares the equality of two PluralRules objects.
+     *
+     * @param other The other PluralRules object to be compared with.
+     * @return      True if the given PluralRules is the same as this
+     *              PluralRules; false otherwise.
+     * @draft ICU 4.0
+     */
+    virtual UBool operator==(const PluralRules& other) const;
+
+    /**
+     * Compares the inequality of two PluralRules objects.
+     *
+     * @param other The PluralRules object to be compared with.
+     * @return      True if the given PluralRules is not the same as this
+     *              PluralRules; false otherwise.
+     * @draft ICU 4.0
+     */
+    UBool operator!=(const PluralRules& other) const  {return !operator==(other);}
+
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @draft ICU 4.0
+     *
+    */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @draft ICU 4.0
+     */
+    virtual UClassID getDynamicClassID() const;
+
+
+private:
+    Hashtable       *fLocaleStringsHash;
+    UnicodeString   mLocaleName;
+    RuleChain       *mRules;
+    RuleParser      *mParser;
+
+    PluralRules();   // default constructor not implemented
+    int32_t getRepeatLimit() const;
+    void parseDescription(UnicodeString& ruleData, RuleChain& rules, UErrorCode &status);
+    void getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName);
+    void addRules(RuleChain& rules);
+    int32_t getNumberValue(const UnicodeString& token) const;
+    UnicodeString getRuleFromResource(const Locale& locale, UErrorCode& status);
+
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _PLURRULE
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/ppalmos.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ppalmos.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ppalmos.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,273 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 1997-2006, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-******************************************************************************
-*
-*  FILE NAME : ppalmos.h
-*
-*   Date        Name        Description
-*   05/10/04    Ken Krugler Creation (copied from pwin32.h & modified).
-******************************************************************************
-*/
-
-#ifndef U_PPALMOS_H
-#define U_PPALMOS_H
-
- /**
-  * \file
-  * \brief Configuration constants for the Palm OS platform
-  */
-  
-/* Define the platform we're on. */
-#ifndef U_PALMOS
-#define U_PALMOS
-#endif
-
-/* _MSC_VER is used to detect the Microsoft compiler. */
-#if defined(_MSC_VER)
-#define U_INT64_IS_LONG_LONG 0
-#else
-#define U_INT64_IS_LONG_LONG 1
-#endif
-
-/* Define whether inttypes.h is available */
-#ifndef U_HAVE_INTTYPES_H
-#define U_HAVE_INTTYPES_H 1
-#endif
-
-/*
- * Define what support for C++ streams is available.
- *     If U_IOSTREAM_SOURCE is set to 199711, then <iostream> is available
- * (1997711 is the date the ISO/IEC C++ FDIS was published), and then
- * one should qualify streams using the std namespace in ICU header
- * files.
- *     If U_IOSTREAM_SOURCE is set to 198506, then <iostream.h> is
- * available instead (198506 is the date when Stroustrup published
- * "An Extensible I/O Facility for C++" at the summer USENIX conference).
- *     If U_IOSTREAM_SOURCE is 0, then C++ streams are not available and
- * support for them will be silently suppressed in ICU.
- *
- */
-
-#ifndef U_IOSTREAM_SOURCE
-#define U_IOSTREAM_SOURCE 199711
-#endif
-
-/* Determines whether specific types are available */
-#ifndef U_HAVE_INT8_T
-#define U_HAVE_INT8_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_UINT8_T
-#define U_HAVE_UINT8_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_INT16_T
-#define U_HAVE_INT16_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_UINT16_T
-#define U_HAVE_UINT16_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_INT32_T
-#define U_HAVE_INT32_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_UINT32_T
-#define U_HAVE_UINT32_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_INT64_T
-#define U_HAVE_INT64_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_UINT64_T
-#define U_HAVE_UINT64_T U_HAVE_INTTYPES_H
-#endif
-
-
-/*===========================================================================*/
-/* Generic data types                                                        */
-/*===========================================================================*/
-
-/* If your platform does not have the <inttypes.h> header, you may
-   need to edit the typedefs below. */
-#if U_HAVE_INTTYPES_H
-#include <inttypes.h>
-#else /* U_HAVE_INTTYPES_H */
-
-#if ! U_HAVE_INT8_T
-typedef signed char int8_t;
-#endif
-
-#if ! U_HAVE_UINT8_T
-typedef unsigned char uint8_t;
-#endif
-
-#if ! U_HAVE_INT16_T
-typedef signed short int16_t;
-#endif
-
-#if ! U_HAVE_UINT16_T
-typedef unsigned short uint16_t;
-#endif
-
-#if ! U_HAVE_INT32_T
-typedef signed int int32_t;
-#endif
-
-#if ! U_HAVE_UINT32_T
-typedef unsigned int uint32_t;
-#endif
-
-#if ! U_HAVE_INT64_T
-#if U_INT64_IS_LONG_LONG
-    typedef signed long long int64_t;
-#else
-    typedef signed __int64 int64_t;
-#endif
-#endif
-
-#if ! U_HAVE_UINT64_T
-#if U_INT64_IS_LONG_LONG
-    typedef unsigned long long uint64_t;
-#else
-    typedef unsigned __int64 uint64_t;
-#endif
-#endif
-#endif
-
-/*===========================================================================*/
-/* Compiler and environment features                                         */
-/*===========================================================================*/
-
-/* Define whether namespace is supported */
-#ifndef U_HAVE_NAMESPACE
-#define U_HAVE_NAMESPACE 1
-#endif
-
-/* Determines the endianness of the platform */
-#define U_IS_BIG_ENDIAN 0
-
-/* 1 or 0 to enable or disable threads.  If undefined, default is: enable threads. */
-#define ICU_USE_THREADS 1
-
-#ifndef U_DEBUG
-#ifdef _DEBUG
-#define U_DEBUG 1
-#else
-#define U_DEBUG 0
-#endif
-#endif
-
-#ifndef U_RELEASE
-#ifdef NDEBUG
-#define U_RELEASE 1
-#else
-#define U_RELEASE 0
-#endif
-#endif
-
-/* Determine whether to disable renaming or not. This overrides the
-   setting in umachine.h which is for all platforms. */
-#ifndef U_DISABLE_RENAMING
-#define U_DISABLE_RENAMING 0
-#endif
-
-/* Determine whether to override new and delete. */
-#ifndef U_OVERRIDE_CXX_ALLOCATION
-#define U_OVERRIDE_CXX_ALLOCATION 1
-#endif
-/* Determine whether to override placement new and delete for STL. */
-#ifndef U_HAVE_PLACEMENT_NEW
-#define U_HAVE_PLACEMENT_NEW 0
-#endif
-/* Determine whether to override new and delete for MFC. */
-#if !defined(U_HAVE_DEBUG_LOCATION_NEW) && defined(_MSC_VER)
-#define U_HAVE_DEBUG_LOCATION_NEW 0
-#endif
-
-/* Determine whether to enable tracing. */
-#ifndef U_ENABLE_TRACING
-#define U_ENABLE_TRACING 1
-#endif
-
-/* Do we allow ICU users to use the draft APIs by default? */
-#ifndef U_DEFAULT_SHOW_DRAFT
-#define U_DEFAULT_SHOW_DRAFT 1
-#endif
-
-/* Define the library suffix in a C syntax. */
-#define U_HAVE_LIB_SUFFIX 0
-#define U_LIB_SUFFIX_C_NAME 
-#define U_LIB_SUFFIX_C_NAME_STRING ""
-
-/*===========================================================================*/
-/* Information about wchar support                                           */
-/*===========================================================================*/
-
-#define U_HAVE_WCHAR_H 1
-#define U_SIZEOF_WCHAR_T 2
-
-#define U_HAVE_WCSCPY    0
-
-/*===========================================================================*/
-/* Information about POSIX support                                           */
-/*===========================================================================*/
-
-
-/* TODO: Fix Palm OS's determination of a timezone */
-#if 0
-#define U_TZSET         _tzset
-#endif
-#if 0
-#define U_TIMEZONE      _timezone
-#endif
-#if 0
-#define U_TZNAME        _tzname
-#endif
-
-#define U_HAVE_MMAP 0
-#define U_HAVE_POPEN 0
-
-/*===========================================================================*/
-/* Symbol import-export control                                              */
-/*===========================================================================*/
-
-#define U_EXPORT
-#define U_EXPORT2
-#define U_IMPORT
-
-/*===========================================================================*/
-/* Code alignment and C function inlining                                    */
-/*===========================================================================*/
-
-#ifndef U_INLINE
-#   ifdef __cplusplus
-#       define U_INLINE inline
-#   else
-#       define U_INLINE __inline
-#   endif
-#endif
-
-#if defined(_MSC_VER) && defined(_M_IX86)
-#define U_ALIGN_CODE(val)    __asm      align val
-#else
-#define U_ALIGN_CODE(val)
-#endif
-
-
-/*===========================================================================*/
-/* Programs used by ICU code                                                 */
-/*===========================================================================*/
-
-#ifndef U_MAKE
-#define U_MAKE  "nmake"
-#define U_MAKE_IS_NMAKE 1
-#endif
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/ppalmos.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ppalmos.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ppalmos.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ppalmos.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,273 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2006, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+*  FILE NAME : ppalmos.h
+*
+*   Date        Name        Description
+*   05/10/04    Ken Krugler Creation (copied from pwin32.h & modified).
+******************************************************************************
+*/
+
+#ifndef U_PPALMOS_H
+#define U_PPALMOS_H
+
+ /**
+  * \file
+  * \brief Configuration constants for the Palm OS platform
+  */
+  
+/* Define the platform we're on. */
+#ifndef U_PALMOS
+#define U_PALMOS
+#endif
+
+/* _MSC_VER is used to detect the Microsoft compiler. */
+#if defined(_MSC_VER)
+#define U_INT64_IS_LONG_LONG 0
+#else
+#define U_INT64_IS_LONG_LONG 1
+#endif
+
+/* Define whether inttypes.h is available */
+#ifndef U_HAVE_INTTYPES_H
+#define U_HAVE_INTTYPES_H 1
+#endif
+
+/*
+ * Define what support for C++ streams is available.
+ *     If U_IOSTREAM_SOURCE is set to 199711, then <iostream> is available
+ * (1997711 is the date the ISO/IEC C++ FDIS was published), and then
+ * one should qualify streams using the std namespace in ICU header
+ * files.
+ *     If U_IOSTREAM_SOURCE is set to 198506, then <iostream.h> is
+ * available instead (198506 is the date when Stroustrup published
+ * "An Extensible I/O Facility for C++" at the summer USENIX conference).
+ *     If U_IOSTREAM_SOURCE is 0, then C++ streams are not available and
+ * support for them will be silently suppressed in ICU.
+ *
+ */
+
+#ifndef U_IOSTREAM_SOURCE
+#define U_IOSTREAM_SOURCE 199711
+#endif
+
+/* Determines whether specific types are available */
+#ifndef U_HAVE_INT8_T
+#define U_HAVE_INT8_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_UINT8_T
+#define U_HAVE_UINT8_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_INT16_T
+#define U_HAVE_INT16_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_UINT16_T
+#define U_HAVE_UINT16_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_INT32_T
+#define U_HAVE_INT32_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_UINT32_T
+#define U_HAVE_UINT32_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_INT64_T
+#define U_HAVE_INT64_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_UINT64_T
+#define U_HAVE_UINT64_T U_HAVE_INTTYPES_H
+#endif
+
+
+/*===========================================================================*/
+/* Generic data types                                                        */
+/*===========================================================================*/
+
+/* If your platform does not have the <inttypes.h> header, you may
+   need to edit the typedefs below. */
+#if U_HAVE_INTTYPES_H
+#include <inttypes.h>
+#else /* U_HAVE_INTTYPES_H */
+
+#if ! U_HAVE_INT8_T
+typedef signed char int8_t;
+#endif
+
+#if ! U_HAVE_UINT8_T
+typedef unsigned char uint8_t;
+#endif
+
+#if ! U_HAVE_INT16_T
+typedef signed short int16_t;
+#endif
+
+#if ! U_HAVE_UINT16_T
+typedef unsigned short uint16_t;
+#endif
+
+#if ! U_HAVE_INT32_T
+typedef signed int int32_t;
+#endif
+
+#if ! U_HAVE_UINT32_T
+typedef unsigned int uint32_t;
+#endif
+
+#if ! U_HAVE_INT64_T
+#if U_INT64_IS_LONG_LONG
+    typedef signed long long int64_t;
+#else
+    typedef signed __int64 int64_t;
+#endif
+#endif
+
+#if ! U_HAVE_UINT64_T
+#if U_INT64_IS_LONG_LONG
+    typedef unsigned long long uint64_t;
+#else
+    typedef unsigned __int64 uint64_t;
+#endif
+#endif
+#endif
+
+/*===========================================================================*/
+/* Compiler and environment features                                         */
+/*===========================================================================*/
+
+/* Define whether namespace is supported */
+#ifndef U_HAVE_NAMESPACE
+#define U_HAVE_NAMESPACE 1
+#endif
+
+/* Determines the endianness of the platform */
+#define U_IS_BIG_ENDIAN 0
+
+/* 1 or 0 to enable or disable threads.  If undefined, default is: enable threads. */
+#define ICU_USE_THREADS 1
+
+#ifndef U_DEBUG
+#ifdef _DEBUG
+#define U_DEBUG 1
+#else
+#define U_DEBUG 0
+#endif
+#endif
+
+#ifndef U_RELEASE
+#ifdef NDEBUG
+#define U_RELEASE 1
+#else
+#define U_RELEASE 0
+#endif
+#endif
+
+/* Determine whether to disable renaming or not. This overrides the
+   setting in umachine.h which is for all platforms. */
+#ifndef U_DISABLE_RENAMING
+#define U_DISABLE_RENAMING 0
+#endif
+
+/* Determine whether to override new and delete. */
+#ifndef U_OVERRIDE_CXX_ALLOCATION
+#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+/* Determine whether to override placement new and delete for STL. */
+#ifndef U_HAVE_PLACEMENT_NEW
+#define U_HAVE_PLACEMENT_NEW 0
+#endif
+/* Determine whether to override new and delete for MFC. */
+#if !defined(U_HAVE_DEBUG_LOCATION_NEW) && defined(_MSC_VER)
+#define U_HAVE_DEBUG_LOCATION_NEW 0
+#endif
+
+/* Determine whether to enable tracing. */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 1
+#endif
+
+/* Do we allow ICU users to use the draft APIs by default? */
+#ifndef U_DEFAULT_SHOW_DRAFT
+#define U_DEFAULT_SHOW_DRAFT 1
+#endif
+
+/* Define the library suffix in a C syntax. */
+#define U_HAVE_LIB_SUFFIX 0
+#define U_LIB_SUFFIX_C_NAME 
+#define U_LIB_SUFFIX_C_NAME_STRING ""
+
+/*===========================================================================*/
+/* Information about wchar support                                           */
+/*===========================================================================*/
+
+#define U_HAVE_WCHAR_H 1
+#define U_SIZEOF_WCHAR_T 2
+
+#define U_HAVE_WCSCPY    0
+
+/*===========================================================================*/
+/* Information about POSIX support                                           */
+/*===========================================================================*/
+
+
+/* TODO: Fix Palm OS's determination of a timezone */
+#if 0
+#define U_TZSET         _tzset
+#endif
+#if 0
+#define U_TIMEZONE      _timezone
+#endif
+#if 0
+#define U_TZNAME        _tzname
+#endif
+
+#define U_HAVE_MMAP 0
+#define U_HAVE_POPEN 0
+
+/*===========================================================================*/
+/* Symbol import-export control                                              */
+/*===========================================================================*/
+
+#define U_EXPORT
+#define U_EXPORT2
+#define U_IMPORT
+
+/*===========================================================================*/
+/* Code alignment and C function inlining                                    */
+/*===========================================================================*/
+
+#ifndef U_INLINE
+#   ifdef __cplusplus
+#       define U_INLINE inline
+#   else
+#       define U_INLINE __inline
+#   endif
+#endif
+
+#if defined(_MSC_VER) && defined(_M_IX86)
+#define U_ALIGN_CODE(val)    __asm      align val
+#else
+#define U_ALIGN_CODE(val)
+#endif
+
+
+/*===========================================================================*/
+/* Programs used by ICU code                                                 */
+/*===========================================================================*/
+
+#ifndef U_MAKE
+#define U_MAKE  "nmake"
+#define U_MAKE_IS_NMAKE 1
+#endif
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/putil.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/putil.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/putil.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,184 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 1997-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-******************************************************************************
-*
-*  FILE NAME : putil.h
-*
-*   Date        Name        Description
-*   05/14/98    nos         Creation (content moved here from utypes.h).
-*   06/17/99    erm         Added IEEE_754
-*   07/22/98    stephen     Added IEEEremainder, max, min, trunc
-*   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
-*   08/24/98    stephen     Added longBitsFromDouble
-*   03/02/99    stephen     Removed openFile().  Added AS400 support.
-*   04/15/99    stephen     Converted to C
-*   11/15/99    helena      Integrated S/390 changes for IEEE support.
-*   01/11/00    helena      Added u_getVersion.
-******************************************************************************
-*/
-
-#ifndef PUTIL_H
-#define PUTIL_H
-
-#include "unicode/utypes.h"
- /**
-  * \file
-  * \brief C API: Platform Utilities
-  */
-  
-/* Define this to 1 if your platform supports IEEE 754 floating point,
-   to 0 if it does not. */
-#ifndef IEEE_754
-#   define IEEE_754 1
-#endif
-
-/*==========================================================================*/
-/* Platform utilities                                                       */
-/*==========================================================================*/
-
-/**
- * Platform utilities isolates the platform dependencies of the
- * libarary.  For each platform which this code is ported to, these
- * functions may have to be re-implemented.
- */
-
-/**
- * Return the ICU data directory. 
- * The data directory is where common format ICU data files (.dat files)
- *   are loaded from.  Note that normal use of the built-in ICU
- *   facilities does not require loading of an external data file;
- *   unless you are adding custom data to ICU, the data directory
- *   does not need to be set.
- *
- * The data directory is determined as follows:
- *    If u_setDataDirectory() has been called, that is it, otherwise
- *    if the ICU_DATA environment variable is set, use that, otherwise
- *    If a data directory was specifed at ICU build time
- *      <code>( #define ICU_DATA_DIR "path" )</code>, use that,
- *    otherwise no data directory is available.
- *
- * @return the data directory, or an empty string ("") if no data directory has
- *         been specified.
- *   
- * @stable ICU 2.0
- */
-U_STABLE const char* U_EXPORT2 u_getDataDirectory(void);
-
-/** 
- * Set the ICU data directory. 
- * The data directory is where common format ICU data files (.dat files)
- *   are loaded from.  Note that normal use of the built-in ICU
- *   facilities does not require loading of an external data file;
- *   unless you are adding custom data to ICU, the data directory
- *   does not need to be set.
- *
- * This function should be called at most once in a process, before the
- * first ICU operation (e.g., u_init()) that will require the loading of an
- * ICU data file.
- * This function is not thread-safe. Use it before calling ICU APIs from
- * multiple threads.
- *
- * @param directory The directory to be set.
- *
- * @see u_init
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
-
-/**
- * Please use ucnv_getDefaultName() instead.
- * Return the default codepage for this platform and locale.
- * This function can call setlocale() on Unix platforms. Please read the
- * platform documentation on setlocale() before calling this function.
- * @return the default codepage for this platform 
- * @internal
- */
-U_INTERNAL const char*  U_EXPORT2 uprv_getDefaultCodepage(void);
-
-/**
- * Please use uloc_getDefault() instead.
- * Return the default locale ID string by querying ths system, or
- *     zero if one cannot be found. 
- * This function can call setlocale() on Unix platforms. Please read the
- * platform documentation on setlocale() before calling this function.
- * @return the default locale ID string
- * @internal
- */
-U_INTERNAL const char*  U_EXPORT2 uprv_getDefaultLocaleID(void);
-
-/**
- * Filesystem file and path separator characters.
- * Example: '/' and ':' on Unix, '\\' and ';' on Windows.
- * @stable ICU 2.0
- */
-#ifdef XP_MAC
-#   define U_FILE_SEP_CHAR ':'
-#   define U_FILE_ALT_SEP_CHAR ':'
-#   define U_PATH_SEP_CHAR ';'
-#   define U_FILE_SEP_STRING ":"
-#   define U_FILE_ALT_SEP_STRING ":"
-#   define U_PATH_SEP_STRING ";"
-#elif defined(U_WINDOWS)
-#   define U_FILE_SEP_CHAR '\\'
-#   define U_FILE_ALT_SEP_CHAR '/'
-#   define U_PATH_SEP_CHAR ';'
-#   define U_FILE_SEP_STRING "\\"
-#   define U_FILE_ALT_SEP_STRING "/"
-#   define U_PATH_SEP_STRING ";"
-#else
-#   define U_FILE_SEP_CHAR '/'
-#   define U_FILE_ALT_SEP_CHAR '/'
-#   define U_PATH_SEP_CHAR ':'
-#   define U_FILE_SEP_STRING "/"
-#   define U_FILE_ALT_SEP_STRING "/"
-#   define U_PATH_SEP_STRING ":"
-#endif
-
-/**
- * Convert char characters to UChar characters.
- * This utility function is useful only for "invariant characters"
- * that are encoded in the platform default encoding.
- * They are a small, constant subset of the encoding and include
- * just the latin letters, digits, and some punctuation.
- * For details, see U_CHARSET_FAMILY.
- *
- * @param cs Input string, points to <code>length</code>
- *           character bytes from a subset of the platform encoding.
- * @param us Output string, points to memory for <code>length</code>
- *           Unicode characters.
- * @param length The number of characters to convert; this may
- *               include the terminating <code>NUL</code>.
- *
- * @see U_CHARSET_FAMILY
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-u_charsToUChars(const char *cs, UChar *us, int32_t length);
-
-/**
- * Convert UChar characters to char characters.
- * This utility function is useful only for "invariant characters"
- * that can be encoded in the platform default encoding.
- * They are a small, constant subset of the encoding and include
- * just the latin letters, digits, and some punctuation.
- * For details, see U_CHARSET_FAMILY.
- *
- * @param us Input string, points to <code>length</code>
- *           Unicode characters that can be encoded with the
- *           codepage-invariant subset of the platform encoding.
- * @param cs Output string, points to memory for <code>length</code>
- *           character bytes.
- * @param length The number of characters to convert; this may
- *               include the terminating <code>NUL</code>.
- *
- * @see U_CHARSET_FAMILY
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-u_UCharsToChars(const UChar *us, char *cs, int32_t length);
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/putil.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/putil.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/putil.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/putil.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,184 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+*  FILE NAME : putil.h
+*
+*   Date        Name        Description
+*   05/14/98    nos         Creation (content moved here from utypes.h).
+*   06/17/99    erm         Added IEEE_754
+*   07/22/98    stephen     Added IEEEremainder, max, min, trunc
+*   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
+*   08/24/98    stephen     Added longBitsFromDouble
+*   03/02/99    stephen     Removed openFile().  Added AS400 support.
+*   04/15/99    stephen     Converted to C
+*   11/15/99    helena      Integrated S/390 changes for IEEE support.
+*   01/11/00    helena      Added u_getVersion.
+******************************************************************************
+*/
+
+#ifndef PUTIL_H
+#define PUTIL_H
+
+#include "unicode/utypes.h"
+ /**
+  * \file
+  * \brief C API: Platform Utilities
+  */
+  
+/* Define this to 1 if your platform supports IEEE 754 floating point,
+   to 0 if it does not. */
+#ifndef IEEE_754
+#   define IEEE_754 1
+#endif
+
+/*==========================================================================*/
+/* Platform utilities                                                       */
+/*==========================================================================*/
+
+/**
+ * Platform utilities isolates the platform dependencies of the
+ * libarary.  For each platform which this code is ported to, these
+ * functions may have to be re-implemented.
+ */
+
+/**
+ * Return the ICU data directory. 
+ * The data directory is where common format ICU data files (.dat files)
+ *   are loaded from.  Note that normal use of the built-in ICU
+ *   facilities does not require loading of an external data file;
+ *   unless you are adding custom data to ICU, the data directory
+ *   does not need to be set.
+ *
+ * The data directory is determined as follows:
+ *    If u_setDataDirectory() has been called, that is it, otherwise
+ *    if the ICU_DATA environment variable is set, use that, otherwise
+ *    If a data directory was specifed at ICU build time
+ *      <code>( #define ICU_DATA_DIR "path" )</code>, use that,
+ *    otherwise no data directory is available.
+ *
+ * @return the data directory, or an empty string ("") if no data directory has
+ *         been specified.
+ *   
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2 u_getDataDirectory(void);
+
+/** 
+ * Set the ICU data directory. 
+ * The data directory is where common format ICU data files (.dat files)
+ *   are loaded from.  Note that normal use of the built-in ICU
+ *   facilities does not require loading of an external data file;
+ *   unless you are adding custom data to ICU, the data directory
+ *   does not need to be set.
+ *
+ * This function should be called at most once in a process, before the
+ * first ICU operation (e.g., u_init()) that will require the loading of an
+ * ICU data file.
+ * This function is not thread-safe. Use it before calling ICU APIs from
+ * multiple threads.
+ *
+ * @param directory The directory to be set.
+ *
+ * @see u_init
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
+
+/**
+ * Please use ucnv_getDefaultName() instead.
+ * Return the default codepage for this platform and locale.
+ * This function can call setlocale() on Unix platforms. Please read the
+ * platform documentation on setlocale() before calling this function.
+ * @return the default codepage for this platform 
+ * @internal
+ */
+U_INTERNAL const char*  U_EXPORT2 uprv_getDefaultCodepage(void);
+
+/**
+ * Please use uloc_getDefault() instead.
+ * Return the default locale ID string by querying ths system, or
+ *     zero if one cannot be found. 
+ * This function can call setlocale() on Unix platforms. Please read the
+ * platform documentation on setlocale() before calling this function.
+ * @return the default locale ID string
+ * @internal
+ */
+U_INTERNAL const char*  U_EXPORT2 uprv_getDefaultLocaleID(void);
+
+/**
+ * Filesystem file and path separator characters.
+ * Example: '/' and ':' on Unix, '\\' and ';' on Windows.
+ * @stable ICU 2.0
+ */
+#ifdef XP_MAC
+#   define U_FILE_SEP_CHAR ':'
+#   define U_FILE_ALT_SEP_CHAR ':'
+#   define U_PATH_SEP_CHAR ';'
+#   define U_FILE_SEP_STRING ":"
+#   define U_FILE_ALT_SEP_STRING ":"
+#   define U_PATH_SEP_STRING ";"
+#elif defined(U_WINDOWS)
+#   define U_FILE_SEP_CHAR '\\'
+#   define U_FILE_ALT_SEP_CHAR '/'
+#   define U_PATH_SEP_CHAR ';'
+#   define U_FILE_SEP_STRING "\\"
+#   define U_FILE_ALT_SEP_STRING "/"
+#   define U_PATH_SEP_STRING ";"
+#else
+#   define U_FILE_SEP_CHAR '/'
+#   define U_FILE_ALT_SEP_CHAR '/'
+#   define U_PATH_SEP_CHAR ':'
+#   define U_FILE_SEP_STRING "/"
+#   define U_FILE_ALT_SEP_STRING "/"
+#   define U_PATH_SEP_STRING ":"
+#endif
+
+/**
+ * Convert char characters to UChar characters.
+ * This utility function is useful only for "invariant characters"
+ * that are encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param cs Input string, points to <code>length</code>
+ *           character bytes from a subset of the platform encoding.
+ * @param us Output string, points to memory for <code>length</code>
+ *           Unicode characters.
+ * @param length The number of characters to convert; this may
+ *               include the terminating <code>NUL</code>.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_charsToUChars(const char *cs, UChar *us, int32_t length);
+
+/**
+ * Convert UChar characters to char characters.
+ * This utility function is useful only for "invariant characters"
+ * that can be encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param us Input string, points to <code>length</code>
+ *           Unicode characters that can be encoded with the
+ *           codepage-invariant subset of the platform encoding.
+ * @param cs Output string, points to memory for <code>length</code>
+ *           character bytes.
+ * @param length The number of characters to convert; this may
+ *               include the terminating <code>NUL</code>.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_UCharsToChars(const UChar *us, char *cs, int32_t length);
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/pwin32.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/pwin32.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/pwin32.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,311 +0,0 @@
-/*
- ******************************************************************************
- *
- *   Copyright (C) 1997-2007, International Business Machines
- *   Corporation and others.  All Rights Reserved.
- *
- ******************************************************************************
- *
- *  FILE NAME : platform.h
- *
- *   Date        Name        Description
- *   05/13/98    nos         Creation (content moved here from ptypes.h).
- *   03/02/99    stephen     Added AS400 support.
- *   03/30/99    stephen     Added Linux support.
- *   04/13/99    stephen     Reworked for autoconf.
- ******************************************************************************
- */
-
- /**
-  * \file
-  * \brief Configuration constants for the Windows platform
-  */
-  
-/* Define the platform we're on. */
-#ifndef U_WINDOWS
-#define U_WINDOWS
-#endif
-
-#if defined(__BORLANDC__)
-#define U_HAVE_PLACEMENT_NEW 0
-#define U_HAVE_INTTYPES_H 1
-#define __STDC_CONSTANT_MACROS
-#endif
-
-/* _MSC_VER is used to detect the Microsoft compiler. */
-#if defined(_MSC_VER)
-#define U_INT64_IS_LONG_LONG 0
-#else
-#define U_INT64_IS_LONG_LONG 1
-#endif
-
-/* Define whether inttypes.h is available */
-#ifndef U_HAVE_INTTYPES_H
-#define U_HAVE_INTTYPES_H 0
-#endif
-
-/*
- * Define what support for C++ streams is available.
- *     If U_IOSTREAM_SOURCE is set to 199711, then <iostream> is available
- * (1997711 is the date the ISO/IEC C++ FDIS was published), and then
- * one should qualify streams using the std namespace in ICU header
- * files.
- *     If U_IOSTREAM_SOURCE is set to 198506, then <iostream.h> is
- * available instead (198506 is the date when Stroustrup published
- * "An Extensible I/O Facility for C++" at the summer USENIX conference).
- *     If U_IOSTREAM_SOURCE is 0, then C++ streams are not available and
- * support for them will be silently suppressed in ICU.
- *
- */
-
-#ifndef U_IOSTREAM_SOURCE
-#define U_IOSTREAM_SOURCE 199711
-#endif
-
-/* Determines whether specific types are available */
-#ifndef U_HAVE_INT8_T
-#define U_HAVE_INT8_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_UINT8_T
-#define U_HAVE_UINT8_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_INT16_T
-#define U_HAVE_INT16_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_UINT16_T
-#define U_HAVE_UINT16_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_INT32_T
-#define U_HAVE_INT32_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_UINT32_T
-#define U_HAVE_UINT32_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_INT64_T
-#define U_HAVE_INT64_T U_HAVE_INTTYPES_H
-#endif
-
-#ifndef U_HAVE_UINT64_T
-#define U_HAVE_UINT64_T U_HAVE_INTTYPES_H
-#endif
-
-/* Define 64 bit limits */
-#if !U_INT64_IS_LONG_LONG
-# ifndef INT64_C
-#  define INT64_C(x) ((int64_t)x)
-# endif
-# ifndef UINT64_C
-#  define UINT64_C(x) ((uint64_t)x)
-# endif
-/* else use the umachine.h definition */
-#endif
-
-/*===========================================================================*/
-/* Generic data types                                                        */
-/*===========================================================================*/
-
-/* If your platform does not have the <inttypes.h> header, you may
-   need to edit the typedefs below. */
-#if U_HAVE_INTTYPES_H
-#include <inttypes.h>
-#else /* U_HAVE_INTTYPES_H */
-
-#if ! U_HAVE_INT8_T
-typedef signed char int8_t;
-#endif
-
-#if ! U_HAVE_UINT8_T
-typedef unsigned char uint8_t;
-#endif
-
-#if ! U_HAVE_INT16_T
-typedef signed short int16_t;
-#endif
-
-#if ! U_HAVE_UINT16_T
-typedef unsigned short uint16_t;
-#endif
-
-#if ! U_HAVE_INT32_T
-typedef signed int int32_t;
-#endif
-
-#if ! U_HAVE_UINT32_T
-typedef unsigned int uint32_t;
-#endif
-
-#if ! U_HAVE_INT64_T
-#if U_INT64_IS_LONG_LONG
-    typedef signed long long int64_t;
-#else
-    typedef signed __int64 int64_t;
-#endif
-#endif
-
-#if ! U_HAVE_UINT64_T
-#if U_INT64_IS_LONG_LONG
-    typedef unsigned long long uint64_t;
-#else
-    typedef unsigned __int64 uint64_t;
-#endif
-#endif
-#endif
-
-/*===========================================================================*/
-/* Compiler and environment features                                         */
-/*===========================================================================*/
-
-/* Define whether namespace is supported */
-#ifndef U_HAVE_NAMESPACE
-#define U_HAVE_NAMESPACE 1
-#endif
-
-/* Determines the endianness of the platform */
-#define U_IS_BIG_ENDIAN 0
-
-/* 1 or 0 to enable or disable threads.  If undefined, default is: enable threads. */
-#define ICU_USE_THREADS 1
-
-/* On strong memory model CPUs (e.g. x86 CPUs), we use a safe & quick double check mutex lock. */
-/*
-Microsoft can define _M_IX86, _M_AMD64 (before Visual Studio 8) or _M_X64 (starting in Visual Studio 8). 
-Intel can define _M_IX86 or _M_X64
-*/
-#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
-#define UMTX_STRONG_MEMORY_MODEL 1
-#endif
-
-#ifndef U_DEBUG
-#ifdef _DEBUG
-#define U_DEBUG 1
-#else
-#define U_DEBUG 0
-#endif
-#endif
-
-#ifndef U_RELEASE
-#ifdef NDEBUG
-#define U_RELEASE 1
-#else
-#define U_RELEASE 0
-#endif
-#endif
-
-/* Determine whether to disable renaming or not. This overrides the
-   setting in umachine.h which is for all platforms. */
-#ifndef U_DISABLE_RENAMING
-#define U_DISABLE_RENAMING 0
-#endif
-
-/* Determine whether to override new and delete. */
-#ifndef U_OVERRIDE_CXX_ALLOCATION
-#define U_OVERRIDE_CXX_ALLOCATION 1
-#endif
-/* Determine whether to override placement new and delete for STL. */
-#ifndef U_HAVE_PLACEMENT_NEW
-#define U_HAVE_PLACEMENT_NEW 1
-#endif
-/* Determine whether to override new and delete for MFC. */
-#if !defined(U_HAVE_DEBUG_LOCATION_NEW) && defined(_MSC_VER)
-#define U_HAVE_DEBUG_LOCATION_NEW 1
-#endif
-
-/* Determine whether to enable tracing. */
-#ifndef U_ENABLE_TRACING
-#define U_ENABLE_TRACING 0
-#endif
-
-/* Do we allow ICU users to use the draft APIs by default? */
-#ifndef U_DEFAULT_SHOW_DRAFT
-#define U_DEFAULT_SHOW_DRAFT 1
-#endif
-
-/* Define the library suffix in a C syntax. */
-#define U_HAVE_LIB_SUFFIX 0
-#define U_LIB_SUFFIX_C_NAME 
-#define U_LIB_SUFFIX_C_NAME_STRING ""
-
-/*===========================================================================*/
-/* Information about wchar support                                           */
-/*===========================================================================*/
-
-#define U_HAVE_WCHAR_H 1
-#define U_SIZEOF_WCHAR_T 2
-
-#define U_HAVE_WCSCPY 1
-
-/**
- * \def U_DECLARE_UTF16
- * Do not use this macro. Use the UNICODE_STRING or U_STRING_DECL macros
- * instead.
- * @internal
- */
-#if 1
-#define U_DECLARE_UTF16(string) L ## string
-#endif
-
-/*===========================================================================*/
-/* Information about POSIX support                                           */
-/*===========================================================================*/
-
-#if 1
-#define U_TZSET         _tzset
-#endif
-#if 1
-#define U_TIMEZONE      _timezone
-#endif
-#if 1
-#define U_TZNAME        _tzname
-#endif
-#if 1
-#define U_DAYLIGHT      _daylight
-#endif
-
-#define U_HAVE_MMAP 0
-#define U_HAVE_POPEN 0
-
-/*===========================================================================*/
-/* Symbol import-export control                                              */
-/*===========================================================================*/
-
-#ifdef U_STATIC_IMPLEMENTATION
-#define U_EXPORT
-#else
-#define U_EXPORT __declspec(dllexport)
-#endif
-#define U_EXPORT2 __cdecl
-#define U_IMPORT __declspec(dllimport)
-
-/*===========================================================================*/
-/* Code alignment and C function inlining                                    */
-/*===========================================================================*/
-
-#ifndef U_INLINE
-#   ifdef __cplusplus
-#       define U_INLINE inline
-#   else
-#       define U_INLINE __inline
-#   endif
-#endif
-
-#if defined(_MSC_VER) && defined(_M_IX86) && !defined(_MANAGED)
-#define U_ALIGN_CODE(val)    __asm      align val
-#else
-#define U_ALIGN_CODE(val)
-#endif
-
-
-/*===========================================================================*/
-/* Programs used by ICU code                                                 */
-/*===========================================================================*/
-
-#ifndef U_MAKE
-#define U_MAKE  "nmake"
-#define U_MAKE_IS_NMAKE 1
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/pwin32.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/pwin32.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/pwin32.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/pwin32.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,311 @@
+/*
+ ******************************************************************************
+ *
+ *   Copyright (C) 1997-2007, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *
+ ******************************************************************************
+ *
+ *  FILE NAME : platform.h
+ *
+ *   Date        Name        Description
+ *   05/13/98    nos         Creation (content moved here from ptypes.h).
+ *   03/02/99    stephen     Added AS400 support.
+ *   03/30/99    stephen     Added Linux support.
+ *   04/13/99    stephen     Reworked for autoconf.
+ ******************************************************************************
+ */
+
+ /**
+  * \file
+  * \brief Configuration constants for the Windows platform
+  */
+  
+/* Define the platform we're on. */
+#ifndef U_WINDOWS
+#define U_WINDOWS
+#endif
+
+#if defined(__BORLANDC__)
+#define U_HAVE_PLACEMENT_NEW 0
+#define U_HAVE_INTTYPES_H 1
+#define __STDC_CONSTANT_MACROS
+#endif
+
+/* _MSC_VER is used to detect the Microsoft compiler. */
+#if defined(_MSC_VER)
+#define U_INT64_IS_LONG_LONG 0
+#else
+#define U_INT64_IS_LONG_LONG 1
+#endif
+
+/* Define whether inttypes.h is available */
+#ifndef U_HAVE_INTTYPES_H
+#define U_HAVE_INTTYPES_H 0
+#endif
+
+/*
+ * Define what support for C++ streams is available.
+ *     If U_IOSTREAM_SOURCE is set to 199711, then <iostream> is available
+ * (1997711 is the date the ISO/IEC C++ FDIS was published), and then
+ * one should qualify streams using the std namespace in ICU header
+ * files.
+ *     If U_IOSTREAM_SOURCE is set to 198506, then <iostream.h> is
+ * available instead (198506 is the date when Stroustrup published
+ * "An Extensible I/O Facility for C++" at the summer USENIX conference).
+ *     If U_IOSTREAM_SOURCE is 0, then C++ streams are not available and
+ * support for them will be silently suppressed in ICU.
+ *
+ */
+
+#ifndef U_IOSTREAM_SOURCE
+#define U_IOSTREAM_SOURCE 199711
+#endif
+
+/* Determines whether specific types are available */
+#ifndef U_HAVE_INT8_T
+#define U_HAVE_INT8_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_UINT8_T
+#define U_HAVE_UINT8_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_INT16_T
+#define U_HAVE_INT16_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_UINT16_T
+#define U_HAVE_UINT16_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_INT32_T
+#define U_HAVE_INT32_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_UINT32_T
+#define U_HAVE_UINT32_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_INT64_T
+#define U_HAVE_INT64_T U_HAVE_INTTYPES_H
+#endif
+
+#ifndef U_HAVE_UINT64_T
+#define U_HAVE_UINT64_T U_HAVE_INTTYPES_H
+#endif
+
+/* Define 64 bit limits */
+#if !U_INT64_IS_LONG_LONG
+# ifndef INT64_C
+#  define INT64_C(x) ((int64_t)x)
+# endif
+# ifndef UINT64_C
+#  define UINT64_C(x) ((uint64_t)x)
+# endif
+/* else use the umachine.h definition */
+#endif
+
+/*===========================================================================*/
+/* Generic data types                                                        */
+/*===========================================================================*/
+
+/* If your platform does not have the <inttypes.h> header, you may
+   need to edit the typedefs below. */
+#if U_HAVE_INTTYPES_H
+#include <inttypes.h>
+#else /* U_HAVE_INTTYPES_H */
+
+#if ! U_HAVE_INT8_T
+typedef signed char int8_t;
+#endif
+
+#if ! U_HAVE_UINT8_T
+typedef unsigned char uint8_t;
+#endif
+
+#if ! U_HAVE_INT16_T
+typedef signed short int16_t;
+#endif
+
+#if ! U_HAVE_UINT16_T
+typedef unsigned short uint16_t;
+#endif
+
+#if ! U_HAVE_INT32_T
+typedef signed int int32_t;
+#endif
+
+#if ! U_HAVE_UINT32_T
+typedef unsigned int uint32_t;
+#endif
+
+#if ! U_HAVE_INT64_T
+#if U_INT64_IS_LONG_LONG
+    typedef signed long long int64_t;
+#else
+    typedef signed __int64 int64_t;
+#endif
+#endif
+
+#if ! U_HAVE_UINT64_T
+#if U_INT64_IS_LONG_LONG
+    typedef unsigned long long uint64_t;
+#else
+    typedef unsigned __int64 uint64_t;
+#endif
+#endif
+#endif
+
+/*===========================================================================*/
+/* Compiler and environment features                                         */
+/*===========================================================================*/
+
+/* Define whether namespace is supported */
+#ifndef U_HAVE_NAMESPACE
+#define U_HAVE_NAMESPACE 1
+#endif
+
+/* Determines the endianness of the platform */
+#define U_IS_BIG_ENDIAN 0
+
+/* 1 or 0 to enable or disable threads.  If undefined, default is: enable threads. */
+#define ICU_USE_THREADS 1
+
+/* On strong memory model CPUs (e.g. x86 CPUs), we use a safe & quick double check mutex lock. */
+/*
+Microsoft can define _M_IX86, _M_AMD64 (before Visual Studio 8) or _M_X64 (starting in Visual Studio 8). 
+Intel can define _M_IX86 or _M_X64
+*/
+#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+#define UMTX_STRONG_MEMORY_MODEL 1
+#endif
+
+#ifndef U_DEBUG
+#ifdef _DEBUG
+#define U_DEBUG 1
+#else
+#define U_DEBUG 0
+#endif
+#endif
+
+#ifndef U_RELEASE
+#ifdef NDEBUG
+#define U_RELEASE 1
+#else
+#define U_RELEASE 0
+#endif
+#endif
+
+/* Determine whether to disable renaming or not. This overrides the
+   setting in umachine.h which is for all platforms. */
+#ifndef U_DISABLE_RENAMING
+#define U_DISABLE_RENAMING 0
+#endif
+
+/* Determine whether to override new and delete. */
+#ifndef U_OVERRIDE_CXX_ALLOCATION
+#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+/* Determine whether to override placement new and delete for STL. */
+#ifndef U_HAVE_PLACEMENT_NEW
+#define U_HAVE_PLACEMENT_NEW 1
+#endif
+/* Determine whether to override new and delete for MFC. */
+#if !defined(U_HAVE_DEBUG_LOCATION_NEW) && defined(_MSC_VER)
+#define U_HAVE_DEBUG_LOCATION_NEW 1
+#endif
+
+/* Determine whether to enable tracing. */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 0
+#endif
+
+/* Do we allow ICU users to use the draft APIs by default? */
+#ifndef U_DEFAULT_SHOW_DRAFT
+#define U_DEFAULT_SHOW_DRAFT 1
+#endif
+
+/* Define the library suffix in a C syntax. */
+#define U_HAVE_LIB_SUFFIX 0
+#define U_LIB_SUFFIX_C_NAME 
+#define U_LIB_SUFFIX_C_NAME_STRING ""
+
+/*===========================================================================*/
+/* Information about wchar support                                           */
+/*===========================================================================*/
+
+#define U_HAVE_WCHAR_H 1
+#define U_SIZEOF_WCHAR_T 2
+
+#define U_HAVE_WCSCPY 1
+
+/**
+ * \def U_DECLARE_UTF16
+ * Do not use this macro. Use the UNICODE_STRING or U_STRING_DECL macros
+ * instead.
+ * @internal
+ */
+#if 1
+#define U_DECLARE_UTF16(string) L ## string
+#endif
+
+/*===========================================================================*/
+/* Information about POSIX support                                           */
+/*===========================================================================*/
+
+#if 1
+#define U_TZSET         _tzset
+#endif
+#if 1
+#define U_TIMEZONE      _timezone
+#endif
+#if 1
+#define U_TZNAME        _tzname
+#endif
+#if 1
+#define U_DAYLIGHT      _daylight
+#endif
+
+#define U_HAVE_MMAP 0
+#define U_HAVE_POPEN 0
+
+/*===========================================================================*/
+/* Symbol import-export control                                              */
+/*===========================================================================*/
+
+#ifdef U_STATIC_IMPLEMENTATION
+#define U_EXPORT
+#else
+#define U_EXPORT __declspec(dllexport)
+#endif
+#define U_EXPORT2 __cdecl
+#define U_IMPORT __declspec(dllimport)
+
+/*===========================================================================*/
+/* Code alignment and C function inlining                                    */
+/*===========================================================================*/
+
+#ifndef U_INLINE
+#   ifdef __cplusplus
+#       define U_INLINE inline
+#   else
+#       define U_INLINE __inline
+#   endif
+#endif
+
+#if defined(_MSC_VER) && defined(_M_IX86) && !defined(_MANAGED)
+#define U_ALIGN_CODE(val)    __asm      align val
+#else
+#define U_ALIGN_CODE(val)
+#endif
+
+
+/*===========================================================================*/
+/* Programs used by ICU code                                                 */
+/*===========================================================================*/
+
+#ifndef U_MAKE
+#define U_MAKE  "nmake"
+#define U_MAKE_IS_NMAKE 1
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/rbbi.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/rbbi.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/rbbi.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,722 +0,0 @@
-/*
-***************************************************************************
-*   Copyright (C) 1999-2008 International Business Machines Corporation   *
-*   and others. All rights reserved.                                      *
-***************************************************************************
-
-**********************************************************************
-*   Date        Name        Description
-*   10/22/99    alan        Creation.
-*   11/11/99    rgillam     Complete port from Java.
-**********************************************************************
-*/
-
-#ifndef RBBI_H
-#define RBBI_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief C++ API: Rule Based Break Iterator
- */
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/brkiter.h"
-#include "unicode/udata.h"
-#include "unicode/parseerr.h"
-#include "unicode/schriter.h"
-#include "unicode/uchriter.h"
-
-
-struct UTrie;
-
-U_NAMESPACE_BEGIN
-
-/** @internal */
-struct RBBIDataHeader;
-class  RuleBasedBreakIteratorTables;
-class  BreakIterator;
-class  RBBIDataWrapper;
-class  UStack;
-class  LanguageBreakEngine;
-class  UnhandledEngine;
-struct RBBIStateTable;
-
-
-
-
-/**
- *
- * A subclass of BreakIterator whose behavior is specified using a list of rules.
- * <p>Instances of this class are most commonly created by the factory methods of
- *  BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc.,
- *  and then used via the abstract API in class BreakIterator</p>
- *
- * <p>See the ICU User Guide for information on Break Iterator Rules.</p>
- *
- * <p>This class is not intended to be subclassed.  (Class DictionaryBasedBreakIterator
- *    is a subclass, but that relationship is effectively internal to the ICU
- *    implementation.  The subclassing interface to RulesBasedBreakIterator is
- *    not part of the ICU API, and may not remain stable.</p>
- *
- */
-class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
-
-protected:
-    /**
-     * The UText through which this BreakIterator accesses the text
-     * @internal
-     */
-    UText  *fText;
-
-    /**
-     *   A character iterator that refers to the same text as the UText, above.
-     *   Only included for compatibility with old API, which was based on CharacterIterators.
-     *   Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
-     */
-    CharacterIterator  *fCharIter;
-
-    /**
-     *   When the input text is provided by a UnicodeString, this will point to
-     *    a characterIterator that wraps that data.  Needed only for the
-     *    implementation of getText(), a backwards compatibility issue.
-     */
-    StringCharacterIterator *fSCharIter;
-
-    /**
-     *  When the input text is provided by a UText, this
-     *    dummy CharacterIterator over an empty string will
-     *    be returned from getText()
-     */
-    UCharCharacterIterator *fDCharIter;
-
-    /**
-     * The rule data for this BreakIterator instance
-     * @internal
-     */
-    RBBIDataWrapper    *fData;
-
-    /** Index of the Rule {tag} values for the most recent match.
-     *  @internal
-    */
-    int32_t             fLastRuleStatusIndex;
-
-    /**
-     * Rule tag value valid flag.
-     * Some iterator operations don't intrinsically set the correct tag value.
-     * This flag lets us lazily compute the value if we are ever asked for it.
-     * @internal
-     */
-    UBool               fLastStatusIndexValid;
-
-    /**
-     * Counter for the number of characters encountered with the "dictionary"
-     *   flag set.
-     * @internal
-     */
-    uint32_t            fDictionaryCharCount;
-
-    /**
-     * When a range of characters is divided up using the dictionary, the break
-     * positions that are discovered are stored here, preventing us from having
-     * to use either the dictionary or the state table again until the iterator
-     * leaves this range of text. Has the most impact for line breaking.
-     * @internal
-     */
-    int32_t*            fCachedBreakPositions;
-
-    /**
-     * The number of elements in fCachedBreakPositions
-     * @internal
-     */
-    int32_t             fNumCachedBreakPositions;
-
-    /**
-     * if fCachedBreakPositions is not null, this indicates which item in the
-     * cache the current iteration position refers to
-     * @internal
-     */
-    int32_t             fPositionInCache;
-    
-    /**
-     *
-     * If present, UStack of LanguageBreakEngine objects that might handle
-     * dictionary characters. Searched from top to bottom to find an object to
-     * handle a given character.
-     * @internal
-     */
-    UStack              *fLanguageBreakEngines;
-    
-    /**
-     *
-     * If present, the special LanguageBreakEngine used for handling
-     * characters that are in the dictionary set, but not handled by any
-     * LangugageBreakEngine.
-     * @internal
-     */
-    UnhandledEngine     *fUnhandledBreakEngine;
-    
-    /**
-     *
-     * The type of the break iterator, or -1 if it has not been set.
-     * @internal
-     */
-    int32_t             fBreakType;
-    
-protected:
-    //=======================================================================
-    // constructors
-    //=======================================================================
-
-    /**
-     * Constant to be used in the constructor
-     * RuleBasedBreakIterator(RBBIDataHeader*, EDontAdopt, UErrorCode &);
-     * which does not adopt the memory indicated by the RBBIDataHeader*
-     * parameter.
-     *
-     * @internal
-     */
-    enum EDontAdopt {
-        kDontAdopt
-    };
-
-    /**
-     * Constructor from a flattened set of RBBI data in malloced memory.
-     *             RulesBasedBreakIterators built from a custom set of rules
-     *             are created via this constructor; the rules are compiled
-     *             into memory, then the break iterator is constructed here.
-     *
-     *             The break iterator adopts the memory, and will
-     *             free it when done.
-     * @internal
-     */
-    RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
-
-    /**
-     * Constructor from a flattened set of RBBI data in memory which need not
-     *             be malloced (e.g. it may be a memory-mapped file, etc.).
-     *
-     *             This version does not adopt the memory, and does not
-     *             free it when done.
-     * @internal
-     */
-    RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
-
-
-    friend class RBBIRuleBuilder;
-    /** @internal */
-    friend class BreakIterator;
-
-
-
-public:
-
-    /** Default constructor.  Creates an empty shell of an iterator, with no
-     *  rules or text to iterate over.   Object can subsequently be assigned to.
-     *  @stable ICU 2.2
-     */
-    RuleBasedBreakIterator();
-
-    /**
-     * Copy constructor.  Will produce a break iterator with the same behavior,
-     * and which iterates over the same text, as the one passed in.
-     * @param that The RuleBasedBreakIterator passed to be copied
-     * @stable ICU 2.0
-     */
-    RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
-
-    /**
-     * Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
-     * @param rules The break rules to be used.
-     * @param parseError  In the event of a syntax error in the rules, provides the location
-     *                    within the rules of the problem.
-     * @param status Information on any errors encountered.
-     * @stable ICU 2.2
-     */
-    RuleBasedBreakIterator( const UnicodeString    &rules,
-                             UParseError           &parseError,
-                             UErrorCode            &status);
-
-
-    /**
-     * This constructor uses the udata interface to create a BreakIterator
-     * whose internal tables live in a memory-mapped file.  "image" is an
-     * ICU UDataMemory handle for the pre-compiled break iterator tables.
-     * @param image handle to the memory image for the break iterator data.
-     *        Ownership of the UDataMemory handle passes to the Break Iterator,
-     *        which will be responsible for closing it when it is no longer needed.
-     * @param status Information on any errors encountered.
-     * @see udata_open
-     * @see #getBinaryRules
-     * @stable ICU 2.8
-     */
-    RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
-
-    /**
-     * Destructor
-     *  @stable ICU 2.0
-     */
-    virtual ~RuleBasedBreakIterator();
-
-    /**
-     * Assignment operator.  Sets this iterator to have the same behavior,
-     * and iterate over the same text, as the one passed in.
-     * @param that The RuleBasedBreakItertor passed in
-     * @return the newly created RuleBasedBreakIterator
-     *  @stable ICU 2.0
-     */
-    RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
-
-    /**
-     * Equality operator.  Returns TRUE if both BreakIterators are of the
-     * same class, have the same behavior, and iterate over the same text.
-     * @param that The BreakIterator to be compared for equality
-     * @return TRUE if both BreakIterators are of the
-     * same class, have the same behavior, and iterate over the same text.
-     *  @stable ICU 2.0
-     */
-    virtual UBool operator==(const BreakIterator& that) const;
-
-    /**
-     * Not-equal operator.  If operator== returns TRUE, this returns FALSE,
-     * and vice versa.
-     * @param that The BreakIterator to be compared for inequality
-     * @return TRUE if both BreakIterators are not same.
-     *  @stable ICU 2.0
-     */
-    UBool operator!=(const BreakIterator& that) const;
-
-    /**
-     * Returns a newly-constructed RuleBasedBreakIterator with the same
-     * behavior, and iterating over the same text, as this one.
-     * Differs from the copy constructor in that it is polymorphic, and
-     * will correctly clone (copy) a derived class.
-     * clone() is thread safe.  Multiple threads may simultaeneously
-     * clone the same source break iterator.
-     * @return a newly-constructed RuleBasedBreakIterator
-     * @stable ICU 2.0
-     */
-    virtual BreakIterator* clone() const;
-
-    /**
-     * Compute a hash code for this BreakIterator
-     * @return A hash code
-     *  @stable ICU 2.0
-     */
-    virtual int32_t hashCode(void) const;
-
-    /**
-     * Returns the description used to create this iterator
-     * @return the description used to create this iterator
-     *  @stable ICU 2.0
-     */
-    virtual const UnicodeString& getRules(void) const;
-
-    //=======================================================================
-    // BreakIterator overrides
-    //=======================================================================
-
-    /**
-     * <p>
-     * Return a CharacterIterator over the text being analyzed.
-     * The returned character iterator is owned by the break iterator, and must
-     * not be deleted by the caller.  Repeated calls to this function may
-     * return the same CharacterIterator.
-     * </p>
-     * <p>
-     * The returned character iterator must not be used concurrently with
-     * the break iterator.  If concurrent operation is needed, clone the
-     * returned character iterator first and operate on the clone.
-     * </p>
-     * <p>
-     * When the break iterator is operating on text supplied via a UText,
-     * this function will fail.  Lacking any way to signal failures, it
-     * returns an CharacterIterator containing no text.
-     * The function getUText() provides similar functionality,
-     * is reliable, and is more efficient.
-     * </p>
-     *
-     * TODO:  deprecate this function?
-     *
-     * @return An iterator over the text being analyzed.
-     * @stable ICU 2.0
-     */
-    virtual  CharacterIterator& getText(void) const;
-
-
-    /**
-      *  Get a UText for the text being analyzed.
-      *  The returned UText is a shallow clone of the UText used internally
-      *  by the break iterator implementation.  It can safely be used to
-      *  access the text without impacting any break iterator operations,
-      *  but the underlying text itself must not be altered.
-      *
-      * @param fillIn A UText to be filled in.  If NULL, a new UText will be
-      *           allocated to hold the result.
-      * @param status receives any error codes.
-      * @return   The current UText for this break iterator.  If an input
-      *           UText was provided, it will always be returned.
-      * @stable ICU 3.4
-      */
-     virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
-
-    /**
-     * Set the iterator to analyze a new piece of text.  This function resets
-     * the current iteration position to the beginning of the text.
-     * @param newText An iterator over the text to analyze.  The BreakIterator
-     * takes ownership of the character iterator.  The caller MUST NOT delete it!
-     *  @stable ICU 2.0
-     */
-    virtual void adoptText(CharacterIterator* newText);
-
-    /**
-     * Set the iterator to analyze a new piece of text.  This function resets
-     * the current iteration position to the beginning of the text.
-     * @param newText The text to analyze.
-     *  @stable ICU 2.0
-     */
-    virtual void setText(const UnicodeString& newText);
-
-    /**
-     * Reset the break iterator to operate over the text represented by
-     * the UText.  The iterator position is reset to the start.
-     *
-     * This function makes a shallow clone of the supplied UText.  This means
-     * that the caller is free to immediately close or otherwise reuse the
-     * Utext that was passed as a parameter, but that the underlying text itself
-     * must not be altered while being referenced by the break iterator.
-     *
-     * @param text    The UText used to change the text.
-     * @param status  Receives any error codes.
-     * @stable ICU 3.4
-     */
-    virtual void  setText(UText *text, UErrorCode &status);
-
-    /**
-     * Sets the current iteration position to the beginning of the text.
-     * @return The offset of the beginning of the text.
-     *  @stable ICU 2.0
-     */
-    virtual int32_t first(void);
-
-    /**
-     * Sets the current iteration position to the end of the text.
-     * @return The text's past-the-end offset.
-     *  @stable ICU 2.0
-     */
-    virtual int32_t last(void);
-
-    /**
-     * Advances the iterator either forward or backward the specified number of steps.
-     * Negative values move backward, and positive values move forward.  This is
-     * equivalent to repeatedly calling next() or previous().
-     * @param n The number of steps to move.  The sign indicates the direction
-     * (negative is backwards, and positive is forwards).
-     * @return The character offset of the boundary position n boundaries away from
-     * the current one.
-     *  @stable ICU 2.0
-     */
-    virtual int32_t next(int32_t n);
-
-    /**
-     * Advances the iterator to the next boundary position.
-     * @return The position of the first boundary after this one.
-     *  @stable ICU 2.0
-     */
-    virtual int32_t next(void);
-
-    /**
-     * Moves the iterator backwards, to the last boundary preceding this one.
-     * @return The position of the last boundary position preceding this one.
-     *  @stable ICU 2.0
-     */
-    virtual int32_t previous(void);
-
-    /**
-     * Sets the iterator to refer to the first boundary position following
-     * the specified position.
-     * @param offset The position from which to begin searching for a break position.
-     * @return The position of the first break after the current position.
-     *  @stable ICU 2.0
-     */
-    virtual int32_t following(int32_t offset);
-
-    /**
-     * Sets the iterator to refer to the last boundary position before the
-     * specified position.
-     * @param offset The position to begin searching for a break from.
-     * @return The position of the last boundary before the starting position.
-     *  @stable ICU 2.0
-     */
-    virtual int32_t preceding(int32_t offset);
-
-    /**
-     * Returns true if the specfied position is a boundary position.  As a side
-     * effect, leaves the iterator pointing to the first boundary position at
-     * or after "offset".
-     * @param offset the offset to check.
-     * @return True if "offset" is a boundary position.
-     *  @stable ICU 2.0
-     */
-    virtual UBool isBoundary(int32_t offset);
-
-    /**
-     * Returns the current iteration position.
-     * @return The current iteration position.
-     * @stable ICU 2.0
-     */
-    virtual int32_t current(void) const;
-
-
-    /**
-     * Return the status tag from the break rule that determined the most recently
-     * returned break position.  For break rules that do not specify a
-     * status, a default value of 0 is returned.  If more than one break rule
-     * would cause a boundary to be located at some position in the text,
-     * the numerically largest of the applicable status values is returned.
-     * <p>
-     * Of the standard types of ICU break iterators, only word break and
-     * line break provide status values.  The values are defined in
-     * the header file ubrk.h.  For Word breaks, the status allows distinguishing between words
-     * that contain alphabetic letters, "words" that appear to be numbers,
-     * punctuation and spaces, words containing ideographic characters, and
-     * more.  For Line Break, the status distinguishes between hard (mandatory) breaks
-     * and soft (potential) break positions.
-     * <p>
-     * <code>getRuleStatus()</code> can be called after obtaining a boundary
-     * position from <code>next()</code>, <code>previous()</code>, or
-     * any other break iterator functions that returns a boundary position.
-     * <p>
-     * When creating custom break rules, one is free to define whatever
-     * status values may be convenient for the application.
-     * <p>
-     * Note: this function is not thread safe.  It should not have been
-     *       declared const, and the const remains only for compatibility
-     *       reasons.  (The function is logically const, but not bit-wise const).
-     * <p>
-     * @return the status from the break rule that determined the most recently
-     * returned break position.
-     *
-     * @see UWordBreak
-     * @stable ICU 2.2
-     */
-    virtual int32_t getRuleStatus() const;
-
-   /**
-    * Get the status (tag) values from the break rule(s) that determined the most
-    * recently returned break position.
-    * <p>
-    * The returned status value(s) are stored into an array provided by the caller.
-    * The values are stored in sorted (ascending) order.
-    * If the capacity of the output array is insufficient to hold the data,
-    *  the output will be truncated to the available length, and a
-    *  U_BUFFER_OVERFLOW_ERROR will be signaled.
-    *
-    * @param fillInVec an array to be filled in with the status values.
-    * @param capacity  the length of the supplied vector.  A length of zero causes
-    *                  the function to return the number of status values, in the
-    *                  normal way, without attemtping to store any values.
-    * @param status    receives error codes.
-    * @return          The number of rule status values from rules that determined
-    *                  the most recent boundary returned by the break iterator.
-    *                  In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
-    *                  is the total number of status values that were available,
-    *                  not the reduced number that were actually returned.
-    * @see getRuleStatus
-    * @stable ICU 3.0
-    */
-    virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
-     * This method is to implement a simple version of RTTI, since not all
-     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
-     * clone() methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-
-    /**
-     * Returns the class ID for this class.  This is useful only for
-     * comparing to a return value from getDynamicClassID().  For example:
-     *
-     *      Base* polymorphic_pointer = createPolymorphicObject();
-     *      if (polymorphic_pointer->getDynamicClassID() ==
-     *          Derived::getStaticClassID()) ...
-     *
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /*
-     * Create a clone (copy) of this break iterator in memory provided
-     *  by the caller.  The idea is to increase performance by avoiding
-     *  a storage allocation.  Use of this functoin is NOT RECOMMENDED.
-     *  Performance gains are minimal, and correct buffer management is
-     *  tricky.  Use clone() instead.
-     *
-     * @param stackBuffer  The pointer to the memory into which the cloned object
-     *                     should be placed.  If NULL,  allocate heap memory
-     *                     for the cloned object.
-     * @param BufferSize   The size of the buffer.  If zero, return the required
-     *                     buffer size, but do not clone the object.  If the
-     *                     size was too small (but not zero), allocate heap
-     *                     storage for the cloned object.
-     *
-     * @param status       Error status.  U_SAFECLONE_ALLOCATED_WARNING will be
-     *                     returned if the the provided buffer was too small, and
-     *                     the clone was therefore put on the heap.
-     *
-     * @return  Pointer to the clone object.  This may differ from the stackBuffer
-     *          address if the byte alignment of the stack buffer was not suitable
-     *          or if the stackBuffer was too small to hold the clone.
-     * @stable ICU 2.0
-     */
-    virtual BreakIterator *  createBufferClone(void *stackBuffer,
-                                               int32_t &BufferSize,
-                                               UErrorCode &status);
-
-
-    /**
-     * Return the binary form of compiled break rules,
-     * which can then be used to create a new break iterator at some
-     * time in the future.  Creating a break iterator from pre-compiled rules
-     * is much faster than building one from the source form of the
-     * break rules.
-     *
-     * The binary data can only be used with the same version of ICU
-     *  and on the same platform type (processor endian-ness)
-     *
-     * @param length Returns the length of the binary data.  (Out paramter.)
-     *
-     * @return   A pointer to the binary (compiled) rule data.  The storage
-     *           belongs to the RulesBasedBreakIterator object, not the
-     *           caller, and must not be modified or deleted.
-     * @internal
-     */
-    virtual const uint8_t *getBinaryRules(uint32_t &length);
-
-
-protected:
-    //=======================================================================
-    // implementation
-    //=======================================================================
-    /**
-     * Dumps caches and performs other actions associated with a complete change
-     * in text or iteration position.
-     * @internal
-     */
-    virtual void reset(void);
-
-#if 0
-    /**
-      * Return true if the category lookup for this char
-      * indicates that it is in the set of dictionary lookup chars.
-      * This function is intended for use by dictionary based break iterators.
-      * @return true if the category lookup for this char
-      * indicates that it is in the set of dictionary lookup chars.
-      * @internal
-      */
-    virtual UBool isDictionaryChar(UChar32);
-
-    /**
-      * Get the type of the break iterator.
-      * @internal
-      */
-    virtual int32_t getBreakType() const;
-#endif
-
-    /**
-      * Set the type of the break iterator.
-      * @internal
-      */
-    virtual void setBreakType(int32_t type);
-
-    /**
-      * Common initialization function, used by constructors and bufferClone.
-      *   (Also used by DictionaryBasedBreakIterator::createBufferClone().)
-      * @internal
-      */
-    void init();
-
-private:
-
-    /**
-     * This method backs the iterator back up to a "safe position" in the text.
-     * This is a position that we know, without any context, must be a break position.
-     * The various calling methods then iterate forward from this safe position to
-     * the appropriate position to return.  (For more information, see the description
-     * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
-     * @param statetable state table used of moving backwards
-     * @internal
-     */
-    int32_t handlePrevious(const RBBIStateTable *statetable);
-
-    /**
-     * This method is the actual implementation of the next() method.  All iteration
-     * vectors through here.  This method initializes the state machine to state 1
-     * and advances through the text character by character until we reach the end
-     * of the text or the state machine transitions to state 0.  We update our return
-     * value every time the state machine passes through a possible end state.
-     * @param statetable state table used of moving forwards
-     * @internal
-     */
-    int32_t handleNext(const RBBIStateTable *statetable);
-
-protected:
-
-    /**
-     * This is the function that actually implements dictionary-based
-     * breaking.  Covering at least the range from startPos to endPos,
-     * it checks for dictionary characters, and if it finds them determines
-     * the appropriate object to deal with them. It may cache found breaks in
-     * fCachedBreakPositions as it goes. It may well also look at text outside
-     * the range startPos to endPos.
-     * If going forward, endPos is the normal Unicode break result, and
-     * if goind in reverse, startPos is the normal Unicode break result
-     * @param startPos  The start position of a range of text
-     * @param endPos    The end position of a range of text
-     * @param reverse   The call is for the reverse direction
-     * @internal
-     */
-    int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
-
-private:
-
-    /**
-     * This function returns the appropriate LanguageBreakEngine for a
-     * given character c.
-     * @param c         A character in the dictionary set
-     * @internal
-     */
-    const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
-
-    /**
-     *  @internal
-     */
-    void makeRuleStatusValid();
-
-};
-
-//------------------------------------------------------------------------------
-//
-//   Inline Functions Definitions ...
-//
-//------------------------------------------------------------------------------
-
-inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
-    return !operator==(that);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/rbbi.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/rbbi.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/rbbi.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/rbbi.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,722 @@
+/*
+***************************************************************************
+*   Copyright (C) 1999-2008 International Business Machines Corporation   *
+*   and others. All rights reserved.                                      *
+***************************************************************************
+
+**********************************************************************
+*   Date        Name        Description
+*   10/22/99    alan        Creation.
+*   11/11/99    rgillam     Complete port from Java.
+**********************************************************************
+*/
+
+#ifndef RBBI_H
+#define RBBI_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: Rule Based Break Iterator
+ */
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/udata.h"
+#include "unicode/parseerr.h"
+#include "unicode/schriter.h"
+#include "unicode/uchriter.h"
+
+
+struct UTrie;
+
+U_NAMESPACE_BEGIN
+
+/** @internal */
+struct RBBIDataHeader;
+class  RuleBasedBreakIteratorTables;
+class  BreakIterator;
+class  RBBIDataWrapper;
+class  UStack;
+class  LanguageBreakEngine;
+class  UnhandledEngine;
+struct RBBIStateTable;
+
+
+
+
+/**
+ *
+ * A subclass of BreakIterator whose behavior is specified using a list of rules.
+ * <p>Instances of this class are most commonly created by the factory methods of
+ *  BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc.,
+ *  and then used via the abstract API in class BreakIterator</p>
+ *
+ * <p>See the ICU User Guide for information on Break Iterator Rules.</p>
+ *
+ * <p>This class is not intended to be subclassed.  (Class DictionaryBasedBreakIterator
+ *    is a subclass, but that relationship is effectively internal to the ICU
+ *    implementation.  The subclassing interface to RulesBasedBreakIterator is
+ *    not part of the ICU API, and may not remain stable.</p>
+ *
+ */
+class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
+
+protected:
+    /**
+     * The UText through which this BreakIterator accesses the text
+     * @internal
+     */
+    UText  *fText;
+
+    /**
+     *   A character iterator that refers to the same text as the UText, above.
+     *   Only included for compatibility with old API, which was based on CharacterIterators.
+     *   Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
+     */
+    CharacterIterator  *fCharIter;
+
+    /**
+     *   When the input text is provided by a UnicodeString, this will point to
+     *    a characterIterator that wraps that data.  Needed only for the
+     *    implementation of getText(), a backwards compatibility issue.
+     */
+    StringCharacterIterator *fSCharIter;
+
+    /**
+     *  When the input text is provided by a UText, this
+     *    dummy CharacterIterator over an empty string will
+     *    be returned from getText()
+     */
+    UCharCharacterIterator *fDCharIter;
+
+    /**
+     * The rule data for this BreakIterator instance
+     * @internal
+     */
+    RBBIDataWrapper    *fData;
+
+    /** Index of the Rule {tag} values for the most recent match.
+     *  @internal
+    */
+    int32_t             fLastRuleStatusIndex;
+
+    /**
+     * Rule tag value valid flag.
+     * Some iterator operations don't intrinsically set the correct tag value.
+     * This flag lets us lazily compute the value if we are ever asked for it.
+     * @internal
+     */
+    UBool               fLastStatusIndexValid;
+
+    /**
+     * Counter for the number of characters encountered with the "dictionary"
+     *   flag set.
+     * @internal
+     */
+    uint32_t            fDictionaryCharCount;
+
+    /**
+     * When a range of characters is divided up using the dictionary, the break
+     * positions that are discovered are stored here, preventing us from having
+     * to use either the dictionary or the state table again until the iterator
+     * leaves this range of text. Has the most impact for line breaking.
+     * @internal
+     */
+    int32_t*            fCachedBreakPositions;
+
+    /**
+     * The number of elements in fCachedBreakPositions
+     * @internal
+     */
+    int32_t             fNumCachedBreakPositions;
+
+    /**
+     * if fCachedBreakPositions is not null, this indicates which item in the
+     * cache the current iteration position refers to
+     * @internal
+     */
+    int32_t             fPositionInCache;
+    
+    /**
+     *
+     * If present, UStack of LanguageBreakEngine objects that might handle
+     * dictionary characters. Searched from top to bottom to find an object to
+     * handle a given character.
+     * @internal
+     */
+    UStack              *fLanguageBreakEngines;
+    
+    /**
+     *
+     * If present, the special LanguageBreakEngine used for handling
+     * characters that are in the dictionary set, but not handled by any
+     * LangugageBreakEngine.
+     * @internal
+     */
+    UnhandledEngine     *fUnhandledBreakEngine;
+    
+    /**
+     *
+     * The type of the break iterator, or -1 if it has not been set.
+     * @internal
+     */
+    int32_t             fBreakType;
+    
+protected:
+    //=======================================================================
+    // constructors
+    //=======================================================================
+
+    /**
+     * Constant to be used in the constructor
+     * RuleBasedBreakIterator(RBBIDataHeader*, EDontAdopt, UErrorCode &);
+     * which does not adopt the memory indicated by the RBBIDataHeader*
+     * parameter.
+     *
+     * @internal
+     */
+    enum EDontAdopt {
+        kDontAdopt
+    };
+
+    /**
+     * Constructor from a flattened set of RBBI data in malloced memory.
+     *             RulesBasedBreakIterators built from a custom set of rules
+     *             are created via this constructor; the rules are compiled
+     *             into memory, then the break iterator is constructed here.
+     *
+     *             The break iterator adopts the memory, and will
+     *             free it when done.
+     * @internal
+     */
+    RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
+
+    /**
+     * Constructor from a flattened set of RBBI data in memory which need not
+     *             be malloced (e.g. it may be a memory-mapped file, etc.).
+     *
+     *             This version does not adopt the memory, and does not
+     *             free it when done.
+     * @internal
+     */
+    RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
+
+
+    friend class RBBIRuleBuilder;
+    /** @internal */
+    friend class BreakIterator;
+
+
+
+public:
+
+    /** Default constructor.  Creates an empty shell of an iterator, with no
+     *  rules or text to iterate over.   Object can subsequently be assigned to.
+     *  @stable ICU 2.2
+     */
+    RuleBasedBreakIterator();
+
+    /**
+     * Copy constructor.  Will produce a break iterator with the same behavior,
+     * and which iterates over the same text, as the one passed in.
+     * @param that The RuleBasedBreakIterator passed to be copied
+     * @stable ICU 2.0
+     */
+    RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
+
+    /**
+     * Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
+     * @param rules The break rules to be used.
+     * @param parseError  In the event of a syntax error in the rules, provides the location
+     *                    within the rules of the problem.
+     * @param status Information on any errors encountered.
+     * @stable ICU 2.2
+     */
+    RuleBasedBreakIterator( const UnicodeString    &rules,
+                             UParseError           &parseError,
+                             UErrorCode            &status);
+
+
+    /**
+     * This constructor uses the udata interface to create a BreakIterator
+     * whose internal tables live in a memory-mapped file.  "image" is an
+     * ICU UDataMemory handle for the pre-compiled break iterator tables.
+     * @param image handle to the memory image for the break iterator data.
+     *        Ownership of the UDataMemory handle passes to the Break Iterator,
+     *        which will be responsible for closing it when it is no longer needed.
+     * @param status Information on any errors encountered.
+     * @see udata_open
+     * @see #getBinaryRules
+     * @stable ICU 2.8
+     */
+    RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
+
+    /**
+     * Destructor
+     *  @stable ICU 2.0
+     */
+    virtual ~RuleBasedBreakIterator();
+
+    /**
+     * Assignment operator.  Sets this iterator to have the same behavior,
+     * and iterate over the same text, as the one passed in.
+     * @param that The RuleBasedBreakItertor passed in
+     * @return the newly created RuleBasedBreakIterator
+     *  @stable ICU 2.0
+     */
+    RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
+
+    /**
+     * Equality operator.  Returns TRUE if both BreakIterators are of the
+     * same class, have the same behavior, and iterate over the same text.
+     * @param that The BreakIterator to be compared for equality
+     * @return TRUE if both BreakIterators are of the
+     * same class, have the same behavior, and iterate over the same text.
+     *  @stable ICU 2.0
+     */
+    virtual UBool operator==(const BreakIterator& that) const;
+
+    /**
+     * Not-equal operator.  If operator== returns TRUE, this returns FALSE,
+     * and vice versa.
+     * @param that The BreakIterator to be compared for inequality
+     * @return TRUE if both BreakIterators are not same.
+     *  @stable ICU 2.0
+     */
+    UBool operator!=(const BreakIterator& that) const;
+
+    /**
+     * Returns a newly-constructed RuleBasedBreakIterator with the same
+     * behavior, and iterating over the same text, as this one.
+     * Differs from the copy constructor in that it is polymorphic, and
+     * will correctly clone (copy) a derived class.
+     * clone() is thread safe.  Multiple threads may simultaeneously
+     * clone the same source break iterator.
+     * @return a newly-constructed RuleBasedBreakIterator
+     * @stable ICU 2.0
+     */
+    virtual BreakIterator* clone() const;
+
+    /**
+     * Compute a hash code for this BreakIterator
+     * @return A hash code
+     *  @stable ICU 2.0
+     */
+    virtual int32_t hashCode(void) const;
+
+    /**
+     * Returns the description used to create this iterator
+     * @return the description used to create this iterator
+     *  @stable ICU 2.0
+     */
+    virtual const UnicodeString& getRules(void) const;
+
+    //=======================================================================
+    // BreakIterator overrides
+    //=======================================================================
+
+    /**
+     * <p>
+     * Return a CharacterIterator over the text being analyzed.
+     * The returned character iterator is owned by the break iterator, and must
+     * not be deleted by the caller.  Repeated calls to this function may
+     * return the same CharacterIterator.
+     * </p>
+     * <p>
+     * The returned character iterator must not be used concurrently with
+     * the break iterator.  If concurrent operation is needed, clone the
+     * returned character iterator first and operate on the clone.
+     * </p>
+     * <p>
+     * When the break iterator is operating on text supplied via a UText,
+     * this function will fail.  Lacking any way to signal failures, it
+     * returns an CharacterIterator containing no text.
+     * The function getUText() provides similar functionality,
+     * is reliable, and is more efficient.
+     * </p>
+     *
+     * TODO:  deprecate this function?
+     *
+     * @return An iterator over the text being analyzed.
+     * @stable ICU 2.0
+     */
+    virtual  CharacterIterator& getText(void) const;
+
+
+    /**
+      *  Get a UText for the text being analyzed.
+      *  The returned UText is a shallow clone of the UText used internally
+      *  by the break iterator implementation.  It can safely be used to
+      *  access the text without impacting any break iterator operations,
+      *  but the underlying text itself must not be altered.
+      *
+      * @param fillIn A UText to be filled in.  If NULL, a new UText will be
+      *           allocated to hold the result.
+      * @param status receives any error codes.
+      * @return   The current UText for this break iterator.  If an input
+      *           UText was provided, it will always be returned.
+      * @stable ICU 3.4
+      */
+     virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
+
+    /**
+     * Set the iterator to analyze a new piece of text.  This function resets
+     * the current iteration position to the beginning of the text.
+     * @param newText An iterator over the text to analyze.  The BreakIterator
+     * takes ownership of the character iterator.  The caller MUST NOT delete it!
+     *  @stable ICU 2.0
+     */
+    virtual void adoptText(CharacterIterator* newText);
+
+    /**
+     * Set the iterator to analyze a new piece of text.  This function resets
+     * the current iteration position to the beginning of the text.
+     * @param newText The text to analyze.
+     *  @stable ICU 2.0
+     */
+    virtual void setText(const UnicodeString& newText);
+
+    /**
+     * Reset the break iterator to operate over the text represented by
+     * the UText.  The iterator position is reset to the start.
+     *
+     * This function makes a shallow clone of the supplied UText.  This means
+     * that the caller is free to immediately close or otherwise reuse the
+     * Utext that was passed as a parameter, but that the underlying text itself
+     * must not be altered while being referenced by the break iterator.
+     *
+     * @param text    The UText used to change the text.
+     * @param status  Receives any error codes.
+     * @stable ICU 3.4
+     */
+    virtual void  setText(UText *text, UErrorCode &status);
+
+    /**
+     * Sets the current iteration position to the beginning of the text.
+     * @return The offset of the beginning of the text.
+     *  @stable ICU 2.0
+     */
+    virtual int32_t first(void);
+
+    /**
+     * Sets the current iteration position to the end of the text.
+     * @return The text's past-the-end offset.
+     *  @stable ICU 2.0
+     */
+    virtual int32_t last(void);
+
+    /**
+     * Advances the iterator either forward or backward the specified number of steps.
+     * Negative values move backward, and positive values move forward.  This is
+     * equivalent to repeatedly calling next() or previous().
+     * @param n The number of steps to move.  The sign indicates the direction
+     * (negative is backwards, and positive is forwards).
+     * @return The character offset of the boundary position n boundaries away from
+     * the current one.
+     *  @stable ICU 2.0
+     */
+    virtual int32_t next(int32_t n);
+
+    /**
+     * Advances the iterator to the next boundary position.
+     * @return The position of the first boundary after this one.
+     *  @stable ICU 2.0
+     */
+    virtual int32_t next(void);
+
+    /**
+     * Moves the iterator backwards, to the last boundary preceding this one.
+     * @return The position of the last boundary position preceding this one.
+     *  @stable ICU 2.0
+     */
+    virtual int32_t previous(void);
+
+    /**
+     * Sets the iterator to refer to the first boundary position following
+     * the specified position.
+     * @param offset The position from which to begin searching for a break position.
+     * @return The position of the first break after the current position.
+     *  @stable ICU 2.0
+     */
+    virtual int32_t following(int32_t offset);
+
+    /**
+     * Sets the iterator to refer to the last boundary position before the
+     * specified position.
+     * @param offset The position to begin searching for a break from.
+     * @return The position of the last boundary before the starting position.
+     *  @stable ICU 2.0
+     */
+    virtual int32_t preceding(int32_t offset);
+
+    /**
+     * Returns true if the specfied position is a boundary position.  As a side
+     * effect, leaves the iterator pointing to the first boundary position at
+     * or after "offset".
+     * @param offset the offset to check.
+     * @return True if "offset" is a boundary position.
+     *  @stable ICU 2.0
+     */
+    virtual UBool isBoundary(int32_t offset);
+
+    /**
+     * Returns the current iteration position.
+     * @return The current iteration position.
+     * @stable ICU 2.0
+     */
+    virtual int32_t current(void) const;
+
+
+    /**
+     * Return the status tag from the break rule that determined the most recently
+     * returned break position.  For break rules that do not specify a
+     * status, a default value of 0 is returned.  If more than one break rule
+     * would cause a boundary to be located at some position in the text,
+     * the numerically largest of the applicable status values is returned.
+     * <p>
+     * Of the standard types of ICU break iterators, only word break and
+     * line break provide status values.  The values are defined in
+     * the header file ubrk.h.  For Word breaks, the status allows distinguishing between words
+     * that contain alphabetic letters, "words" that appear to be numbers,
+     * punctuation and spaces, words containing ideographic characters, and
+     * more.  For Line Break, the status distinguishes between hard (mandatory) breaks
+     * and soft (potential) break positions.
+     * <p>
+     * <code>getRuleStatus()</code> can be called after obtaining a boundary
+     * position from <code>next()</code>, <code>previous()</code>, or
+     * any other break iterator functions that returns a boundary position.
+     * <p>
+     * When creating custom break rules, one is free to define whatever
+     * status values may be convenient for the application.
+     * <p>
+     * Note: this function is not thread safe.  It should not have been
+     *       declared const, and the const remains only for compatibility
+     *       reasons.  (The function is logically const, but not bit-wise const).
+     * <p>
+     * @return the status from the break rule that determined the most recently
+     * returned break position.
+     *
+     * @see UWordBreak
+     * @stable ICU 2.2
+     */
+    virtual int32_t getRuleStatus() const;
+
+   /**
+    * Get the status (tag) values from the break rule(s) that determined the most
+    * recently returned break position.
+    * <p>
+    * The returned status value(s) are stored into an array provided by the caller.
+    * The values are stored in sorted (ascending) order.
+    * If the capacity of the output array is insufficient to hold the data,
+    *  the output will be truncated to the available length, and a
+    *  U_BUFFER_OVERFLOW_ERROR will be signaled.
+    *
+    * @param fillInVec an array to be filled in with the status values.
+    * @param capacity  the length of the supplied vector.  A length of zero causes
+    *                  the function to return the number of status values, in the
+    *                  normal way, without attemtping to store any values.
+    * @param status    receives error codes.
+    * @return          The number of rule status values from rules that determined
+    *                  the most recent boundary returned by the break iterator.
+    *                  In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
+    *                  is the total number of status values that were available,
+    *                  not the reduced number that were actually returned.
+    * @see getRuleStatus
+    * @stable ICU 3.0
+    */
+    virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
+     * This method is to implement a simple version of RTTI, since not all
+     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
+     * clone() methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+    /**
+     * Returns the class ID for this class.  This is useful only for
+     * comparing to a return value from getDynamicClassID().  For example:
+     *
+     *      Base* polymorphic_pointer = createPolymorphicObject();
+     *      if (polymorphic_pointer->getDynamicClassID() ==
+     *          Derived::getStaticClassID()) ...
+     *
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /*
+     * Create a clone (copy) of this break iterator in memory provided
+     *  by the caller.  The idea is to increase performance by avoiding
+     *  a storage allocation.  Use of this functoin is NOT RECOMMENDED.
+     *  Performance gains are minimal, and correct buffer management is
+     *  tricky.  Use clone() instead.
+     *
+     * @param stackBuffer  The pointer to the memory into which the cloned object
+     *                     should be placed.  If NULL,  allocate heap memory
+     *                     for the cloned object.
+     * @param BufferSize   The size of the buffer.  If zero, return the required
+     *                     buffer size, but do not clone the object.  If the
+     *                     size was too small (but not zero), allocate heap
+     *                     storage for the cloned object.
+     *
+     * @param status       Error status.  U_SAFECLONE_ALLOCATED_WARNING will be
+     *                     returned if the the provided buffer was too small, and
+     *                     the clone was therefore put on the heap.
+     *
+     * @return  Pointer to the clone object.  This may differ from the stackBuffer
+     *          address if the byte alignment of the stack buffer was not suitable
+     *          or if the stackBuffer was too small to hold the clone.
+     * @stable ICU 2.0
+     */
+    virtual BreakIterator *  createBufferClone(void *stackBuffer,
+                                               int32_t &BufferSize,
+                                               UErrorCode &status);
+
+
+    /**
+     * Return the binary form of compiled break rules,
+     * which can then be used to create a new break iterator at some
+     * time in the future.  Creating a break iterator from pre-compiled rules
+     * is much faster than building one from the source form of the
+     * break rules.
+     *
+     * The binary data can only be used with the same version of ICU
+     *  and on the same platform type (processor endian-ness)
+     *
+     * @param length Returns the length of the binary data.  (Out paramter.)
+     *
+     * @return   A pointer to the binary (compiled) rule data.  The storage
+     *           belongs to the RulesBasedBreakIterator object, not the
+     *           caller, and must not be modified or deleted.
+     * @internal
+     */
+    virtual const uint8_t *getBinaryRules(uint32_t &length);
+
+
+protected:
+    //=======================================================================
+    // implementation
+    //=======================================================================
+    /**
+     * Dumps caches and performs other actions associated with a complete change
+     * in text or iteration position.
+     * @internal
+     */
+    virtual void reset(void);
+
+#if 0
+    /**
+      * Return true if the category lookup for this char
+      * indicates that it is in the set of dictionary lookup chars.
+      * This function is intended for use by dictionary based break iterators.
+      * @return true if the category lookup for this char
+      * indicates that it is in the set of dictionary lookup chars.
+      * @internal
+      */
+    virtual UBool isDictionaryChar(UChar32);
+
+    /**
+      * Get the type of the break iterator.
+      * @internal
+      */
+    virtual int32_t getBreakType() const;
+#endif
+
+    /**
+      * Set the type of the break iterator.
+      * @internal
+      */
+    virtual void setBreakType(int32_t type);
+
+    /**
+      * Common initialization function, used by constructors and bufferClone.
+      *   (Also used by DictionaryBasedBreakIterator::createBufferClone().)
+      * @internal
+      */
+    void init();
+
+private:
+
+    /**
+     * This method backs the iterator back up to a "safe position" in the text.
+     * This is a position that we know, without any context, must be a break position.
+     * The various calling methods then iterate forward from this safe position to
+     * the appropriate position to return.  (For more information, see the description
+     * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
+     * @param statetable state table used of moving backwards
+     * @internal
+     */
+    int32_t handlePrevious(const RBBIStateTable *statetable);
+
+    /**
+     * This method is the actual implementation of the next() method.  All iteration
+     * vectors through here.  This method initializes the state machine to state 1
+     * and advances through the text character by character until we reach the end
+     * of the text or the state machine transitions to state 0.  We update our return
+     * value every time the state machine passes through a possible end state.
+     * @param statetable state table used of moving forwards
+     * @internal
+     */
+    int32_t handleNext(const RBBIStateTable *statetable);
+
+protected:
+
+    /**
+     * This is the function that actually implements dictionary-based
+     * breaking.  Covering at least the range from startPos to endPos,
+     * it checks for dictionary characters, and if it finds them determines
+     * the appropriate object to deal with them. It may cache found breaks in
+     * fCachedBreakPositions as it goes. It may well also look at text outside
+     * the range startPos to endPos.
+     * If going forward, endPos is the normal Unicode break result, and
+     * if goind in reverse, startPos is the normal Unicode break result
+     * @param startPos  The start position of a range of text
+     * @param endPos    The end position of a range of text
+     * @param reverse   The call is for the reverse direction
+     * @internal
+     */
+    int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
+
+private:
+
+    /**
+     * This function returns the appropriate LanguageBreakEngine for a
+     * given character c.
+     * @param c         A character in the dictionary set
+     * @internal
+     */
+    const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
+
+    /**
+     *  @internal
+     */
+    void makeRuleStatusValid();
+
+};
+
+//------------------------------------------------------------------------------
+//
+//   Inline Functions Definitions ...
+//
+//------------------------------------------------------------------------------
+
+inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
+    return !operator==(that);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/rbnf.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/rbnf.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/rbnf.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1049 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 1997-2006, International Business Machines Corporation and others.
-* All Rights Reserved.
-*******************************************************************************
-*/
-
-#ifndef RBNF_H
-#define RBNF_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Rule Based Number Format
- */
-
-/**
- * \def U_HAVE_RBNF
- * This will be 0 if RBNF support is not included in ICU
- * and 1 if it is.
- *
- * @stable ICU 2.4
- */
-#if UCONFIG_NO_FORMATTING
-#define U_HAVE_RBNF 0
-#else
-#define U_HAVE_RBNF 1
-
-#include "unicode/coll.h"
-#include "unicode/dcfmtsym.h"
-#include "unicode/fmtable.h"
-#include "unicode/locid.h"
-#include "unicode/numfmt.h"
-#include "unicode/unistr.h"
-#include "unicode/strenum.h"
-
-U_NAMESPACE_BEGIN
-
-class NFRuleSet;
-class LocalizationInfo;
-
-/**
- * Tags for the predefined rulesets.
- *
- * @stable ICU 2.2
- */
-enum URBNFRuleSetTag {
-    URBNF_SPELLOUT,
-    URBNF_ORDINAL,
-    URBNF_DURATION,
-    URBNF_COUNT
-};
-
-#if UCONFIG_NO_COLLATION
-class Collator;
-#endif
-
-/**
- * The RuleBasedNumberFormat class formats numbers according to a set of rules. This number formatter is
- * typically used for spelling out numeric values in words (e.g., 25,3476 as
- * &quot;twenty-five thousand three hundred seventy-six&quot; or &quot;vingt-cinq mille trois
- * cents soixante-seize&quot; or
- * &quot;f&uuml;nfundzwanzigtausenddreihundertsechsundsiebzig&quot;), but can also be used for
- * other complicated formatting tasks, such as formatting a number of seconds as hours,
- * minutes and seconds (e.g., 3,730 as &quot;1:02:10&quot;).
- *
- * <p>The resources contain three predefined formatters for each locale: spellout, which
- * spells out a value in words (123 is &quot;one hundred twenty-three&quot;); ordinal, which
- * appends an ordinal suffix to the end of a numeral (123 is &quot;123rd&quot;); and
- * duration, which shows a duration in seconds as hours, minutes, and seconds (123 is
- * &quot;2:03&quot;).&nbsp; The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s
- * by supplying programmer-defined rule sets.</p>
- *
- * <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description
- * that is either passed to the constructor as a <tt>String</tt> or loaded from a resource
- * bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
- * Each rule has a string of output text and a value or range of values it is applicable to.
- * In a typical spellout rule set, the first twenty rules are the words for the numbers from
- * 0 to 19:</p>
- *
- * <pre>zero; one; two; three; four; five; six; seven; eight; nine;
- * ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre>
- *
- * <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
- * we only have to supply the words for the multiples of 10:</p>
- *
- * <pre> 20: twenty[-&gt;&gt;];
- * 30: thirty[-&gt;&gt;];
- * 40: forty[-&gt;&gt;];
- * 50: fifty[-&gt;&gt;];
- * 60: sixty[-&gt;&gt;];
- * 70: seventy[-&gt;&gt;];
- * 80: eighty[-&gt;&gt;];
- * 90: ninety[-&gt;&gt;];</pre>
- *
- * <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the
- * rule's output text with a colon. The rules are in a sorted list, and a rule is applicable
- * to all numbers from its own base value to one less than the next rule's base value. The
- * &quot;&gt;&gt;&quot; token is called a <em>substitution</em> and tells the fomatter to
- * isolate the number's ones digit, format it using this same set of rules, and place the
- * result at the position of the &quot;&gt;&gt;&quot; token. Text in brackets is omitted if
- * the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24
- * is &quot;twenty-four,&quot; not &quot;twenty four&quot;).</p>
- *
- * <p>For even larger numbers, we can actually look up several parts of the number in the
- * list:</p>
- *
- * <pre>100: &lt;&lt; hundred[ &gt;&gt;];</pre>
- *
- * <p>The &quot;&lt;&lt;&quot; represents a new kind of substitution. The &lt;&lt; isolates
- * the hundreds digit (and any digits to its left), formats it using this same rule set, and
- * places the result where the &quot;&lt;&lt;&quot; was. Notice also that the meaning of
- * &gt;&gt; has changed: it now refers to both the tens and the ones digits. The meaning of
- * both substitutions depends on the rule's base value. The base value determines the rule's <em>divisor,</em>
- * which is the highest power of 10 that is less than or equal to the base value (the user
- * can change this). To fill in the substitutions, the formatter divides the number being
- * formatted by the divisor. The integral quotient is used to fill in the &lt;&lt;
- * substitution, and the remainder is used to fill in the &gt;&gt; substitution. The meaning
- * of the brackets changes similarly: text in brackets is omitted if the value being
- * formatted is an even multiple of the rule's divisor. The rules are applied recursively, so
- * if a substitution is filled in with text that includes another substitution, that
- * substitution is also filled in.</p>
- *
- * <p>This rule covers values up to 999, at which point we add another rule:</p>
- *
- * <pre>1000: &lt;&lt; thousand[ &gt;&gt;];</pre>
- *
- * <p>Again, the meanings of the brackets and substitution tokens shift because the rule's
- * base value is a higher power of 10, changing the rule's divisor. This rule can actually be
- * used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
- *
- * <pre> 1,000,000: &lt;&lt; million[ &gt;&gt;];
- * 1,000,000,000: &lt;&lt; billion[ &gt;&gt;];
- * 1,000,000,000,000: &lt;&lt; trillion[ &gt;&gt;];
- * 1,000,000,000,000,000: OUT OF RANGE!;</pre>
- *
- * <p>Commas, periods, and spaces can be used in the base values to improve legibility and
- * are ignored by the rule parser. The last rule in the list is customarily treated as an
- * &quot;overflow rule,&quot; applying to everything from its base value on up, and often (as
- * in this example) being used to print out an error message or default representation.
- * Notice also that the size of the major groupings in large numbers is controlled by the
- * spacing of the rules: because in English we group numbers by thousand, the higher rules
- * are separated from each other by a factor of 1,000.</p>
- *
- * <p>To see how these rules actually work in practice, consider the following example:
- * Formatting 25,430 with this rule set would work like this:</p>
- *
- * <table border="0" width="100%">
- *   <tr>
- *     <td><strong>&lt;&lt; thousand &gt;&gt;</strong></td>
- *     <td>[the rule whose base value is 1,000 is applicable to 25,340]</td>
- *   </tr>
- *   <tr>
- *     <td><strong>twenty-&gt;&gt;</strong> thousand &gt;&gt;</td>
- *     <td>[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
- *   </tr>
- *   <tr>
- *     <td>twenty-<strong>five</strong> thousand &gt;&gt;</td>
- *     <td>[25 mod 10 is 5. The rule for 5 is &quot;five.&quot;</td>
- *   </tr>
- *   <tr>
- *     <td>twenty-five thousand <strong>&lt;&lt; hundred &gt;&gt;</strong></td>
- *     <td>[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
- *   </tr>
- *   <tr>
- *     <td>twenty-five thousand <strong>three</strong> hundred &gt;&gt;</td>
- *     <td>[340 over 100 is 3. The rule for 3 is &quot;three.&quot;]</td>
- *   </tr>
- *   <tr>
- *     <td>twenty-five thousand three hundred <strong>forty</strong></td>
- *     <td>[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
- *     evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
- *   </tr>
- * </table>
- *
- * <p>The above syntax suffices only to format positive integers. To format negative numbers,
- * we add a special rule:</p>
- *
- * <pre>-x: minus &gt;&gt;;</pre>
- *
- * <p>This is called a <em>negative-number rule,</em> and is identified by &quot;-x&quot;
- * where the base value would be. This rule is used to format all negative numbers. the
- * &gt;&gt; token here means &quot;find the number's absolute value, format it with these
- * rules, and put the result here.&quot;</p>
- *
- * <p>We also add a special rule called a <em>fraction rule </em>for numbers with fractional
- * parts:</p>
- *
- * <pre>x.x: &lt;&lt; point &gt;&gt;;</pre>
- *
- * <p>This rule is used for all positive non-integers (negative non-integers pass through the
- * negative-number rule first and then through this rule). Here, the &lt;&lt; token refers to
- * the number's integral part, and the &gt;&gt; to the number's fractional part. The
- * fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be
- * formatted as &quot;one hundred twenty-three point four five six&quot;).</p>
- *
- * <p>To see how this rule syntax is applied to various languages, examine the resource data.</p>
- *
- * <p>There is actually much more flexibility built into the rule language than the
- * description above shows. A formatter may own multiple rule sets, which can be selected by
- * the caller, and which can use each other to fill in their substitutions. Substitutions can
- * also be filled in with digits, using a DecimalFormat object. There is syntax that can be
- * used to alter a rule's divisor in various ways. And there is provision for much more
- * flexible fraction handling. A complete description of the rule syntax follows:</p>
- *
- * <hr>
- *
- * <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule
- * sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule
- * set name must begin with a % sign. Rule sets with names that begin with a single % sign
- * are <em>public:</em> the caller can specify that they be used to format and parse numbers.
- * Rule sets with names that begin with %% are <em>private:</em> they exist only for the use
- * of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
- *
- * <p>The user can also specify a special &quot;rule set&quot; named <tt>%%lenient-parse</tt>.
- * The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt>
- * description which is used to define equivalences for lenient parsing. For more information
- * on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing,
- * see <tt>setLenientParse()</tt>.  <em>Note:</em> symbols that have syntactic meaning
- * in collation rules, such as '&amp;', have no particular meaning when appearing outside
- * of the <tt>lenient-parse</tt> rule set.</p>
- *
- * <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
- * Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
- * These parameters are controlled by the description syntax, which consists of a <em>rule
- * descriptor,</em> a colon, and a <em>rule body.</em></p>
- *
- * <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
- * name of a token):</p>
- *
- * <table border="0" width="100%">
- *   <tr>
- *     <td><em>bv</em>:</td>
- *     <td><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
- *     number expressed using ASCII digits. <em>bv</em> may contain spaces, period, and commas,
- *     which are ignored. The rule's divisor is the highest power of 10 less than or equal to
- *     the base value.</td>
- *   </tr>
- *   <tr>
- *     <td><em>bv</em>/<em>rad</em>:</td>
- *     <td><em>bv</em> specifies the rule's base value. The rule's divisor is the
- *     highest power of <em>rad</em> less than or equal to the base value.</td>
- *   </tr>
- *   <tr>
- *     <td><em>bv</em>&gt;:</td>
- *     <td><em>bv</em> specifies the rule's base value. To calculate the divisor,
- *     let the radix be 10, and the exponent be the highest exponent of the radix that yields a
- *     result less than or equal to the base value. Every &gt; character after the base value
- *     decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
- *     raised to the power of the exponent; otherwise, the divisor is 1.</td>
- *   </tr>
- *   <tr>
- *     <td><em>bv</em>/<em>rad</em>&gt;:</td>
- *     <td><em>bv</em> specifies the rule's base value. To calculate the divisor,
- *     let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
- *     yields a result less than or equal to the base value. Every &gt; character after the radix
- *     decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
- *     raised to the power of the exponent; otherwise, the divisor is 1.</td>
- *   </tr>
- *   <tr>
- *     <td>-x:</td>
- *     <td>The rule is a negative-number rule.</td>
- *   </tr>
- *   <tr>
- *     <td>x.x:</td>
- *     <td>The rule is an <em>improper fraction rule.</em></td>
- *   </tr>
- *   <tr>
- *     <td>0.x:</td>
- *     <td>The rule is a <em>proper fraction rule.</em></td>
- *   </tr>
- *   <tr>
- *     <td>x.0:</td>
- *     <td>The rule is a <em>master rule.</em></td>
- *   </tr>
- *   <tr>
- *     <td><em>nothing</em></td>
- *     <td>If the rule's rule descriptor is left out, the base value is one plus the
- *     preceding rule's base value (or zero if this is the first rule in the list) in a normal
- *     rule set.&nbsp; In a fraction rule set, the base value is the same as the preceding rule's
- *     base value.</td>
- *   </tr>
- * </table>
- *
- * <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending
- * on whether it is used to format a number's integral part (or the whole number) or a
- * number's fractional part. Using a rule set to format a rule's fractional part makes it a
- * fraction rule set.</p>
- *
- * <p>Which rule is used to format a number is defined according to one of the following
- * algorithms: If the rule set is a regular rule set, do the following:
- *
- * <ul>
- *   <li>If the rule set includes a master rule (and the number was passed in as a <tt>double</tt>),
- *     use the master rule.&nbsp; (If the number being formatted was passed in as a <tt>long</tt>,
- *     the master rule is ignored.)</li>
- *   <li>If the number is negative, use the negative-number rule.</li>
- *   <li>If the number has a fractional part and is greater than 1, use the improper fraction
- *     rule.</li>
- *   <li>If the number has a fractional part and is between 0 and 1, use the proper fraction
- *     rule.</li>
- *   <li>Binary-search the rule list for the rule with the highest base value less than or equal
- *     to the number. If that rule has two substitutions, its base value is not an even multiple
- *     of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the
- *     rule that precedes it in the rule list. Otherwise, use the rule itself.</li>
- * </ul>
- *
- * <p>If the rule set is a fraction rule set, do the following:
- *
- * <ul>
- *   <li>Ignore negative-number and fraction rules.</li>
- *   <li>For each rule in the list, multiply the number being formatted (which will always be
- *     between 0 and 1) by the rule's base value. Keep track of the distance between the result
- *     the nearest integer.</li>
- *   <li>Use the rule that produced the result closest to zero in the above calculation. In the
- *     event of a tie or a direct hit, use the first matching rule encountered. (The idea here is
- *     to try each rule's base value as a possible denominator of a fraction. Whichever
- *     denominator produces the fraction closest in value to the number being formatted wins.) If
- *     the rule following the matching rule has the same base value, use it if the numerator of
- *     the fraction is anything other than 1; if the numerator is 1, use the original matching
- *     rule. (This is to allow singular and plural forms of the rule text without a lot of extra
- *     hassle.)</li>
- * </ul>
- *
- * <p>A rule's body consists of a string of characters terminated by a semicolon. The rule
- * may include zero, one, or two <em>substitution tokens,</em> and a range of text in
- * brackets. The brackets denote optional text (and may also include one or both
- * substitutions). The exact meanings of the substitution tokens, and under what conditions
- * optional text is omitted, depend on the syntax of the substitution token and the context.
- * The rest of the text in a rule body is literal text that is output when the rule matches
- * the number being formatted.</p>
- *
- * <p>A substitution token begins and ends with a <em>token character.</em> The token
- * character and the context together specify a mathematical operation to be performed on the
- * number being formatted. An optional <em>substitution descriptor </em>specifies how the
- * value resulting from that operation is used to fill in the substitution. The position of
- * the substitution token in the rule body specifies the location of the resultant text in
- * the original rule text.</p>
- *
- * <p>The meanings of the substitution token characters are as follows:</p>
- *
- * <table border="0" width="100%">
- *   <tr>
- *     <td>&gt;&gt;</td>
- *     <td>in normal rule</td>
- *     <td>Divide the number by the rule's divisor and format the remainder</td>
- *   </tr>
- *   <tr>
- *     <td></td>
- *     <td>in negative-number rule</td>
- *     <td>Find the absolute value of the number and format the result</td>
- *   </tr>
- *   <tr>
- *     <td></td>
- *     <td>in fraction or master rule</td>
- *     <td>Isolate the number's fractional part and format it.</td>
- *   </tr>
- *   <tr>
- *     <td></td>
- *     <td>in rule in fraction rule set</td>
- *     <td>Not allowed.</td>
- *   </tr>
- *   <tr>
- *     <td>&gt;&gt;&gt;</td>
- *     <td>in normal rule</td>
- *     <td>Divide the number by the rule's divisor and format the remainder,
- *       but bypass the normal rule-selection process and just use the
- *       rule that precedes this one in this rule list.</td>
- *   </tr>
- *   <tr>
- *     <td></td>
- *     <td>in all other rules</td>
- *     <td>Not allowed.</td>
- *   </tr>
- *   <tr>
- *     <td>&lt;&lt;</td>
- *     <td>in normal rule</td>
- *     <td>Divide the number by the rule's divisor and format the quotient</td>
- *   </tr>
- *   <tr>
- *     <td></td>
- *     <td>in negative-number rule</td>
- *     <td>Not allowed.</td>
- *   </tr>
- *   <tr>
- *     <td></td>
- *     <td>in fraction or master rule</td>
- *     <td>Isolate the number's integral part and format it.</td>
- *   </tr>
- *   <tr>
- *     <td></td>
- *     <td>in rule in fraction rule set</td>
- *     <td>Multiply the number by the rule's base value and format the result.</td>
- *   </tr>
- *   <tr>
- *     <td>==</td>
- *     <td>in all rule sets</td>
- *     <td>Format the number unchanged</td>
- *   </tr>
- *   <tr>
- *     <td>[]</td>
- *     <td>in normal rule</td>
- *     <td>Omit the optional text if the number is an even multiple of the rule's divisor</td>
- *   </tr>
- *   <tr>
- *     <td></td>
- *     <td>in negative-number rule</td>
- *     <td>Not allowed.</td>
- *   </tr>
- *   <tr>
- *     <td></td>
- *     <td>in improper-fraction rule</td>
- *     <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an
- *     x.x rule and a 0.x rule)</td>
- *   </tr>
- *   <tr>
- *     <td></td>
- *     <td>in master rule</td>
- *     <td>Omit the optional text if the number is an integer (same as specifying both an x.x
- *     rule and an x.0 rule)</td>
- *   </tr>
- *   <tr>
- *     <td></td>
- *     <td>in proper-fraction rule</td>
- *     <td>Not allowed.</td>
- *   </tr>
- *   <tr>
- *     <td></td>
- *     <td>in rule in fraction rule set</td>
- *     <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td>
- *   </tr>
- * </table>
- *
- * <p>The substitution descriptor (i.e., the text between the token characters) may take one
- * of three forms:</p>
- *
- * <table border="0" width="100%">
- *   <tr>
- *     <td>a rule set name</td>
- *     <td>Perform the mathematical operation on the number, and format the result using the
- *     named rule set.</td>
- *   </tr>
- *   <tr>
- *     <td>a DecimalFormat pattern</td>
- *     <td>Perform the mathematical operation on the number, and format the result using a
- *     DecimalFormat with the specified pattern.&nbsp; The pattern must begin with 0 or #.</td>
- *   </tr>
- *   <tr>
- *     <td>nothing</td>
- *     <td>Perform the mathematical operation on the number, and format the result using the rule
- *     set containing the current rule, except:
- *     <ul>
- *       <li>You can't have an empty substitution descriptor with a == substitution.</li>
- *       <li>If you omit the substitution descriptor in a &gt;&gt; substitution in a fraction rule,
- *         format the result one digit at a time using the rule set containing the current rule.</li>
- *       <li>If you omit the substitution descriptor in a &lt;&lt; substitution in a rule in a
- *         fraction rule set, format the result using the default rule set for this formatter.</li>
- *     </ul>
- *     </td>
- *   </tr>
- * </table>
- *
- * <p>Whitespace is ignored between a rule set name and a rule set body, between a rule
- * descriptor and a rule body, or between rules. If a rule body begins with an apostrophe,
- * the apostrophe is ignored, but all text after it becomes significant (this is how you can
- * have a rule's rule text begin with whitespace). There is no escape function: the semicolon
- * is not allowed in rule set names or in rule text, and the colon is not allowed in rule set
- * names. The characters beginning a substitution token are always treated as the beginning
- * of a substitution token.</p>
- *
- * <p>See the resource data and the demo program for annotated examples of real rule sets
- * using these features.</p>
- *
- * <p><em>User subclasses are not supported.</em> While clients may write
- * subclasses, such code will not necessarily work and will not be
- * guaranteed to work stably from release to release.
- *
- * <p><b>Localizations</b></p>
- * <p>Constructors are available that allow the specification of localizations for the
- * public rule sets (and also allow more control over what public rule sets are available).
- * Localization data is represented as a textual description.  The description represents
- * an array of arrays of string.  The first element is an array of the public rule set names,
- * each of these must be one of the public rule set names that appear in the rules.  Only
- * names in this array will be treated as public rule set names by the API.  Each subsequent
- * element is an array of localizations of these names.  The first element of one of these
- * subarrays is the locale name, and the remaining elements are localizations of the
- * public rule set names, in the same order as they were listed in the first arrray.</p>
- * <p>In the syntax, angle brackets '<', '>' are used to delimit the arrays, and comma ',' is used
- * to separate elements of an array.  Whitespace is ignored, unless quoted.</p>
- * <p>For example:<pre>
- * < < %foo, %bar, %baz >, 
- *   < en, Foo, Bar, Baz >, 
- *   < fr, 'le Foo', 'le Bar', 'le Baz' > 
- *   < zh, \\u7532, \\u4e59, \\u4e19 > >
- * </pre></p>
- * @author Richard Gillam
- * @see NumberFormat
- * @see DecimalFormat
- * @stable ICU 2.0
- */
-class U_I18N_API RuleBasedNumberFormat : public NumberFormat {
-public:
-
-  //-----------------------------------------------------------------------
-  // constructors
-  //-----------------------------------------------------------------------
-
-    /**
-     * Creates a RuleBasedNumberFormat that behaves according to the description
-     * passed in.  The formatter uses the default locale.
-     * @param rules A description of the formatter's desired behavior.
-     * See the class documentation for a complete explanation of the description
-     * syntax.
-     * @param perror The parse error if an error was encountered.
-     * @param status The status indicating whether the constructor succeeded.
-     * @stable ICU 3.2
-     */
-    RuleBasedNumberFormat(const UnicodeString& rules, UParseError& perror, UErrorCode& status);
-
-    /**
-     * Creates a RuleBasedNumberFormat that behaves according to the description
-     * passed in.  The formatter uses the default locale.  
-     * <p>
-     * The localizations data provides information about the public
-     * rule sets and their localized display names for different
-     * locales. The first element in the list is an array of the names
-     * of the public rule sets.  The first element in this array is
-     * the initial default ruleset.  The remaining elements in the
-     * list are arrays of localizations of the names of the public
-     * rule sets.  Each of these is one longer than the initial array,
-     * with the first String being the ULocale ID, and the remaining
-     * Strings being the localizations of the rule set names, in the
-     * same order as the initial array.  Arrays are NULL-terminated.
-     * @param rules A description of the formatter's desired behavior.
-     * See the class documentation for a complete explanation of the description
-     * syntax.
-     * @param localizations the localization information.
-     * names in the description.  These will be copied by the constructor.
-     * @param perror The parse error if an error was encountered.
-     * @param status The status indicating whether the constructor succeeded.
-     * @stable ICU 3.2
-     */
-    RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations,
-                        UParseError& perror, UErrorCode& status);
-
-  /**
-   * Creates a RuleBasedNumberFormat that behaves according to the rules
-   * passed in.  The formatter uses the specified locale to determine the
-   * characters to use when formatting numerals, and to define equivalences
-   * for lenient parsing.
-   * @param rules The formatter rules.
-   * See the class documentation for a complete explanation of the rule
-   * syntax.
-   * @param locale A locale that governs which characters are used for
-   * formatting values in numerals and which characters are equivalent in
-   * lenient parsing.
-   * @param perror The parse error if an error was encountered.
-   * @param status The status indicating whether the constructor succeeded.
-   * @stable ICU 2.0
-   */
-  RuleBasedNumberFormat(const UnicodeString& rules, const Locale& locale,
-                        UParseError& perror, UErrorCode& status);
-
-    /**
-     * Creates a RuleBasedNumberFormat that behaves according to the description
-     * passed in.  The formatter uses the default locale.  
-     * <p>
-     * The localizations data provides information about the public
-     * rule sets and their localized display names for different
-     * locales. The first element in the list is an array of the names
-     * of the public rule sets.  The first element in this array is
-     * the initial default ruleset.  The remaining elements in the
-     * list are arrays of localizations of the names of the public
-     * rule sets.  Each of these is one longer than the initial array,
-     * with the first String being the ULocale ID, and the remaining
-     * Strings being the localizations of the rule set names, in the
-     * same order as the initial array.  Arrays are NULL-terminated.
-     * @param rules A description of the formatter's desired behavior.
-     * See the class documentation for a complete explanation of the description
-     * syntax.
-     * @param localizations a list of localizations for the rule set
-     * names in the description.  These will be copied by the constructor.
-     * @param locale A locale that governs which characters are used for
-     * formatting values in numerals and which characters are equivalent in
-     * lenient parsing.
-     * @param perror The parse error if an error was encountered.
-     * @param status The status indicating whether the constructor succeeded.
-     * @stable ICU 3.2
-     */
-    RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations,
-                        const Locale& locale, UParseError& perror, UErrorCode& status);
-
-  /**
-   * Creates a RuleBasedNumberFormat from a predefined ruleset.  The selector
-   * code choosed among three possible predefined formats: spellout, ordinal,
-   * and duration.
-   * @param tag A selector code specifying which kind of formatter to create for that
-   * locale.  There are three legal values: URBNF_SPELLOUT, which creates a formatter that
-   * spells out a value in words in the desired language, URBNF_ORDINAL, which attaches
-   * an ordinal suffix from the desired language to the end of a number (e.g. "123rd"),
-   * and URBNF_DURATION, which formats a duration in seconds as hours, minutes, and seconds.
-   * @param locale The locale for the formatter.
-   * @param status The status indicating whether the constructor succeeded.
-   * @stable ICU 2.0
-   */
-  RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& locale, UErrorCode& status);
-
-  //-----------------------------------------------------------------------
-  // boilerplate
-  //-----------------------------------------------------------------------
-
-  /**
-   * Copy constructor
-   * @param rhs    the object to be copied from.
-   * @stable ICU 2.6
-   */
-  RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs);
-
-  /**
-   * Assignment operator
-   * @param rhs    the object to be copied from.
-   * @stable ICU 2.6
-   */
-  RuleBasedNumberFormat& operator=(const RuleBasedNumberFormat& rhs);
-
-  /**
-   * Release memory allocated for a RuleBasedNumberFormat when you are finished with it.
-   * @stable ICU 2.6
-   */
-  virtual ~RuleBasedNumberFormat();
-
-  /**
-   * Clone this object polymorphically.  The caller is responsible
-   * for deleting the result when done.
-   * @return  A copy of the object.
-   * @stable ICU 2.6
-   */
-  virtual Format* clone(void) const;
-
-  /**
-   * Return true if the given Format objects are semantically equal.
-   * Objects of different subclasses are considered unequal.
-   * @param other    the object to be compared with.
-   * @return        true if the given Format objects are semantically equal.
-   * @stable ICU 2.6
-   */
-  virtual UBool operator==(const Format& other) const;
-
-//-----------------------------------------------------------------------
-// public API functions
-//-----------------------------------------------------------------------
-
-  /**
-   * return the rules that were provided to the RuleBasedNumberFormat.
-   * @return the result String that was passed in
-   * @stable ICU 2.0
-   */
-  virtual UnicodeString getRules() const;
-
-  /**
-   * Return the number of public rule set names.
-   * @return the number of public rule set names.
-   * @stable ICU 2.0
-   */
-  virtual int32_t getNumberOfRuleSetNames() const;
-
-  /**
-   * Return the name of the index'th public ruleSet.  If index is not valid,
-   * the function returns null.
-   * @param index the index of the ruleset
-   * @return the name of the index'th public ruleSet.
-   * @stable ICU 2.0
-   */
-  virtual UnicodeString getRuleSetName(int32_t index) const;
-
-  /**
-   * Return the number of locales for which we have localized rule set display names.
-   * @return the number of locales for which we have localized rule set display names.
-   * @stable ICU 3.2
-   */
-  virtual int32_t getNumberOfRuleSetDisplayNameLocales(void) const;
-
-  /**
-   * Return the index'th display name locale.
-   * @param index the index of the locale
-   * @param status set to a failure code when this function fails
-   * @return the locale
-   * @see #getNumberOfRuleSetDisplayNameLocales
-   * @stable ICU 3.2
-   */
-  virtual Locale getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const;
-
-    /**
-     * Return the rule set display names for the provided locale.  These are in the same order
-     * as those returned by getRuleSetName.  The locale is matched against the locales for
-     * which there is display name data, using normal fallback rules.  If no locale matches, 
-     * the default display names are returned.  (These are the internal rule set names minus
-     * the leading '%'.)
-     * @param index the index of the rule set
-     * @param locale the locale (returned by getRuleSetDisplayNameLocales) for which the localized
-     * display name is desired
-     * @return the display name for the given index, which might be bogus if there is an error
-     * @see #getRuleSetName
-     * @stable ICU 3.2
-     */
-  virtual UnicodeString getRuleSetDisplayName(int32_t index, 
-                          const Locale& locale = Locale::getDefault());
-
-    /**
-     * Return the rule set display name for the provided rule set and locale.  
-     * The locale is matched against the locales for which there is display name data, using
-     * normal fallback rules.  If no locale matches, the default display name is returned.
-     * @return the display name for the rule set
-     * @stable ICU 3.2
-     * @see #getRuleSetDisplayName
-     */
-  virtual UnicodeString getRuleSetDisplayName(const UnicodeString& ruleSetName, 
-                          const Locale& locale = Locale::getDefault());
-
-  /**
-   * Formats the specified 32-bit number using the default ruleset.
-   * @param number The number to format.
-   * @param toAppendTo the string that will hold the (appended) result
-   * @param pos the fieldposition
-   * @return A textual representation of the number.
-   * @stable ICU 2.0
-   */
-  virtual UnicodeString& format(int32_t number,
-                                UnicodeString& toAppendTo,
-                                FieldPosition& pos) const;
-
-  /**
-   * Formats the specified 64-bit number using the default ruleset.
-   * @param number The number to format.
-   * @param toAppendTo the string that will hold the (appended) result
-   * @param pos the fieldposition
-   * @return A textual representation of the number.
-   * @stable ICU 2.1
-   */
-  virtual UnicodeString& format(int64_t number,
-                                UnicodeString& toAppendTo,
-                                FieldPosition& pos) const;
-  /**
-   * Formats the specified number using the default ruleset.
-   * @param number The number to format.
-   * @param toAppendTo the string that will hold the (appended) result
-   * @param pos the fieldposition
-   * @return A textual representation of the number.
-   * @stable ICU 2.0
-   */
-  virtual UnicodeString& format(double number,
-                                UnicodeString& toAppendTo,
-                                FieldPosition& pos) const;
-
-  /**
-   * Formats the specified number using the named ruleset.
-   * @param number The number to format.
-   * @param ruleSetName The name of the rule set to format the number with.
-   * This must be the name of a valid public rule set for this formatter.
-   * @param toAppendTo the string that will hold the (appended) result
-   * @param pos the fieldposition
-   * @param status the status
-   * @return A textual representation of the number.
-   * @stable ICU 2.0
-   */
-  virtual UnicodeString& format(int32_t number,
-                                const UnicodeString& ruleSetName,
-                                UnicodeString& toAppendTo,
-                                FieldPosition& pos,
-                                UErrorCode& status) const;
-  /**
-   * Formats the specified 64-bit number using the named ruleset.
-   * @param number The number to format.
-   * @param ruleSetName The name of the rule set to format the number with.
-   * This must be the name of a valid public rule set for this formatter.
-   * @param toAppendTo the string that will hold the (appended) result
-   * @param pos the fieldposition
-   * @param status the status
-   * @return A textual representation of the number.
-   * @stable ICU 2.1
-   */
-  virtual UnicodeString& format(int64_t number,
-                                const UnicodeString& ruleSetName,
-                                UnicodeString& toAppendTo,
-                                FieldPosition& pos,
-                                UErrorCode& status) const;
-  /**
-   * Formats the specified number using the named ruleset.
-   * @param number The number to format.
-   * @param ruleSetName The name of the rule set to format the number with.
-   * This must be the name of a valid public rule set for this formatter.
-   * @param toAppendTo the string that will hold the (appended) result
-   * @param pos the fieldposition
-   * @param status the status
-   * @return A textual representation of the number.
-   * @stable ICU 2.0
-   */
-  virtual UnicodeString& format(double number,
-                                const UnicodeString& ruleSetName,
-                                UnicodeString& toAppendTo,
-                                FieldPosition& pos,
-                                UErrorCode& status) const;
-
-  /**
-   * Formats the specified number using the default ruleset.
-   * @param obj The number to format.
-   * @param toAppendTo the string that will hold the (appended) result
-   * @param pos the fieldposition
-   * @param status the status
-   * @return A textual representation of the number.
-   * @stable ICU 2.0
-   */
-  virtual UnicodeString& format(const Formattable& obj,
-                                UnicodeString& toAppendTo,
-                                FieldPosition& pos,
-                                UErrorCode& status) const;
-  /**
-   * Redeclared Format method.
-   * @param obj    the object to be formatted.
-   * @param result Output param which will receive the formatted string.
-   * @param status Output param set to success/failure code
-   * @return       A reference to 'result'.
-   * @stable ICU 2.0
-   */
-  UnicodeString& format(const Formattable& obj,
-                        UnicodeString& result,
-                        UErrorCode& status) const;
-
-  /**
-   * Redeclared NumberFormat method.
-   * @param number    the double value to be formatted.
-   * @param output    Output param which will receive the formatted string.
-   * @return          A reference to 'output'.
-   * @stable ICU 2.0
-   */
-   UnicodeString& format(double number,
-                         UnicodeString& output) const;
-
-  /**
-   * Redeclared NumberFormat method.
-   * @param number    the long value to be formatted.
-   * @param output    Output param which will receive the formatted string.
-   * @return          A reference to 'output'.
-   * @stable ICU 2.0
-   */
-   UnicodeString& format(int32_t number,
-                         UnicodeString& output) const;
-
-  /**
-   * Parses the specfied string, beginning at the specified position, according
-   * to this formatter's rules.  This will match the string against all of the
-   * formatter's public rule sets and return the value corresponding to the longest
-   * parseable substring.  This function's behavior is affected by the lenient
-   * parse mode.
-   * @param text The string to parse
-   * @param result the result of the parse, either a double or a long.
-   * @param parsePosition On entry, contains the position of the first character
-   * in "text" to examine.  On exit, has been updated to contain the position
-   * of the first character in "text" that wasn't consumed by the parse.
-   * @see #setLenient
-   * @stable ICU 2.0
-   */
-  virtual void parse(const UnicodeString& text,
-                     Formattable& result,
-                     ParsePosition& parsePosition) const;
-
-
-  /**
-   * Redeclared Format method.
-   * @param text   The string to parse
-   * @param result the result of the parse, either a double or a long.
-   * @param status Output param set to failure code when a problem occurs.
-   * @stable ICU 2.0
-   */
-  virtual inline void parse(const UnicodeString& text,
-                      Formattable& result,
-                      UErrorCode& status) const;
-
-#if !UCONFIG_NO_COLLATION
-
-  /**
-   * Turns lenient parse mode on and off.
-   *
-   * When in lenient parse mode, the formatter uses a Collator for parsing the text.
-   * Only primary differences are treated as significant.  This means that case
-   * differences, accent differences, alternate spellings of the same letter
-   * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
-   * matching the text.  In many cases, numerals will be accepted in place of words
-   * or phrases as well.
-   *
-   * For example, all of the following will correctly parse as 255 in English in
-   * lenient-parse mode:
-   * <br>"two hundred fifty-five"
-   * <br>"two hundred fifty five"
-   * <br>"TWO HUNDRED FIFTY-FIVE"
-   * <br>"twohundredfiftyfive"
-   * <br>"2 hundred fifty-5"
-   *
-   * The Collator used is determined by the locale that was
-   * passed to this object on construction.  The description passed to this object
-   * on construction may supply additional collation rules that are appended to the
-   * end of the default collator for the locale, enabling additional equivalences
-   * (such as adding more ignorable characters or permitting spelled-out version of
-   * symbols; see the demo program for examples).
-   *
-   * It's important to emphasize that even strict parsing is relatively lenient: it
-   * will accept some text that it won't produce as output.  In English, for example,
-   * it will correctly parse "two hundred zero" and "fifteen hundred".
-   *
-   * @param enabled If true, turns lenient-parse mode on; if false, turns it off.
-   * @see RuleBasedCollator
-   * @stable ICU 2.0
-   */
-  virtual void setLenient(UBool enabled);
-
-  /**
-   * Returns true if lenient-parse mode is turned on.  Lenient parsing is off
-   * by default.
-   * @return true if lenient-parse mode is turned on.
-   * @see #setLenient
-   * @stable ICU 2.0
-   */
-  virtual inline UBool isLenient(void) const;
-
-#endif
-
-  /**
-   * Override the default rule set to use.  If ruleSetName is null, reset
-   * to the initial default rule set.  If the rule set is not a public rule set name,
-   * U_ILLEGAL_ARGUMENT_ERROR is returned in status.
-   * @param ruleSetName the name of the rule set, or null to reset the initial default.
-   * @param status set to failure code when a problem occurs.
-   * @stable ICU 2.6
-   */
-  virtual void setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status);
-
-  /**
-   * Return the name of the current default rule set.  If the current rule set is
-   * not public, returns a bogus (and empty) UnicodeString.
-   * @return the name of the current default rule set
-   * @stable ICU 3.0
-   */
-  virtual UnicodeString getDefaultRuleSetName() const;
-
-public:
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.8
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.8
-     */
-    virtual UClassID getDynamicClassID(void) const;
-
-private:
-    RuleBasedNumberFormat(); // default constructor not implemented
-
-    // this will ref the localizations if they are not NULL
-    // caller must deref to get adoption 
-    RuleBasedNumberFormat(const UnicodeString& description, LocalizationInfo* localizations, 
-              const Locale& locale, UParseError& perror, UErrorCode& status);
-
-    void init(const UnicodeString& rules, LocalizationInfo* localizations, UParseError& perror, UErrorCode& status);
-    void dispose();
-    void stripWhitespace(UnicodeString& src);
-    void initDefaultRuleSet();
-    void format(double number, NFRuleSet& ruleSet);
-    NFRuleSet* findRuleSet(const UnicodeString& name, UErrorCode& status) const;
-
-    /* friend access */
-    friend class NFSubstitution;
-    friend class NFRule;
-    friend class FractionalPartSubstitution;
-
-    inline NFRuleSet * getDefaultRuleSet() const;
-    Collator * getCollator() const;
-    DecimalFormatSymbols * getDecimalFormatSymbols() const;
-
-private:
-    NFRuleSet **ruleSets;
-    NFRuleSet *defaultRuleSet;
-    Locale locale;
-    Collator* collator;
-    DecimalFormatSymbols* decimalFormatSymbols;
-    UBool lenient;
-    UnicodeString* lenientParseRules;
-    LocalizationInfo* localizations;
-};
-
-// ---------------
-
-inline UnicodeString&
-RuleBasedNumberFormat::format(const Formattable& obj,
-                              UnicodeString& result,
-                              UErrorCode& status) const
-{
-    // Don't use Format:: - use immediate base class only,
-    // in case immediate base modifies behavior later.
-    // dlf - the above comment is bogus, if there were a reason to modify
-    // it, it would be virtual, and there's no reason because it is
-    // a one-line macro in NumberFormat anyway, just like this one.
-    return NumberFormat::format(obj, result, status);
-}
-
-inline UnicodeString&
-RuleBasedNumberFormat::format(double number, UnicodeString& output) const {
-    FieldPosition pos(0);
-    return format(number, output, pos);
-}
-
-inline UnicodeString&
-RuleBasedNumberFormat::format(int32_t number, UnicodeString& output) const {
-    FieldPosition pos(0);
-    return format(number, output, pos);
-}
-
-inline void
-RuleBasedNumberFormat::parse(const UnicodeString& text, Formattable& result, UErrorCode& status) const
-{
-    NumberFormat::parse(text, result, status);
-}
-
-#if !UCONFIG_NO_COLLATION
-
-inline UBool
-RuleBasedNumberFormat::isLenient(void) const {
-    return lenient;
-}
-
-#endif
-
-inline NFRuleSet*
-RuleBasedNumberFormat::getDefaultRuleSet() const {
-    return defaultRuleSet;
-}
-
-U_NAMESPACE_END
-
-/* U_HAVE_RBNF */
-#endif
-
-/* RBNF_H */
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/rbnf.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/rbnf.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/rbnf.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/rbnf.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1049 @@
+/*
+*******************************************************************************
+* Copyright (C) 1997-2006, International Business Machines Corporation and others.
+* All Rights Reserved.
+*******************************************************************************
+*/
+
+#ifndef RBNF_H
+#define RBNF_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Rule Based Number Format
+ */
+
+/**
+ * \def U_HAVE_RBNF
+ * This will be 0 if RBNF support is not included in ICU
+ * and 1 if it is.
+ *
+ * @stable ICU 2.4
+ */
+#if UCONFIG_NO_FORMATTING
+#define U_HAVE_RBNF 0
+#else
+#define U_HAVE_RBNF 1
+
+#include "unicode/coll.h"
+#include "unicode/dcfmtsym.h"
+#include "unicode/fmtable.h"
+#include "unicode/locid.h"
+#include "unicode/numfmt.h"
+#include "unicode/unistr.h"
+#include "unicode/strenum.h"
+
+U_NAMESPACE_BEGIN
+
+class NFRuleSet;
+class LocalizationInfo;
+
+/**
+ * Tags for the predefined rulesets.
+ *
+ * @stable ICU 2.2
+ */
+enum URBNFRuleSetTag {
+    URBNF_SPELLOUT,
+    URBNF_ORDINAL,
+    URBNF_DURATION,
+    URBNF_COUNT
+};
+
+#if UCONFIG_NO_COLLATION
+class Collator;
+#endif
+
+/**
+ * The RuleBasedNumberFormat class formats numbers according to a set of rules. This number formatter is
+ * typically used for spelling out numeric values in words (e.g., 25,3476 as
+ * &quot;twenty-five thousand three hundred seventy-six&quot; or &quot;vingt-cinq mille trois
+ * cents soixante-seize&quot; or
+ * &quot;f&uuml;nfundzwanzigtausenddreihundertsechsundsiebzig&quot;), but can also be used for
+ * other complicated formatting tasks, such as formatting a number of seconds as hours,
+ * minutes and seconds (e.g., 3,730 as &quot;1:02:10&quot;).
+ *
+ * <p>The resources contain three predefined formatters for each locale: spellout, which
+ * spells out a value in words (123 is &quot;one hundred twenty-three&quot;); ordinal, which
+ * appends an ordinal suffix to the end of a numeral (123 is &quot;123rd&quot;); and
+ * duration, which shows a duration in seconds as hours, minutes, and seconds (123 is
+ * &quot;2:03&quot;).&nbsp; The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s
+ * by supplying programmer-defined rule sets.</p>
+ *
+ * <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description
+ * that is either passed to the constructor as a <tt>String</tt> or loaded from a resource
+ * bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
+ * Each rule has a string of output text and a value or range of values it is applicable to.
+ * In a typical spellout rule set, the first twenty rules are the words for the numbers from
+ * 0 to 19:</p>
+ *
+ * <pre>zero; one; two; three; four; five; six; seven; eight; nine;
+ * ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre>
+ *
+ * <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
+ * we only have to supply the words for the multiples of 10:</p>
+ *
+ * <pre> 20: twenty[-&gt;&gt;];
+ * 30: thirty[-&gt;&gt;];
+ * 40: forty[-&gt;&gt;];
+ * 50: fifty[-&gt;&gt;];
+ * 60: sixty[-&gt;&gt;];
+ * 70: seventy[-&gt;&gt;];
+ * 80: eighty[-&gt;&gt;];
+ * 90: ninety[-&gt;&gt;];</pre>
+ *
+ * <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the
+ * rule's output text with a colon. The rules are in a sorted list, and a rule is applicable
+ * to all numbers from its own base value to one less than the next rule's base value. The
+ * &quot;&gt;&gt;&quot; token is called a <em>substitution</em> and tells the fomatter to
+ * isolate the number's ones digit, format it using this same set of rules, and place the
+ * result at the position of the &quot;&gt;&gt;&quot; token. Text in brackets is omitted if
+ * the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24
+ * is &quot;twenty-four,&quot; not &quot;twenty four&quot;).</p>
+ *
+ * <p>For even larger numbers, we can actually look up several parts of the number in the
+ * list:</p>
+ *
+ * <pre>100: &lt;&lt; hundred[ &gt;&gt;];</pre>
+ *
+ * <p>The &quot;&lt;&lt;&quot; represents a new kind of substitution. The &lt;&lt; isolates
+ * the hundreds digit (and any digits to its left), formats it using this same rule set, and
+ * places the result where the &quot;&lt;&lt;&quot; was. Notice also that the meaning of
+ * &gt;&gt; has changed: it now refers to both the tens and the ones digits. The meaning of
+ * both substitutions depends on the rule's base value. The base value determines the rule's <em>divisor,</em>
+ * which is the highest power of 10 that is less than or equal to the base value (the user
+ * can change this). To fill in the substitutions, the formatter divides the number being
+ * formatted by the divisor. The integral quotient is used to fill in the &lt;&lt;
+ * substitution, and the remainder is used to fill in the &gt;&gt; substitution. The meaning
+ * of the brackets changes similarly: text in brackets is omitted if the value being
+ * formatted is an even multiple of the rule's divisor. The rules are applied recursively, so
+ * if a substitution is filled in with text that includes another substitution, that
+ * substitution is also filled in.</p>
+ *
+ * <p>This rule covers values up to 999, at which point we add another rule:</p>
+ *
+ * <pre>1000: &lt;&lt; thousand[ &gt;&gt;];</pre>
+ *
+ * <p>Again, the meanings of the brackets and substitution tokens shift because the rule's
+ * base value is a higher power of 10, changing the rule's divisor. This rule can actually be
+ * used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
+ *
+ * <pre> 1,000,000: &lt;&lt; million[ &gt;&gt;];
+ * 1,000,000,000: &lt;&lt; billion[ &gt;&gt;];
+ * 1,000,000,000,000: &lt;&lt; trillion[ &gt;&gt;];
+ * 1,000,000,000,000,000: OUT OF RANGE!;</pre>
+ *
+ * <p>Commas, periods, and spaces can be used in the base values to improve legibility and
+ * are ignored by the rule parser. The last rule in the list is customarily treated as an
+ * &quot;overflow rule,&quot; applying to everything from its base value on up, and often (as
+ * in this example) being used to print out an error message or default representation.
+ * Notice also that the size of the major groupings in large numbers is controlled by the
+ * spacing of the rules: because in English we group numbers by thousand, the higher rules
+ * are separated from each other by a factor of 1,000.</p>
+ *
+ * <p>To see how these rules actually work in practice, consider the following example:
+ * Formatting 25,430 with this rule set would work like this:</p>
+ *
+ * <table border="0" width="100%">
+ *   <tr>
+ *     <td><strong>&lt;&lt; thousand &gt;&gt;</strong></td>
+ *     <td>[the rule whose base value is 1,000 is applicable to 25,340]</td>
+ *   </tr>
+ *   <tr>
+ *     <td><strong>twenty-&gt;&gt;</strong> thousand &gt;&gt;</td>
+ *     <td>[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
+ *   </tr>
+ *   <tr>
+ *     <td>twenty-<strong>five</strong> thousand &gt;&gt;</td>
+ *     <td>[25 mod 10 is 5. The rule for 5 is &quot;five.&quot;</td>
+ *   </tr>
+ *   <tr>
+ *     <td>twenty-five thousand <strong>&lt;&lt; hundred &gt;&gt;</strong></td>
+ *     <td>[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
+ *   </tr>
+ *   <tr>
+ *     <td>twenty-five thousand <strong>three</strong> hundred &gt;&gt;</td>
+ *     <td>[340 over 100 is 3. The rule for 3 is &quot;three.&quot;]</td>
+ *   </tr>
+ *   <tr>
+ *     <td>twenty-five thousand three hundred <strong>forty</strong></td>
+ *     <td>[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
+ *     evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
+ *   </tr>
+ * </table>
+ *
+ * <p>The above syntax suffices only to format positive integers. To format negative numbers,
+ * we add a special rule:</p>
+ *
+ * <pre>-x: minus &gt;&gt;;</pre>
+ *
+ * <p>This is called a <em>negative-number rule,</em> and is identified by &quot;-x&quot;
+ * where the base value would be. This rule is used to format all negative numbers. the
+ * &gt;&gt; token here means &quot;find the number's absolute value, format it with these
+ * rules, and put the result here.&quot;</p>
+ *
+ * <p>We also add a special rule called a <em>fraction rule </em>for numbers with fractional
+ * parts:</p>
+ *
+ * <pre>x.x: &lt;&lt; point &gt;&gt;;</pre>
+ *
+ * <p>This rule is used for all positive non-integers (negative non-integers pass through the
+ * negative-number rule first and then through this rule). Here, the &lt;&lt; token refers to
+ * the number's integral part, and the &gt;&gt; to the number's fractional part. The
+ * fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be
+ * formatted as &quot;one hundred twenty-three point four five six&quot;).</p>
+ *
+ * <p>To see how this rule syntax is applied to various languages, examine the resource data.</p>
+ *
+ * <p>There is actually much more flexibility built into the rule language than the
+ * description above shows. A formatter may own multiple rule sets, which can be selected by
+ * the caller, and which can use each other to fill in their substitutions. Substitutions can
+ * also be filled in with digits, using a DecimalFormat object. There is syntax that can be
+ * used to alter a rule's divisor in various ways. And there is provision for much more
+ * flexible fraction handling. A complete description of the rule syntax follows:</p>
+ *
+ * <hr>
+ *
+ * <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule
+ * sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule
+ * set name must begin with a % sign. Rule sets with names that begin with a single % sign
+ * are <em>public:</em> the caller can specify that they be used to format and parse numbers.
+ * Rule sets with names that begin with %% are <em>private:</em> they exist only for the use
+ * of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
+ *
+ * <p>The user can also specify a special &quot;rule set&quot; named <tt>%%lenient-parse</tt>.
+ * The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt>
+ * description which is used to define equivalences for lenient parsing. For more information
+ * on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing,
+ * see <tt>setLenientParse()</tt>.  <em>Note:</em> symbols that have syntactic meaning
+ * in collation rules, such as '&amp;', have no particular meaning when appearing outside
+ * of the <tt>lenient-parse</tt> rule set.</p>
+ *
+ * <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
+ * Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
+ * These parameters are controlled by the description syntax, which consists of a <em>rule
+ * descriptor,</em> a colon, and a <em>rule body.</em></p>
+ *
+ * <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
+ * name of a token):</p>
+ *
+ * <table border="0" width="100%">
+ *   <tr>
+ *     <td><em>bv</em>:</td>
+ *     <td><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
+ *     number expressed using ASCII digits. <em>bv</em> may contain spaces, period, and commas,
+ *     which are ignored. The rule's divisor is the highest power of 10 less than or equal to
+ *     the base value.</td>
+ *   </tr>
+ *   <tr>
+ *     <td><em>bv</em>/<em>rad</em>:</td>
+ *     <td><em>bv</em> specifies the rule's base value. The rule's divisor is the
+ *     highest power of <em>rad</em> less than or equal to the base value.</td>
+ *   </tr>
+ *   <tr>
+ *     <td><em>bv</em>&gt;:</td>
+ *     <td><em>bv</em> specifies the rule's base value. To calculate the divisor,
+ *     let the radix be 10, and the exponent be the highest exponent of the radix that yields a
+ *     result less than or equal to the base value. Every &gt; character after the base value
+ *     decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
+ *     raised to the power of the exponent; otherwise, the divisor is 1.</td>
+ *   </tr>
+ *   <tr>
+ *     <td><em>bv</em>/<em>rad</em>&gt;:</td>
+ *     <td><em>bv</em> specifies the rule's base value. To calculate the divisor,
+ *     let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
+ *     yields a result less than or equal to the base value. Every &gt; character after the radix
+ *     decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
+ *     raised to the power of the exponent; otherwise, the divisor is 1.</td>
+ *   </tr>
+ *   <tr>
+ *     <td>-x:</td>
+ *     <td>The rule is a negative-number rule.</td>
+ *   </tr>
+ *   <tr>
+ *     <td>x.x:</td>
+ *     <td>The rule is an <em>improper fraction rule.</em></td>
+ *   </tr>
+ *   <tr>
+ *     <td>0.x:</td>
+ *     <td>The rule is a <em>proper fraction rule.</em></td>
+ *   </tr>
+ *   <tr>
+ *     <td>x.0:</td>
+ *     <td>The rule is a <em>master rule.</em></td>
+ *   </tr>
+ *   <tr>
+ *     <td><em>nothing</em></td>
+ *     <td>If the rule's rule descriptor is left out, the base value is one plus the
+ *     preceding rule's base value (or zero if this is the first rule in the list) in a normal
+ *     rule set.&nbsp; In a fraction rule set, the base value is the same as the preceding rule's
+ *     base value.</td>
+ *   </tr>
+ * </table>
+ *
+ * <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending
+ * on whether it is used to format a number's integral part (or the whole number) or a
+ * number's fractional part. Using a rule set to format a rule's fractional part makes it a
+ * fraction rule set.</p>
+ *
+ * <p>Which rule is used to format a number is defined according to one of the following
+ * algorithms: If the rule set is a regular rule set, do the following:
+ *
+ * <ul>
+ *   <li>If the rule set includes a master rule (and the number was passed in as a <tt>double</tt>),
+ *     use the master rule.&nbsp; (If the number being formatted was passed in as a <tt>long</tt>,
+ *     the master rule is ignored.)</li>
+ *   <li>If the number is negative, use the negative-number rule.</li>
+ *   <li>If the number has a fractional part and is greater than 1, use the improper fraction
+ *     rule.</li>
+ *   <li>If the number has a fractional part and is between 0 and 1, use the proper fraction
+ *     rule.</li>
+ *   <li>Binary-search the rule list for the rule with the highest base value less than or equal
+ *     to the number. If that rule has two substitutions, its base value is not an even multiple
+ *     of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the
+ *     rule that precedes it in the rule list. Otherwise, use the rule itself.</li>
+ * </ul>
+ *
+ * <p>If the rule set is a fraction rule set, do the following:
+ *
+ * <ul>
+ *   <li>Ignore negative-number and fraction rules.</li>
+ *   <li>For each rule in the list, multiply the number being formatted (which will always be
+ *     between 0 and 1) by the rule's base value. Keep track of the distance between the result
+ *     the nearest integer.</li>
+ *   <li>Use the rule that produced the result closest to zero in the above calculation. In the
+ *     event of a tie or a direct hit, use the first matching rule encountered. (The idea here is
+ *     to try each rule's base value as a possible denominator of a fraction. Whichever
+ *     denominator produces the fraction closest in value to the number being formatted wins.) If
+ *     the rule following the matching rule has the same base value, use it if the numerator of
+ *     the fraction is anything other than 1; if the numerator is 1, use the original matching
+ *     rule. (This is to allow singular and plural forms of the rule text without a lot of extra
+ *     hassle.)</li>
+ * </ul>
+ *
+ * <p>A rule's body consists of a string of characters terminated by a semicolon. The rule
+ * may include zero, one, or two <em>substitution tokens,</em> and a range of text in
+ * brackets. The brackets denote optional text (and may also include one or both
+ * substitutions). The exact meanings of the substitution tokens, and under what conditions
+ * optional text is omitted, depend on the syntax of the substitution token and the context.
+ * The rest of the text in a rule body is literal text that is output when the rule matches
+ * the number being formatted.</p>
+ *
+ * <p>A substitution token begins and ends with a <em>token character.</em> The token
+ * character and the context together specify a mathematical operation to be performed on the
+ * number being formatted. An optional <em>substitution descriptor </em>specifies how the
+ * value resulting from that operation is used to fill in the substitution. The position of
+ * the substitution token in the rule body specifies the location of the resultant text in
+ * the original rule text.</p>
+ *
+ * <p>The meanings of the substitution token characters are as follows:</p>
+ *
+ * <table border="0" width="100%">
+ *   <tr>
+ *     <td>&gt;&gt;</td>
+ *     <td>in normal rule</td>
+ *     <td>Divide the number by the rule's divisor and format the remainder</td>
+ *   </tr>
+ *   <tr>
+ *     <td></td>
+ *     <td>in negative-number rule</td>
+ *     <td>Find the absolute value of the number and format the result</td>
+ *   </tr>
+ *   <tr>
+ *     <td></td>
+ *     <td>in fraction or master rule</td>
+ *     <td>Isolate the number's fractional part and format it.</td>
+ *   </tr>
+ *   <tr>
+ *     <td></td>
+ *     <td>in rule in fraction rule set</td>
+ *     <td>Not allowed.</td>
+ *   </tr>
+ *   <tr>
+ *     <td>&gt;&gt;&gt;</td>
+ *     <td>in normal rule</td>
+ *     <td>Divide the number by the rule's divisor and format the remainder,
+ *       but bypass the normal rule-selection process and just use the
+ *       rule that precedes this one in this rule list.</td>
+ *   </tr>
+ *   <tr>
+ *     <td></td>
+ *     <td>in all other rules</td>
+ *     <td>Not allowed.</td>
+ *   </tr>
+ *   <tr>
+ *     <td>&lt;&lt;</td>
+ *     <td>in normal rule</td>
+ *     <td>Divide the number by the rule's divisor and format the quotient</td>
+ *   </tr>
+ *   <tr>
+ *     <td></td>
+ *     <td>in negative-number rule</td>
+ *     <td>Not allowed.</td>
+ *   </tr>
+ *   <tr>
+ *     <td></td>
+ *     <td>in fraction or master rule</td>
+ *     <td>Isolate the number's integral part and format it.</td>
+ *   </tr>
+ *   <tr>
+ *     <td></td>
+ *     <td>in rule in fraction rule set</td>
+ *     <td>Multiply the number by the rule's base value and format the result.</td>
+ *   </tr>
+ *   <tr>
+ *     <td>==</td>
+ *     <td>in all rule sets</td>
+ *     <td>Format the number unchanged</td>
+ *   </tr>
+ *   <tr>
+ *     <td>[]</td>
+ *     <td>in normal rule</td>
+ *     <td>Omit the optional text if the number is an even multiple of the rule's divisor</td>
+ *   </tr>
+ *   <tr>
+ *     <td></td>
+ *     <td>in negative-number rule</td>
+ *     <td>Not allowed.</td>
+ *   </tr>
+ *   <tr>
+ *     <td></td>
+ *     <td>in improper-fraction rule</td>
+ *     <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an
+ *     x.x rule and a 0.x rule)</td>
+ *   </tr>
+ *   <tr>
+ *     <td></td>
+ *     <td>in master rule</td>
+ *     <td>Omit the optional text if the number is an integer (same as specifying both an x.x
+ *     rule and an x.0 rule)</td>
+ *   </tr>
+ *   <tr>
+ *     <td></td>
+ *     <td>in proper-fraction rule</td>
+ *     <td>Not allowed.</td>
+ *   </tr>
+ *   <tr>
+ *     <td></td>
+ *     <td>in rule in fraction rule set</td>
+ *     <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td>
+ *   </tr>
+ * </table>
+ *
+ * <p>The substitution descriptor (i.e., the text between the token characters) may take one
+ * of three forms:</p>
+ *
+ * <table border="0" width="100%">
+ *   <tr>
+ *     <td>a rule set name</td>
+ *     <td>Perform the mathematical operation on the number, and format the result using the
+ *     named rule set.</td>
+ *   </tr>
+ *   <tr>
+ *     <td>a DecimalFormat pattern</td>
+ *     <td>Perform the mathematical operation on the number, and format the result using a
+ *     DecimalFormat with the specified pattern.&nbsp; The pattern must begin with 0 or #.</td>
+ *   </tr>
+ *   <tr>
+ *     <td>nothing</td>
+ *     <td>Perform the mathematical operation on the number, and format the result using the rule
+ *     set containing the current rule, except:
+ *     <ul>
+ *       <li>You can't have an empty substitution descriptor with a == substitution.</li>
+ *       <li>If you omit the substitution descriptor in a &gt;&gt; substitution in a fraction rule,
+ *         format the result one digit at a time using the rule set containing the current rule.</li>
+ *       <li>If you omit the substitution descriptor in a &lt;&lt; substitution in a rule in a
+ *         fraction rule set, format the result using the default rule set for this formatter.</li>
+ *     </ul>
+ *     </td>
+ *   </tr>
+ * </table>
+ *
+ * <p>Whitespace is ignored between a rule set name and a rule set body, between a rule
+ * descriptor and a rule body, or between rules. If a rule body begins with an apostrophe,
+ * the apostrophe is ignored, but all text after it becomes significant (this is how you can
+ * have a rule's rule text begin with whitespace). There is no escape function: the semicolon
+ * is not allowed in rule set names or in rule text, and the colon is not allowed in rule set
+ * names. The characters beginning a substitution token are always treated as the beginning
+ * of a substitution token.</p>
+ *
+ * <p>See the resource data and the demo program for annotated examples of real rule sets
+ * using these features.</p>
+ *
+ * <p><em>User subclasses are not supported.</em> While clients may write
+ * subclasses, such code will not necessarily work and will not be
+ * guaranteed to work stably from release to release.
+ *
+ * <p><b>Localizations</b></p>
+ * <p>Constructors are available that allow the specification of localizations for the
+ * public rule sets (and also allow more control over what public rule sets are available).
+ * Localization data is represented as a textual description.  The description represents
+ * an array of arrays of string.  The first element is an array of the public rule set names,
+ * each of these must be one of the public rule set names that appear in the rules.  Only
+ * names in this array will be treated as public rule set names by the API.  Each subsequent
+ * element is an array of localizations of these names.  The first element of one of these
+ * subarrays is the locale name, and the remaining elements are localizations of the
+ * public rule set names, in the same order as they were listed in the first arrray.</p>
+ * <p>In the syntax, angle brackets '<', '>' are used to delimit the arrays, and comma ',' is used
+ * to separate elements of an array.  Whitespace is ignored, unless quoted.</p>
+ * <p>For example:<pre>
+ * < < %foo, %bar, %baz >, 
+ *   < en, Foo, Bar, Baz >, 
+ *   < fr, 'le Foo', 'le Bar', 'le Baz' > 
+ *   < zh, \\u7532, \\u4e59, \\u4e19 > >
+ * </pre></p>
+ * @author Richard Gillam
+ * @see NumberFormat
+ * @see DecimalFormat
+ * @stable ICU 2.0
+ */
+class U_I18N_API RuleBasedNumberFormat : public NumberFormat {
+public:
+
+  //-----------------------------------------------------------------------
+  // constructors
+  //-----------------------------------------------------------------------
+
+    /**
+     * Creates a RuleBasedNumberFormat that behaves according to the description
+     * passed in.  The formatter uses the default locale.
+     * @param rules A description of the formatter's desired behavior.
+     * See the class documentation for a complete explanation of the description
+     * syntax.
+     * @param perror The parse error if an error was encountered.
+     * @param status The status indicating whether the constructor succeeded.
+     * @stable ICU 3.2
+     */
+    RuleBasedNumberFormat(const UnicodeString& rules, UParseError& perror, UErrorCode& status);
+
+    /**
+     * Creates a RuleBasedNumberFormat that behaves according to the description
+     * passed in.  The formatter uses the default locale.  
+     * <p>
+     * The localizations data provides information about the public
+     * rule sets and their localized display names for different
+     * locales. The first element in the list is an array of the names
+     * of the public rule sets.  The first element in this array is
+     * the initial default ruleset.  The remaining elements in the
+     * list are arrays of localizations of the names of the public
+     * rule sets.  Each of these is one longer than the initial array,
+     * with the first String being the ULocale ID, and the remaining
+     * Strings being the localizations of the rule set names, in the
+     * same order as the initial array.  Arrays are NULL-terminated.
+     * @param rules A description of the formatter's desired behavior.
+     * See the class documentation for a complete explanation of the description
+     * syntax.
+     * @param localizations the localization information.
+     * names in the description.  These will be copied by the constructor.
+     * @param perror The parse error if an error was encountered.
+     * @param status The status indicating whether the constructor succeeded.
+     * @stable ICU 3.2
+     */
+    RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations,
+                        UParseError& perror, UErrorCode& status);
+
+  /**
+   * Creates a RuleBasedNumberFormat that behaves according to the rules
+   * passed in.  The formatter uses the specified locale to determine the
+   * characters to use when formatting numerals, and to define equivalences
+   * for lenient parsing.
+   * @param rules The formatter rules.
+   * See the class documentation for a complete explanation of the rule
+   * syntax.
+   * @param locale A locale that governs which characters are used for
+   * formatting values in numerals and which characters are equivalent in
+   * lenient parsing.
+   * @param perror The parse error if an error was encountered.
+   * @param status The status indicating whether the constructor succeeded.
+   * @stable ICU 2.0
+   */
+  RuleBasedNumberFormat(const UnicodeString& rules, const Locale& locale,
+                        UParseError& perror, UErrorCode& status);
+
+    /**
+     * Creates a RuleBasedNumberFormat that behaves according to the description
+     * passed in.  The formatter uses the default locale.  
+     * <p>
+     * The localizations data provides information about the public
+     * rule sets and their localized display names for different
+     * locales. The first element in the list is an array of the names
+     * of the public rule sets.  The first element in this array is
+     * the initial default ruleset.  The remaining elements in the
+     * list are arrays of localizations of the names of the public
+     * rule sets.  Each of these is one longer than the initial array,
+     * with the first String being the ULocale ID, and the remaining
+     * Strings being the localizations of the rule set names, in the
+     * same order as the initial array.  Arrays are NULL-terminated.
+     * @param rules A description of the formatter's desired behavior.
+     * See the class documentation for a complete explanation of the description
+     * syntax.
+     * @param localizations a list of localizations for the rule set
+     * names in the description.  These will be copied by the constructor.
+     * @param locale A locale that governs which characters are used for
+     * formatting values in numerals and which characters are equivalent in
+     * lenient parsing.
+     * @param perror The parse error if an error was encountered.
+     * @param status The status indicating whether the constructor succeeded.
+     * @stable ICU 3.2
+     */
+    RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations,
+                        const Locale& locale, UParseError& perror, UErrorCode& status);
+
+  /**
+   * Creates a RuleBasedNumberFormat from a predefined ruleset.  The selector
+   * code choosed among three possible predefined formats: spellout, ordinal,
+   * and duration.
+   * @param tag A selector code specifying which kind of formatter to create for that
+   * locale.  There are three legal values: URBNF_SPELLOUT, which creates a formatter that
+   * spells out a value in words in the desired language, URBNF_ORDINAL, which attaches
+   * an ordinal suffix from the desired language to the end of a number (e.g. "123rd"),
+   * and URBNF_DURATION, which formats a duration in seconds as hours, minutes, and seconds.
+   * @param locale The locale for the formatter.
+   * @param status The status indicating whether the constructor succeeded.
+   * @stable ICU 2.0
+   */
+  RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& locale, UErrorCode& status);
+
+  //-----------------------------------------------------------------------
+  // boilerplate
+  //-----------------------------------------------------------------------
+
+  /**
+   * Copy constructor
+   * @param rhs    the object to be copied from.
+   * @stable ICU 2.6
+   */
+  RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs);
+
+  /**
+   * Assignment operator
+   * @param rhs    the object to be copied from.
+   * @stable ICU 2.6
+   */
+  RuleBasedNumberFormat& operator=(const RuleBasedNumberFormat& rhs);
+
+  /**
+   * Release memory allocated for a RuleBasedNumberFormat when you are finished with it.
+   * @stable ICU 2.6
+   */
+  virtual ~RuleBasedNumberFormat();
+
+  /**
+   * Clone this object polymorphically.  The caller is responsible
+   * for deleting the result when done.
+   * @return  A copy of the object.
+   * @stable ICU 2.6
+   */
+  virtual Format* clone(void) const;
+
+  /**
+   * Return true if the given Format objects are semantically equal.
+   * Objects of different subclasses are considered unequal.
+   * @param other    the object to be compared with.
+   * @return        true if the given Format objects are semantically equal.
+   * @stable ICU 2.6
+   */
+  virtual UBool operator==(const Format& other) const;
+
+//-----------------------------------------------------------------------
+// public API functions
+//-----------------------------------------------------------------------
+
+  /**
+   * return the rules that were provided to the RuleBasedNumberFormat.
+   * @return the result String that was passed in
+   * @stable ICU 2.0
+   */
+  virtual UnicodeString getRules() const;
+
+  /**
+   * Return the number of public rule set names.
+   * @return the number of public rule set names.
+   * @stable ICU 2.0
+   */
+  virtual int32_t getNumberOfRuleSetNames() const;
+
+  /**
+   * Return the name of the index'th public ruleSet.  If index is not valid,
+   * the function returns null.
+   * @param index the index of the ruleset
+   * @return the name of the index'th public ruleSet.
+   * @stable ICU 2.0
+   */
+  virtual UnicodeString getRuleSetName(int32_t index) const;
+
+  /**
+   * Return the number of locales for which we have localized rule set display names.
+   * @return the number of locales for which we have localized rule set display names.
+   * @stable ICU 3.2
+   */
+  virtual int32_t getNumberOfRuleSetDisplayNameLocales(void) const;
+
+  /**
+   * Return the index'th display name locale.
+   * @param index the index of the locale
+   * @param status set to a failure code when this function fails
+   * @return the locale
+   * @see #getNumberOfRuleSetDisplayNameLocales
+   * @stable ICU 3.2
+   */
+  virtual Locale getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const;
+
+    /**
+     * Return the rule set display names for the provided locale.  These are in the same order
+     * as those returned by getRuleSetName.  The locale is matched against the locales for
+     * which there is display name data, using normal fallback rules.  If no locale matches, 
+     * the default display names are returned.  (These are the internal rule set names minus
+     * the leading '%'.)
+     * @param index the index of the rule set
+     * @param locale the locale (returned by getRuleSetDisplayNameLocales) for which the localized
+     * display name is desired
+     * @return the display name for the given index, which might be bogus if there is an error
+     * @see #getRuleSetName
+     * @stable ICU 3.2
+     */
+  virtual UnicodeString getRuleSetDisplayName(int32_t index, 
+                          const Locale& locale = Locale::getDefault());
+
+    /**
+     * Return the rule set display name for the provided rule set and locale.  
+     * The locale is matched against the locales for which there is display name data, using
+     * normal fallback rules.  If no locale matches, the default display name is returned.
+     * @return the display name for the rule set
+     * @stable ICU 3.2
+     * @see #getRuleSetDisplayName
+     */
+  virtual UnicodeString getRuleSetDisplayName(const UnicodeString& ruleSetName, 
+                          const Locale& locale = Locale::getDefault());
+
+  /**
+   * Formats the specified 32-bit number using the default ruleset.
+   * @param number The number to format.
+   * @param toAppendTo the string that will hold the (appended) result
+   * @param pos the fieldposition
+   * @return A textual representation of the number.
+   * @stable ICU 2.0
+   */
+  virtual UnicodeString& format(int32_t number,
+                                UnicodeString& toAppendTo,
+                                FieldPosition& pos) const;
+
+  /**
+   * Formats the specified 64-bit number using the default ruleset.
+   * @param number The number to format.
+   * @param toAppendTo the string that will hold the (appended) result
+   * @param pos the fieldposition
+   * @return A textual representation of the number.
+   * @stable ICU 2.1
+   */
+  virtual UnicodeString& format(int64_t number,
+                                UnicodeString& toAppendTo,
+                                FieldPosition& pos) const;
+  /**
+   * Formats the specified number using the default ruleset.
+   * @param number The number to format.
+   * @param toAppendTo the string that will hold the (appended) result
+   * @param pos the fieldposition
+   * @return A textual representation of the number.
+   * @stable ICU 2.0
+   */
+  virtual UnicodeString& format(double number,
+                                UnicodeString& toAppendTo,
+                                FieldPosition& pos) const;
+
+  /**
+   * Formats the specified number using the named ruleset.
+   * @param number The number to format.
+   * @param ruleSetName The name of the rule set to format the number with.
+   * This must be the name of a valid public rule set for this formatter.
+   * @param toAppendTo the string that will hold the (appended) result
+   * @param pos the fieldposition
+   * @param status the status
+   * @return A textual representation of the number.
+   * @stable ICU 2.0
+   */
+  virtual UnicodeString& format(int32_t number,
+                                const UnicodeString& ruleSetName,
+                                UnicodeString& toAppendTo,
+                                FieldPosition& pos,
+                                UErrorCode& status) const;
+  /**
+   * Formats the specified 64-bit number using the named ruleset.
+   * @param number The number to format.
+   * @param ruleSetName The name of the rule set to format the number with.
+   * This must be the name of a valid public rule set for this formatter.
+   * @param toAppendTo the string that will hold the (appended) result
+   * @param pos the fieldposition
+   * @param status the status
+   * @return A textual representation of the number.
+   * @stable ICU 2.1
+   */
+  virtual UnicodeString& format(int64_t number,
+                                const UnicodeString& ruleSetName,
+                                UnicodeString& toAppendTo,
+                                FieldPosition& pos,
+                                UErrorCode& status) const;
+  /**
+   * Formats the specified number using the named ruleset.
+   * @param number The number to format.
+   * @param ruleSetName The name of the rule set to format the number with.
+   * This must be the name of a valid public rule set for this formatter.
+   * @param toAppendTo the string that will hold the (appended) result
+   * @param pos the fieldposition
+   * @param status the status
+   * @return A textual representation of the number.
+   * @stable ICU 2.0
+   */
+  virtual UnicodeString& format(double number,
+                                const UnicodeString& ruleSetName,
+                                UnicodeString& toAppendTo,
+                                FieldPosition& pos,
+                                UErrorCode& status) const;
+
+  /**
+   * Formats the specified number using the default ruleset.
+   * @param obj The number to format.
+   * @param toAppendTo the string that will hold the (appended) result
+   * @param pos the fieldposition
+   * @param status the status
+   * @return A textual representation of the number.
+   * @stable ICU 2.0
+   */
+  virtual UnicodeString& format(const Formattable& obj,
+                                UnicodeString& toAppendTo,
+                                FieldPosition& pos,
+                                UErrorCode& status) const;
+  /**
+   * Redeclared Format method.
+   * @param obj    the object to be formatted.
+   * @param result Output param which will receive the formatted string.
+   * @param status Output param set to success/failure code
+   * @return       A reference to 'result'.
+   * @stable ICU 2.0
+   */
+  UnicodeString& format(const Formattable& obj,
+                        UnicodeString& result,
+                        UErrorCode& status) const;
+
+  /**
+   * Redeclared NumberFormat method.
+   * @param number    the double value to be formatted.
+   * @param output    Output param which will receive the formatted string.
+   * @return          A reference to 'output'.
+   * @stable ICU 2.0
+   */
+   UnicodeString& format(double number,
+                         UnicodeString& output) const;
+
+  /**
+   * Redeclared NumberFormat method.
+   * @param number    the long value to be formatted.
+   * @param output    Output param which will receive the formatted string.
+   * @return          A reference to 'output'.
+   * @stable ICU 2.0
+   */
+   UnicodeString& format(int32_t number,
+                         UnicodeString& output) const;
+
+  /**
+   * Parses the specfied string, beginning at the specified position, according
+   * to this formatter's rules.  This will match the string against all of the
+   * formatter's public rule sets and return the value corresponding to the longest
+   * parseable substring.  This function's behavior is affected by the lenient
+   * parse mode.
+   * @param text The string to parse
+   * @param result the result of the parse, either a double or a long.
+   * @param parsePosition On entry, contains the position of the first character
+   * in "text" to examine.  On exit, has been updated to contain the position
+   * of the first character in "text" that wasn't consumed by the parse.
+   * @see #setLenient
+   * @stable ICU 2.0
+   */
+  virtual void parse(const UnicodeString& text,
+                     Formattable& result,
+                     ParsePosition& parsePosition) const;
+
+
+  /**
+   * Redeclared Format method.
+   * @param text   The string to parse
+   * @param result the result of the parse, either a double or a long.
+   * @param status Output param set to failure code when a problem occurs.
+   * @stable ICU 2.0
+   */
+  virtual inline void parse(const UnicodeString& text,
+                      Formattable& result,
+                      UErrorCode& status) const;
+
+#if !UCONFIG_NO_COLLATION
+
+  /**
+   * Turns lenient parse mode on and off.
+   *
+   * When in lenient parse mode, the formatter uses a Collator for parsing the text.
+   * Only primary differences are treated as significant.  This means that case
+   * differences, accent differences, alternate spellings of the same letter
+   * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
+   * matching the text.  In many cases, numerals will be accepted in place of words
+   * or phrases as well.
+   *
+   * For example, all of the following will correctly parse as 255 in English in
+   * lenient-parse mode:
+   * <br>"two hundred fifty-five"
+   * <br>"two hundred fifty five"
+   * <br>"TWO HUNDRED FIFTY-FIVE"
+   * <br>"twohundredfiftyfive"
+   * <br>"2 hundred fifty-5"
+   *
+   * The Collator used is determined by the locale that was
+   * passed to this object on construction.  The description passed to this object
+   * on construction may supply additional collation rules that are appended to the
+   * end of the default collator for the locale, enabling additional equivalences
+   * (such as adding more ignorable characters or permitting spelled-out version of
+   * symbols; see the demo program for examples).
+   *
+   * It's important to emphasize that even strict parsing is relatively lenient: it
+   * will accept some text that it won't produce as output.  In English, for example,
+   * it will correctly parse "two hundred zero" and "fifteen hundred".
+   *
+   * @param enabled If true, turns lenient-parse mode on; if false, turns it off.
+   * @see RuleBasedCollator
+   * @stable ICU 2.0
+   */
+  virtual void setLenient(UBool enabled);
+
+  /**
+   * Returns true if lenient-parse mode is turned on.  Lenient parsing is off
+   * by default.
+   * @return true if lenient-parse mode is turned on.
+   * @see #setLenient
+   * @stable ICU 2.0
+   */
+  virtual inline UBool isLenient(void) const;
+
+#endif
+
+  /**
+   * Override the default rule set to use.  If ruleSetName is null, reset
+   * to the initial default rule set.  If the rule set is not a public rule set name,
+   * U_ILLEGAL_ARGUMENT_ERROR is returned in status.
+   * @param ruleSetName the name of the rule set, or null to reset the initial default.
+   * @param status set to failure code when a problem occurs.
+   * @stable ICU 2.6
+   */
+  virtual void setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status);
+
+  /**
+   * Return the name of the current default rule set.  If the current rule set is
+   * not public, returns a bogus (and empty) UnicodeString.
+   * @return the name of the current default rule set
+   * @stable ICU 3.0
+   */
+  virtual UnicodeString getDefaultRuleSetName() const;
+
+public:
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.8
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.8
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+private:
+    RuleBasedNumberFormat(); // default constructor not implemented
+
+    // this will ref the localizations if they are not NULL
+    // caller must deref to get adoption 
+    RuleBasedNumberFormat(const UnicodeString& description, LocalizationInfo* localizations, 
+              const Locale& locale, UParseError& perror, UErrorCode& status);
+
+    void init(const UnicodeString& rules, LocalizationInfo* localizations, UParseError& perror, UErrorCode& status);
+    void dispose();
+    void stripWhitespace(UnicodeString& src);
+    void initDefaultRuleSet();
+    void format(double number, NFRuleSet& ruleSet);
+    NFRuleSet* findRuleSet(const UnicodeString& name, UErrorCode& status) const;
+
+    /* friend access */
+    friend class NFSubstitution;
+    friend class NFRule;
+    friend class FractionalPartSubstitution;
+
+    inline NFRuleSet * getDefaultRuleSet() const;
+    Collator * getCollator() const;
+    DecimalFormatSymbols * getDecimalFormatSymbols() const;
+
+private:
+    NFRuleSet **ruleSets;
+    NFRuleSet *defaultRuleSet;
+    Locale locale;
+    Collator* collator;
+    DecimalFormatSymbols* decimalFormatSymbols;
+    UBool lenient;
+    UnicodeString* lenientParseRules;
+    LocalizationInfo* localizations;
+};
+
+// ---------------
+
+inline UnicodeString&
+RuleBasedNumberFormat::format(const Formattable& obj,
+                              UnicodeString& result,
+                              UErrorCode& status) const
+{
+    // Don't use Format:: - use immediate base class only,
+    // in case immediate base modifies behavior later.
+    // dlf - the above comment is bogus, if there were a reason to modify
+    // it, it would be virtual, and there's no reason because it is
+    // a one-line macro in NumberFormat anyway, just like this one.
+    return NumberFormat::format(obj, result, status);
+}
+
+inline UnicodeString&
+RuleBasedNumberFormat::format(double number, UnicodeString& output) const {
+    FieldPosition pos(0);
+    return format(number, output, pos);
+}
+
+inline UnicodeString&
+RuleBasedNumberFormat::format(int32_t number, UnicodeString& output) const {
+    FieldPosition pos(0);
+    return format(number, output, pos);
+}
+
+inline void
+RuleBasedNumberFormat::parse(const UnicodeString& text, Formattable& result, UErrorCode& status) const
+{
+    NumberFormat::parse(text, result, status);
+}
+
+#if !UCONFIG_NO_COLLATION
+
+inline UBool
+RuleBasedNumberFormat::isLenient(void) const {
+    return lenient;
+}
+
+#endif
+
+inline NFRuleSet*
+RuleBasedNumberFormat::getDefaultRuleSet() const {
+    return defaultRuleSet;
+}
+
+U_NAMESPACE_END
+
+/* U_HAVE_RBNF */
+#endif
+
+/* RBNF_H */
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/rbtz.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/rbtz.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/rbtz.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,361 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 2007-2008, International Business Machines Corporation and    *
-* others. All Rights Reserved.                                                *
-*******************************************************************************
-*/
-#ifndef RBTZ_H
-#define RBTZ_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Rule based customizable time zone
- */
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/basictz.h"
-#include "unicode/unistr.h"
-
-U_NAMESPACE_BEGIN
-
-// forward declaration
-class UVector;
-struct Transition;
-
-/**
- * a BasicTimeZone subclass implemented in terms of InitialTimeZoneRule and TimeZoneRule instances
- * @see BasicTimeZone
- * @see InitialTimeZoneRule
- * @see TimeZoneRule
- */
-class U_I18N_API RuleBasedTimeZone : public BasicTimeZone {
-public:
-    /**
-     * Constructs a <code>RuleBasedTimeZone</code> object with the ID and the
-     * <code>InitialTimeZoneRule</code>.  The input <code>InitialTimeZoneRule</code>
-     * is adopted by this <code>RuleBasedTimeZone</code>, thus the caller must not
-     * delete it.
-     * @param id                The time zone ID.
-     * @param initialRule       The initial time zone rule.
-     * @stable ICU 4.0
-     */
-    RuleBasedTimeZone(const UnicodeString& id, InitialTimeZoneRule* initialRule);
-
-    /**
-     * Copy constructor.
-     * @param source    The RuleBasedTimeZone object to be copied.
-     * @stable ICU 4.0
-     */
-    RuleBasedTimeZone(const RuleBasedTimeZone& source);
-
-    /**
-     * Destructor.
-     * @stable ICU 4.0
-     */
-    virtual ~RuleBasedTimeZone();
-
-    /**
-     * Assignment operator.
-     * @param right The object to be copied.
-     * @stable ICU 4.0
-     */
-    RuleBasedTimeZone& operator=(const RuleBasedTimeZone& right);
-
-    /**
-     * Return true if the given <code>TimeZone</code> objects are
-     * semantically equal. Objects of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given <code>TimeZone</code> objects are
-      *semantically equal.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator==(const TimeZone& that) const;
-
-    /**
-     * Return true if the given <code>TimeZone</code> objects are
-     * semantically unequal. Objects of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given <code>TimeZone</code> objects are
-     * semantically unequal.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator!=(const TimeZone& that) const;
-
-    /**
-     * Adds the <code>TimeZoneRule</code> which represents time transitions.
-     * The <code>TimeZoneRule</code> must have start times, that is, the result
-     * of isTransitionRule() must be true. Otherwise, U_ILLEGAL_ARGUMENT_ERROR
-     * is set to the error code.
-     * The input <code>TimeZoneRule</code> is adopted by this
-     * <code>RuleBasedTimeZone</code> on successful completion of this method,
-     * thus, the caller must not delete it when no error is returned.
-     * After all rules are added, the caller must call complete() method to
-     * make this <code>RuleBasedTimeZone</code> ready to handle common time
-     * zone functions.
-     * @param rule The <code>TimeZoneRule</code>.
-     * @param status Output param to filled in with a success or an error.
-     * @stable ICU 4.0
-     */
-    void addTransitionRule(TimeZoneRule* rule, UErrorCode& status);
-
-    /**
-     * Makes the <code>TimeZoneRule</code> ready to handle actual timezone
-     * calcuation APIs.  This method collects time zone rules specified
-     * by the caller via the constructor and addTransitionRule() and
-     * builds internal structure for making the object ready to support
-     * time zone APIs such as getOffset(), getNextTransition() and others.
-     * @param status Output param to filled in with a success or an error.
-     * @stable ICU 4.0
-     */
-    void complete(UErrorCode& status);
-
-    /**
-     * Clones TimeZone objects polymorphically. Clients are responsible for deleting
-     * the TimeZone object cloned.
-     *
-     * @return   A new copy of this TimeZone object.
-     * @stable ICU 4.0
-     */
-    virtual TimeZone* clone(void) const;
-
-    /**
-     * Returns the TimeZone's adjusted GMT offset (i.e., the number of milliseconds to add
-     * to GMT to get local time in this time zone, taking daylight savings time into
-     * account) as of a particular reference date.  The reference date is used to determine
-     * whether daylight savings time is in effect and needs to be figured into the offset
-     * that is returned (in other words, what is the adjusted GMT offset in this time zone
-     * at this particular date and time?).  For the time zones produced by createTimeZone(),
-     * the reference data is specified according to the Gregorian calendar, and the date
-     * and time fields are local standard time.
-     *
-     * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload,
-     * which returns both the raw and the DST offset for a given time. This method
-     * is retained only for backward compatibility.
-     *
-     * @param era        The reference date's era
-     * @param year       The reference date's year
-     * @param month      The reference date's month (0-based; 0 is January)
-     * @param day        The reference date's day-in-month (1-based)
-     * @param dayOfWeek  The reference date's day-of-week (1-based; 1 is Sunday)
-     * @param millis     The reference date's milliseconds in day, local standard time
-     * @param status     Output param to filled in with a success or an error.
-     * @return           The offset in milliseconds to add to GMT to get local time.
-     * @stable ICU 4.0
-     */
-    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
-                              uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const;
-
-    /**
-     * Gets the time zone offset, for current date, modified in case of
-     * daylight savings. This is the offset to add *to* UTC to get local time.
-     *
-     * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload,
-     * which returns both the raw and the DST offset for a given time. This method
-     * is retained only for backward compatibility.
-     *
-     * @param era        The reference date's era
-     * @param year       The reference date's year
-     * @param month      The reference date's month (0-based; 0 is January)
-     * @param day        The reference date's day-in-month (1-based)
-     * @param dayOfWeek  The reference date's day-of-week (1-based; 1 is Sunday)
-     * @param millis     The reference date's milliseconds in day, local standard time
-     * @param monthLength The length of the given month in days.
-     * @param status     Output param to filled in with a success or an error.
-     * @return           The offset in milliseconds to add to GMT to get local time.
-     * @stable ICU 4.0
-     */
-    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
-                           uint8_t dayOfWeek, int32_t millis,
-                           int32_t monthLength, UErrorCode& status) const;
-
-    /**
-     * Returns the time zone raw and GMT offset for the given moment
-     * in time.  Upon return, local-millis = GMT-millis + rawOffset +
-     * dstOffset.  All computations are performed in the proleptic
-     * Gregorian calendar.  The default implementation in the TimeZone
-     * class delegates to the 8-argument getOffset().
-     *
-     * @param date moment in time for which to return offsets, in
-     * units of milliseconds from January 1, 1970 0:00 GMT, either GMT
-     * time or local wall time, depending on `local'.
-     * @param local if true, `date' is local wall time; otherwise it
-     * is in GMT time.
-     * @param rawOffset output parameter to receive the raw offset, that
-     * is, the offset not including DST adjustments
-     * @param dstOffset output parameter to receive the DST offset,
-     * that is, the offset to be added to `rawOffset' to obtain the
-     * total offset between local and GMT time. If DST is not in
-     * effect, this value is zero; otherwise it is a positive value,
-     * typically one hour.
-     * @param ec input-output error code
-     * @stable ICU 4.0
-     */
-    virtual void getOffset(UDate date, UBool local, int32_t& rawOffset,
-                           int32_t& dstOffset, UErrorCode& ec) const;
-
-    /**
-     * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
-     * to GMT to get local time, before taking daylight savings time into account).
-     *
-     * @param offsetMillis  The new raw GMT offset for this time zone.
-     * @stable ICU 4.0
-     */
-    virtual void setRawOffset(int32_t offsetMillis);
-
-    /**
-     * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
-     * to GMT to get local time, before taking daylight savings time into account).
-     *
-     * @return   The TimeZone's raw GMT offset.
-     * @stable ICU 4.0
-     */
-    virtual int32_t getRawOffset(void) const;
-
-    /**
-     * Queries if this time zone uses daylight savings time.
-     * @return true if this time zone uses daylight savings time,
-     * false, otherwise.
-     * @stable ICU 4.0
-     */
-    virtual UBool useDaylightTime(void) const;
-
-    /**
-     * Queries if the given date is in daylight savings time in
-     * this time zone.
-     * This method is wasteful since it creates a new GregorianCalendar and
-     * deletes it each time it is called. This is a deprecated method
-     * and provided only for Java compatibility.
-     *
-     * @param date the given UDate.
-     * @param status Output param filled in with success/error code.
-     * @return true if the given date is in daylight savings time,
-     * false, otherwise.
-     * @deprecated ICU 2.4. Use Calendar::inDaylightTime() instead.
-     */
-    virtual UBool inDaylightTime(UDate date, UErrorCode& status) const;
-
-    /**
-     * Returns true if this zone has the same rule and offset as another zone.
-     * That is, if this zone differs only in ID, if at all.
-     * @param other the <code>TimeZone</code> object to be compared with
-     * @return true if the given zone is the same as this one,
-     * with the possible exception of the ID
-     * @stable ICU 4.0
-     */
-    virtual UBool hasSameRules(const TimeZone& other) const;
-
-    /**
-     * Gets the first time zone transition after the base time.
-     * @param base      The base time.
-     * @param inclusive Whether the base time is inclusive or not.
-     * @param result    Receives the first transition after the base time.
-     * @return  TRUE if the transition is found.
-     * @stable ICU 4.0
-     */
-    virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/;
-
-    /**
-     * Gets the most recent time zone transition before the base time.
-     * @param base      The base time.
-     * @param inclusive Whether the base time is inclusive or not.
-     * @param result    Receives the most recent transition before the base time.
-     * @return  TRUE if the transition is found.
-     * @stable ICU 4.0
-     */
-    virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/;
-
-    /**
-     * Returns the number of <code>TimeZoneRule</code>s which represents time transitions,
-     * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except
-     * <code>InitialTimeZoneRule</code>.  The return value range is 0 or any positive value.
-     * @param status    Receives error status code.
-     * @return The number of <code>TimeZoneRule</code>s representing time transitions.
-     * @stable ICU 4.0
-     */
-    virtual int32_t countTransitionRules(UErrorCode& status) /*const*/;
-
-    /**
-     * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code>
-     * which represent time transitions for this time zone.  On successful return,
-     * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and
-     * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code>
-     * instances up to the size specified by trscount.  The results are referencing the
-     * rule instance held by this time zone instance.  Therefore, after this time zone
-     * is destructed, they are no longer available.
-     * @param initial       Receives the initial timezone rule
-     * @param trsrules      Receives the timezone transition rules
-     * @param trscount      On input, specify the size of the array 'transitions' receiving
-     *                      the timezone transition rules.  On output, actual number of
-     *                      rules filled in the array will be set.
-     * @param status        Receives error status code.
-     * @stable ICU 4.0
-     */
-    virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial,
-        const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) /*const*/;
-
-    /**
-     * Get time zone offsets from local wall time.
-     * @internal
-     */
-    virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt,
-        int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) /*const*/;
-
-private:
-    void deleteRules(void);
-    void deleteTransitions(void);
-    UVector* copyRules(UVector* source);
-    TimeZoneRule* findRuleInFinal(UDate date, UBool local,
-        int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt) const;
-    UBool findNext(UDate base, UBool inclusive, UDate& time, TimeZoneRule*& from, TimeZoneRule*& to) const;
-    UBool findPrev(UDate base, UBool inclusive, UDate& time, TimeZoneRule*& from, TimeZoneRule*& to) const;
-    int32_t getLocalDelta(int32_t rawBefore, int32_t dstBefore, int32_t rawAfter, int32_t dstAfter,
-        int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt) const;
-    UDate getTransitionTime(Transition* transition, UBool local,
-        int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt) const;
-    void getOffsetInternal(UDate date, UBool local, int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt,
-        int32_t& rawOffset, int32_t& dstOffset, UErrorCode& ec) const;
-
-    InitialTimeZoneRule *fInitialRule;
-    UVector             *fHistoricRules;
-    UVector             *fFinalRules;
-    UVector             *fHistoricTransitions;
-    UBool               fUpToDate;
-
-public:
-    /**
-     * Return the class ID for this class. This is useful only for comparing to
-     * a return value from getDynamicClassID(). For example:
-     * <pre>
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       erived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 4.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
-     * method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone()
-     * methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 4.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // RBTZ_H
-
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/rbtz.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/rbtz.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/rbtz.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/rbtz.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,361 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2008, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*/
+#ifndef RBTZ_H
+#define RBTZ_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Rule based customizable time zone
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/basictz.h"
+#include "unicode/unistr.h"
+
+U_NAMESPACE_BEGIN
+
+// forward declaration
+class UVector;
+struct Transition;
+
+/**
+ * a BasicTimeZone subclass implemented in terms of InitialTimeZoneRule and TimeZoneRule instances
+ * @see BasicTimeZone
+ * @see InitialTimeZoneRule
+ * @see TimeZoneRule
+ */
+class U_I18N_API RuleBasedTimeZone : public BasicTimeZone {
+public:
+    /**
+     * Constructs a <code>RuleBasedTimeZone</code> object with the ID and the
+     * <code>InitialTimeZoneRule</code>.  The input <code>InitialTimeZoneRule</code>
+     * is adopted by this <code>RuleBasedTimeZone</code>, thus the caller must not
+     * delete it.
+     * @param id                The time zone ID.
+     * @param initialRule       The initial time zone rule.
+     * @stable ICU 4.0
+     */
+    RuleBasedTimeZone(const UnicodeString& id, InitialTimeZoneRule* initialRule);
+
+    /**
+     * Copy constructor.
+     * @param source    The RuleBasedTimeZone object to be copied.
+     * @stable ICU 4.0
+     */
+    RuleBasedTimeZone(const RuleBasedTimeZone& source);
+
+    /**
+     * Destructor.
+     * @stable ICU 4.0
+     */
+    virtual ~RuleBasedTimeZone();
+
+    /**
+     * Assignment operator.
+     * @param right The object to be copied.
+     * @stable ICU 4.0
+     */
+    RuleBasedTimeZone& operator=(const RuleBasedTimeZone& right);
+
+    /**
+     * Return true if the given <code>TimeZone</code> objects are
+     * semantically equal. Objects of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given <code>TimeZone</code> objects are
+      *semantically equal.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator==(const TimeZone& that) const;
+
+    /**
+     * Return true if the given <code>TimeZone</code> objects are
+     * semantically unequal. Objects of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given <code>TimeZone</code> objects are
+     * semantically unequal.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator!=(const TimeZone& that) const;
+
+    /**
+     * Adds the <code>TimeZoneRule</code> which represents time transitions.
+     * The <code>TimeZoneRule</code> must have start times, that is, the result
+     * of isTransitionRule() must be true. Otherwise, U_ILLEGAL_ARGUMENT_ERROR
+     * is set to the error code.
+     * The input <code>TimeZoneRule</code> is adopted by this
+     * <code>RuleBasedTimeZone</code> on successful completion of this method,
+     * thus, the caller must not delete it when no error is returned.
+     * After all rules are added, the caller must call complete() method to
+     * make this <code>RuleBasedTimeZone</code> ready to handle common time
+     * zone functions.
+     * @param rule The <code>TimeZoneRule</code>.
+     * @param status Output param to filled in with a success or an error.
+     * @stable ICU 4.0
+     */
+    void addTransitionRule(TimeZoneRule* rule, UErrorCode& status);
+
+    /**
+     * Makes the <code>TimeZoneRule</code> ready to handle actual timezone
+     * calcuation APIs.  This method collects time zone rules specified
+     * by the caller via the constructor and addTransitionRule() and
+     * builds internal structure for making the object ready to support
+     * time zone APIs such as getOffset(), getNextTransition() and others.
+     * @param status Output param to filled in with a success or an error.
+     * @stable ICU 4.0
+     */
+    void complete(UErrorCode& status);
+
+    /**
+     * Clones TimeZone objects polymorphically. Clients are responsible for deleting
+     * the TimeZone object cloned.
+     *
+     * @return   A new copy of this TimeZone object.
+     * @stable ICU 4.0
+     */
+    virtual TimeZone* clone(void) const;
+
+    /**
+     * Returns the TimeZone's adjusted GMT offset (i.e., the number of milliseconds to add
+     * to GMT to get local time in this time zone, taking daylight savings time into
+     * account) as of a particular reference date.  The reference date is used to determine
+     * whether daylight savings time is in effect and needs to be figured into the offset
+     * that is returned (in other words, what is the adjusted GMT offset in this time zone
+     * at this particular date and time?).  For the time zones produced by createTimeZone(),
+     * the reference data is specified according to the Gregorian calendar, and the date
+     * and time fields are local standard time.
+     *
+     * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload,
+     * which returns both the raw and the DST offset for a given time. This method
+     * is retained only for backward compatibility.
+     *
+     * @param era        The reference date's era
+     * @param year       The reference date's year
+     * @param month      The reference date's month (0-based; 0 is January)
+     * @param day        The reference date's day-in-month (1-based)
+     * @param dayOfWeek  The reference date's day-of-week (1-based; 1 is Sunday)
+     * @param millis     The reference date's milliseconds in day, local standard time
+     * @param status     Output param to filled in with a success or an error.
+     * @return           The offset in milliseconds to add to GMT to get local time.
+     * @stable ICU 4.0
+     */
+    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
+                              uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const;
+
+    /**
+     * Gets the time zone offset, for current date, modified in case of
+     * daylight savings. This is the offset to add *to* UTC to get local time.
+     *
+     * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload,
+     * which returns both the raw and the DST offset for a given time. This method
+     * is retained only for backward compatibility.
+     *
+     * @param era        The reference date's era
+     * @param year       The reference date's year
+     * @param month      The reference date's month (0-based; 0 is January)
+     * @param day        The reference date's day-in-month (1-based)
+     * @param dayOfWeek  The reference date's day-of-week (1-based; 1 is Sunday)
+     * @param millis     The reference date's milliseconds in day, local standard time
+     * @param monthLength The length of the given month in days.
+     * @param status     Output param to filled in with a success or an error.
+     * @return           The offset in milliseconds to add to GMT to get local time.
+     * @stable ICU 4.0
+     */
+    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
+                           uint8_t dayOfWeek, int32_t millis,
+                           int32_t monthLength, UErrorCode& status) const;
+
+    /**
+     * Returns the time zone raw and GMT offset for the given moment
+     * in time.  Upon return, local-millis = GMT-millis + rawOffset +
+     * dstOffset.  All computations are performed in the proleptic
+     * Gregorian calendar.  The default implementation in the TimeZone
+     * class delegates to the 8-argument getOffset().
+     *
+     * @param date moment in time for which to return offsets, in
+     * units of milliseconds from January 1, 1970 0:00 GMT, either GMT
+     * time or local wall time, depending on `local'.
+     * @param local if true, `date' is local wall time; otherwise it
+     * is in GMT time.
+     * @param rawOffset output parameter to receive the raw offset, that
+     * is, the offset not including DST adjustments
+     * @param dstOffset output parameter to receive the DST offset,
+     * that is, the offset to be added to `rawOffset' to obtain the
+     * total offset between local and GMT time. If DST is not in
+     * effect, this value is zero; otherwise it is a positive value,
+     * typically one hour.
+     * @param ec input-output error code
+     * @stable ICU 4.0
+     */
+    virtual void getOffset(UDate date, UBool local, int32_t& rawOffset,
+                           int32_t& dstOffset, UErrorCode& ec) const;
+
+    /**
+     * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
+     * to GMT to get local time, before taking daylight savings time into account).
+     *
+     * @param offsetMillis  The new raw GMT offset for this time zone.
+     * @stable ICU 4.0
+     */
+    virtual void setRawOffset(int32_t offsetMillis);
+
+    /**
+     * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
+     * to GMT to get local time, before taking daylight savings time into account).
+     *
+     * @return   The TimeZone's raw GMT offset.
+     * @stable ICU 4.0
+     */
+    virtual int32_t getRawOffset(void) const;
+
+    /**
+     * Queries if this time zone uses daylight savings time.
+     * @return true if this time zone uses daylight savings time,
+     * false, otherwise.
+     * @stable ICU 4.0
+     */
+    virtual UBool useDaylightTime(void) const;
+
+    /**
+     * Queries if the given date is in daylight savings time in
+     * this time zone.
+     * This method is wasteful since it creates a new GregorianCalendar and
+     * deletes it each time it is called. This is a deprecated method
+     * and provided only for Java compatibility.
+     *
+     * @param date the given UDate.
+     * @param status Output param filled in with success/error code.
+     * @return true if the given date is in daylight savings time,
+     * false, otherwise.
+     * @deprecated ICU 2.4. Use Calendar::inDaylightTime() instead.
+     */
+    virtual UBool inDaylightTime(UDate date, UErrorCode& status) const;
+
+    /**
+     * Returns true if this zone has the same rule and offset as another zone.
+     * That is, if this zone differs only in ID, if at all.
+     * @param other the <code>TimeZone</code> object to be compared with
+     * @return true if the given zone is the same as this one,
+     * with the possible exception of the ID
+     * @stable ICU 4.0
+     */
+    virtual UBool hasSameRules(const TimeZone& other) const;
+
+    /**
+     * Gets the first time zone transition after the base time.
+     * @param base      The base time.
+     * @param inclusive Whether the base time is inclusive or not.
+     * @param result    Receives the first transition after the base time.
+     * @return  TRUE if the transition is found.
+     * @stable ICU 4.0
+     */
+    virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/;
+
+    /**
+     * Gets the most recent time zone transition before the base time.
+     * @param base      The base time.
+     * @param inclusive Whether the base time is inclusive or not.
+     * @param result    Receives the most recent transition before the base time.
+     * @return  TRUE if the transition is found.
+     * @stable ICU 4.0
+     */
+    virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/;
+
+    /**
+     * Returns the number of <code>TimeZoneRule</code>s which represents time transitions,
+     * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except
+     * <code>InitialTimeZoneRule</code>.  The return value range is 0 or any positive value.
+     * @param status    Receives error status code.
+     * @return The number of <code>TimeZoneRule</code>s representing time transitions.
+     * @stable ICU 4.0
+     */
+    virtual int32_t countTransitionRules(UErrorCode& status) /*const*/;
+
+    /**
+     * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code>
+     * which represent time transitions for this time zone.  On successful return,
+     * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and
+     * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code>
+     * instances up to the size specified by trscount.  The results are referencing the
+     * rule instance held by this time zone instance.  Therefore, after this time zone
+     * is destructed, they are no longer available.
+     * @param initial       Receives the initial timezone rule
+     * @param trsrules      Receives the timezone transition rules
+     * @param trscount      On input, specify the size of the array 'transitions' receiving
+     *                      the timezone transition rules.  On output, actual number of
+     *                      rules filled in the array will be set.
+     * @param status        Receives error status code.
+     * @stable ICU 4.0
+     */
+    virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial,
+        const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) /*const*/;
+
+    /**
+     * Get time zone offsets from local wall time.
+     * @internal
+     */
+    virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt,
+        int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) /*const*/;
+
+private:
+    void deleteRules(void);
+    void deleteTransitions(void);
+    UVector* copyRules(UVector* source);
+    TimeZoneRule* findRuleInFinal(UDate date, UBool local,
+        int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt) const;
+    UBool findNext(UDate base, UBool inclusive, UDate& time, TimeZoneRule*& from, TimeZoneRule*& to) const;
+    UBool findPrev(UDate base, UBool inclusive, UDate& time, TimeZoneRule*& from, TimeZoneRule*& to) const;
+    int32_t getLocalDelta(int32_t rawBefore, int32_t dstBefore, int32_t rawAfter, int32_t dstAfter,
+        int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt) const;
+    UDate getTransitionTime(Transition* transition, UBool local,
+        int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt) const;
+    void getOffsetInternal(UDate date, UBool local, int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt,
+        int32_t& rawOffset, int32_t& dstOffset, UErrorCode& ec) const;
+
+    InitialTimeZoneRule *fInitialRule;
+    UVector             *fHistoricRules;
+    UVector             *fFinalRules;
+    UVector             *fHistoricTransitions;
+    UBool               fUpToDate;
+
+public:
+    /**
+     * Return the class ID for this class. This is useful only for comparing to
+     * a return value from getDynamicClassID(). For example:
+     * <pre>
+     * .   Base* polymorphic_pointer = createPolymorphicObject();
+     * .   if (polymorphic_pointer->getDynamicClassID() ==
+     * .       erived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 4.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+     * method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 4.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // RBTZ_H
+
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/regex.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/regex.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/regex.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1232 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 2002-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*   file name:  regex.h
-*   encoding:   US-ASCII
-*   indentation:4
-*
-*   created on: 2002oct22
-*   created by: Andy Heninger
-*
-*   ICU Regular Expressions, API for C++
-*/
-
-#ifndef REGEX_H
-#define REGEX_H
-
-//#define REGEX_DEBUG
-
-/**
- * \file
- * \brief  C++ API:  Regular Expressions
- *
- * <h2>Regular Expression API</h2>
- *
- * <p>The ICU API for processing regular expressions consists of two classes,
- *  <code>RegexPattern</code> and <code>RegexMatcher</code>.
- *  <code>RegexPattern</code> objects represent a pre-processed, or compiled
- *  regular expression.  They are created from a regular expression pattern string,
- *  and can be used to create <code>RegexMatcher</code> objects for the pattern.</p>
- *
- * <p>Class <code>RegexMatcher</code> bundles together a regular expression
- *  pattern and a target string to which the search pattern will be applied.
- *  <code>RegexMatcher</code> includes API for doing plain find or search
- *  operations, for search and replace operations, and for obtaining detailed
- *  information about bounds of a match. </p>
- *
- * <p>Note that by constructing <code>RegexMatcher</code> objects directly from regular
- * expression pattern strings application code can be simplified and the explicit
- * need for <code>RegexPattern</code> objects can usually be eliminated.
- * </p>
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_REGULAR_EXPRESSIONS
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-#include "unicode/parseerr.h"
-
-#include "unicode/uregex.h"
-
-U_NAMESPACE_BEGIN
-
-
-// Forward Declarations...
-
-class RegexMatcher;
-class RegexPattern;
-class UVector;
-class UVector32;
-class UnicodeSet;
-struct REStackFrame;
-struct Regex8BitSet;
-class  RuleBasedBreakIterator;
-class  RegexCImpl;
-
-
-
-
-/**
- *   RBBIPatternDump   Debug function, displays the compiled form of a pattern.
- *   @internal
- */
-#ifdef REGEX_DEBUG
-U_INTERNAL void U_EXPORT2
-    RegexPatternDump(const RegexPattern *pat);
-#else
-    #define RegexPatternDump(pat)
-#endif
-
-
-
-/**
-  * Class <code>RegexPattern</code> represents a compiled regular expression.  It includes
-  * factory methods for creating a RegexPattern object from the source (string) form
-  * of a regular expression, methods for creating RegexMatchers that allow the pattern
-  * to be applied to input text, and a few convenience methods for simple common
-  * uses of regular expressions.
-  *
-  * <p>Class RegexPattern is not intended to be subclassed.</p>
-  *
-  * @stable ICU 2.4
-  */
-class U_I18N_API RegexPattern: public UObject {
-public:
-
-    /**
-     * default constructor.  Create a RegexPattern object that refers to no actual
-     *   pattern.  Not normally needed; RegexPattern objects are usually
-     *   created using the factory method <code>compile()</code>.
-     *
-     * @stable ICU 2.4
-     */
-    RegexPattern();
-
-    /**
-     * Copy Constructor.  Create a new RegexPattern object that is equivalent
-     *                    to the source object.
-     * @param source the pattern object to be copied.
-     * @stable ICU 2.4
-     */
-    RegexPattern(const RegexPattern &source);
-
-    /**
-     * Destructor.  Note that a RegexPattern object must persist so long as any
-     *  RegexMatcher objects that were created from the RegexPattern are active.
-     * @stable ICU 2.4
-     */
-    virtual ~RegexPattern();
-
-    /**
-     * Comparison operator.  Two RegexPattern objects are considered equal if they
-     * were constructed from identical source patterns using the same match flag
-     * settings.
-     * @param that a RegexPattern object to compare with "this".
-     * @return TRUE if the objects are equivalent.
-     * @stable ICU 2.4
-     */
-    UBool           operator==(const RegexPattern& that) const;
-
-    /**
-     * Comparison operator.  Two RegexPattern objects are considered equal if they
-     * were constructed from identical source patterns using the same match flag
-     * settings.
-     * @param that a RegexPattern object to compare with "this".
-     * @return TRUE if the objects are different.
-     * @stable ICU 2.4
-     */
-    inline UBool    operator!=(const RegexPattern& that) const {return ! operator ==(that);};
-
-    /**
-     * Assignment operator.  After assignment, this RegexPattern will behave identically
-     *     to the source object.
-     * @stable ICU 2.4
-     */
-    RegexPattern  &operator =(const RegexPattern &source);
-
-    /**
-     * Create an exact copy of this RegexPattern object.  Since RegexPattern is not
-     * intended to be subclasses, <code>clone()</code> and the copy construction are
-     * equivalent operations.
-     * @return the copy of this RegexPattern
-     * @stable ICU 2.4
-     */
-    virtual RegexPattern  *clone() const;
-
-
-   /**
-    * Compiles the regular expression in string form into a RegexPattern
-    * object.  These compile methods, rather than the constructors, are the usual
-    * way that RegexPattern objects are created.
-    *
-    * <p>Note that RegexPattern objects must not be deleted while RegexMatcher
-    * objects created from the pattern are active.  RegexMatchers keep a pointer
-    * back to their pattern, so premature deletion of the pattern is a
-    * catastrophic error.</p>
-    *
-    * <p>All pattern match mode flags are set to their default values.</p>
-    *
-    * <p>Note that it is often more convenient to construct a RegexMatcher directly
-    *    from a pattern string rather than separately compiling the pattern and
-    *    then creating a RegexMatcher object from the pattern.</p>
-    *
-    * @param regex The regular expression to be compiled.
-    * @param pe    Receives the position (line and column nubers) of any error
-    *              within the regular expression.)
-    * @param status A reference to a UErrorCode to receive any errors.
-    * @return      A regexPattern object for the compiled pattern.
-    *
-    * @stable ICU 2.4
-    */
-    static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
-        UParseError          &pe,
-        UErrorCode           &status);
-
-   /**
-    * Compiles the regular expression in string form into a RegexPattern
-    * object using the specified match mode flags.  These compile methods,
-    * rather than the constructors, are the usual way that RegexPattern objects
-    * are created.
-    *
-    * <p>Note that RegexPattern objects must not be deleted while RegexMatcher
-    * objects created from the pattern are active.  RegexMatchers keep a pointer
-    * back to their pattern, so premature deletion of the pattern is a
-    * catastrophic error.</p>
-    *
-    * <p>Note that it is often more convenient to construct a RegexMatcher directly
-    *    from a pattern string instead of than separately compiling the pattern and
-    *    then creating a RegexMatcher object from the pattern.</p>
-    *
-    * @param regex The regular expression to be compiled.
-    * @param flags The match mode flags to be used.
-    * @param pe    Receives the position (line and column nubers) of any error
-    *              within the regular expression.)
-    * @param status   A reference to a UErrorCode to receive any errors.
-    * @return      A regexPattern object for the compiled pattern.
-    *
-    * @stable ICU 2.4
-    */
-    static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
-        uint32_t             flags,
-        UParseError          &pe,
-        UErrorCode           &status);
-
-
-   /**
-    * Compiles the regular expression in string form into a RegexPattern
-    * object using the specified match mode flags.  These compile methods,
-    * rather than the constructors, are the usual way that RegexPattern objects
-    * are created.
-    *
-    * <p>Note that RegexPattern objects must not be deleted while RegexMatcher
-    * objects created from the pattern are active.  RegexMatchers keep a pointer
-    * back to their pattern, so premature deletion of the pattern is a
-    * catastrophic error.</p>
-    *
-    * <p>Note that it is often more convenient to construct a RegexMatcher directly
-    *    from a pattern string instead of than separately compiling the pattern and
-    *    then creating a RegexMatcher object from the pattern.</p>
-    *
-    * @param regex The regular expression to be compiled.
-    * @param flags The match mode flags to be used.
-    * @param status   A reference to a UErrorCode to receive any errors.
-    * @return      A regexPattern object for the compiled pattern.
-    *
-    * @stable ICU 2.6
-    */
-    static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
-        uint32_t             flags,
-        UErrorCode           &status);
-
-
-   /**
-    * Get the match mode flags that were used when compiling this pattern.
-    * @return  the match mode flags
-    * @stable ICU 2.4
-    */
-    virtual uint32_t flags() const;
-
-   /**
-    * Creates a RegexMatcher that will match the given input against this pattern.  The
-    * RegexMatcher can then be used to perform match, find or replace operations
-    * on the input.  Note that a RegexPattern object must not be deleted while
-    * RegexMatchers created from it still exist and might possibly be used again.
-    * <p>
-    * The matcher will retain a reference to the supplied input string, and all regexp
-    * pattern matching operations happen directly on this original string.  It is
-    * critical that the string not be altered or deleted before use by the regular
-    * expression operations is complete.
-    *
-    * @param input    The input string to which the regular expression will be applied.
-    * @param status   A reference to a UErrorCode to receive any errors.
-    * @return         A RegexMatcher object for this pattern and input.
-    *
-    * @stable ICU 2.4
-    */
-    virtual RegexMatcher *matcher(const UnicodeString &input,
-        UErrorCode          &status) const;
-
-private:
-    /**
-     * Cause a compilation error if an application accidently attempts to
-     *   create a matcher with a (UChar *) string as input rather than
-     *   a UnicodeString.  Avoids a dangling reference to a temporary string.
-     * <p>
-     * To efficiently work with UChar *strings, wrap the data in a UnicodeString
-     * using one of the aliasing constructors, such as
-     * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code>
-     *
-     * @internal
-     */
-    RegexMatcher *matcher(const UChar *input,
-        UErrorCode          &status) const;
-public:
-
-
-   /**
-    * Creates a RegexMatcher that will match against this pattern.  The
-    * RegexMatcher can be used to perform match, find or replace operations.
-    * Note that a RegexPattern object must not be deleted while
-    * RegexMatchers created from it still exist and might possibly be used again.
-    *
-    * @param status   A reference to a UErrorCode to receive any errors.
-    * @return      A RegexMatcher object for this pattern and input.
-    *
-    * @stable ICU 2.6
-    */
-    virtual RegexMatcher *matcher(UErrorCode  &status) const;
-
-
-   /**
-    * Test whether a string matches a regular expression.  This convenience function
-    * both compiles the reguluar expression and applies it in a single operation.
-    * Note that if the same pattern needs to be applied repeatedly, this method will be
-    * less efficient than creating and reusing a RegexMatcher object.
-    *
-    * @param regex The regular expression
-    * @param input The string data to be matched
-    * @param pe Receives the position of any syntax errors within the regular expression
-    * @param status A reference to a UErrorCode to receive any errors.
-    * @return True if the regular expression exactly matches the full input string.
-    *
-    * @stable ICU 2.4
-    */
-    static UBool U_EXPORT2 matches(const UnicodeString   &regex,
-        const UnicodeString   &input,
-        UParseError     &pe,
-        UErrorCode      &status);
-
-
-   /**
-    *    Returns the regular expression from which this pattern was compiled.
-    *    @stable ICU 2.4
-    */
-    virtual UnicodeString pattern() const;
-
-
-    /**
-     * Split a string into fields.  Somewhat like split() from Perl.
-     * The pattern matches identify delimiters that separate the input
-     *  into fields.  The input data between the matches becomes the
-     *  fields themselves.
-     * <p>
-     *  For the best performance on split() operations,
-     *  <code>RegexMatcher::split</code> is perferable to this function
-     *
-     * @param input   The string to be split into fields.  The field delimiters
-     *                match the pattern (in the "this" object)
-     * @param dest    An array of UnicodeStrings to receive the results of the split.
-     *                This is an array of actual UnicodeString objects, not an
-     *                array of pointers to strings.  Local (stack based) arrays can
-     *                work well here.
-     * @param destCapacity  The number of elements in the destination array.
-     *                If the number of fields found is less than destCapacity, the
-     *                extra strings in the destination array are not altered.
-     *                If the number of destination strings is less than the number
-     *                of fields, the trailing part of the input string, including any
-     *                field delimiters, is placed in the last destination string.
-     * @param status  A reference to a UErrorCode to receive any errors.
-     * @return        The number of fields into which the input string was split.
-     * @stable ICU 2.4
-     */
-    virtual int32_t  split(const UnicodeString &input,
-        UnicodeString    dest[],
-        int32_t          destCapacity,
-        UErrorCode       &status) const;
-
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.4
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.4
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-private:
-    //
-    //  Implementation Data
-    //
-    UnicodeString   fPattern;      // The original pattern string.
-    uint32_t        fFlags;        // The flags used when compiling the pattern.
-                                   //
-    UVector32       *fCompiledPat; // The compiled pattern p-code.
-    UnicodeString   fLiteralText;  // Any literal string data from the pattern,
-                                   //   after un-escaping, for use during the match.
-
-    UVector         *fSets;        // Any UnicodeSets referenced from the pattern.
-    Regex8BitSet    *fSets8;       //      (and fast sets for latin-1 range.)
-
-
-    UErrorCode      fDeferredStatus; // status if some prior error has left this
-                                   //  RegexPattern in an unusable state.
-
-    int32_t         fMinMatchLen;  // Minimum Match Length.  All matches will have length
-                                   //   >= this value.  For some patterns, this calculated
-                                   //   value may be less than the true shortest
-                                   //   possible match.
-
-    int32_t         fFrameSize;    // Size of a state stack frame in the
-                                   //   execution engine.
-
-    int32_t         fDataSize;     // The size of the data needed by the pattern that
-                                   //   does not go on the state stack, but has just
-                                   //   a single copy per matcher.
-
-    UVector32       *fGroupMap;    // Map from capture group number to position of
-                                   //   the group's variables in the matcher stack frame.
-
-    int32_t         fMaxCaptureDigits;
-
-    UnicodeSet     **fStaticSets;  // Ptr to static (shared) sets for predefined
-                                   //   regex character classes, e.g. Word.
-
-    Regex8BitSet   *fStaticSets8;  // Ptr to the static (shared) latin-1 only
-                                   //  sets for predefined regex classes.
-
-    int32_t         fStartType;    // Info on how a match must start.
-    int32_t         fInitialStringIdx;     //
-    int32_t         fInitialStringLen;
-    UnicodeSet     *fInitialChars;
-    UChar32         fInitialChar;
-    Regex8BitSet   *fInitialChars8;
-
-    friend class RegexCompile;
-    friend class RegexMatcher;
-    friend class RegexCImpl;
-
-    //
-    //  Implementation Methods
-    //
-    void        init();            // Common initialization, for use by constructors.
-    void        zap();             // Common cleanup
-#ifdef REGEX_DEBUG
-    void        dumpOp(int32_t index) const;
-    friend     void U_EXPORT2 RegexPatternDump(const RegexPattern *);
-#endif
-
-};
-
-
-
-/**
- *  class RegexMatcher bundles together a reular expression pattern and
- *  input text to which the expression can be applied.  It includes methods
- *  for testing for matches, and for find and replace operations.
- *
- * <p>Class RegexMatcher is not intended to be subclassed.</p>
- *
- * @stable ICU 2.4
- */
-class U_I18N_API RegexMatcher: public UObject {
-public:
-
-    /**
-      * Construct a RegexMatcher for a regular expression.
-      * This is a convenience method that avoids the need to explicitly create
-      * a RegexPattern object.  Note that if several RegexMatchers need to be
-      * created for the same expression, it will be more efficient to
-      * separately create and cache a RegexPattern object, and use
-      * its matcher() method to create the RegexMatcher objects.
-      *
-      *  @param regexp The Regular Expression to be compiled.
-      *  @param flags  Regular expression options, such as case insensitive matching.
-      *                @see UREGEX_CASE_INSENSITIVE
-      *  @param status Any errors are reported by setting this UErrorCode variable.
-      *  @stable ICU 2.6
-      */
-    RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status);
-
-    /**
-      * Construct a RegexMatcher for a regular expression.
-      * This is a convenience method that avoids the need to explicitly create
-      * a RegexPattern object.  Note that if several RegexMatchers need to be
-      * created for the same expression, it will be more efficient to
-      * separately create and cache a RegexPattern object, and use
-      * its matcher() method to create the RegexMatcher objects.
-      * <p>
-      * The matcher will retain a reference to the supplied input string, and all regexp
-      * pattern matching operations happen directly on the original string.  It is
-      * critical that the string not be altered or deleted before use by the regular
-      * expression operations is complete.
-      *
-      *  @param regexp The Regular Expression to be compiled.
-      *  @param input  The string to match.  The matcher retains a reference to the
-      *                caller's string; mo copy is made.
-      *  @param flags  Regular expression options, such as case insensitive matching.
-      *                @see UREGEX_CASE_INSENSITIVE
-      *  @param status Any errors are reported by setting this UErrorCode variable.
-      *  @stable ICU 2.6
-      */
-    RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
-        uint32_t flags, UErrorCode &status);
-
-private:
-    /**
-     * Cause a compilation error if an application accidently attempts to
-     *   create a matcher with a (UChar *) string as input rather than
-     *   a UnicodeString.    Avoids a dangling reference to a temporary string.
-     * <p>
-     * To efficiently work with UChar *strings, wrap the data in a UnicodeString
-     * using one of the aliasing constructors, such as
-     * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code>
-     *
-     * @internal
-     */
-    RegexMatcher(const UnicodeString &regexp, const UChar *input,
-        uint32_t flags, UErrorCode &status);
-public:
-
-
-   /**
-    *   Destructor.
-    *
-    *  @stable ICU 2.4
-    */
-    virtual ~RegexMatcher();
-
-
-   /**
-    *   Attempts to match the entire input region against the pattern.
-    *    @param   status     A reference to a UErrorCode to receive any errors.
-    *    @return TRUE if there is a match
-    *    @stable ICU 2.4
-    */
-    virtual UBool matches(UErrorCode &status);
-
-   /**
-    *   Resets the matcher, then attempts to match the input beginning 
-    *   at the specified startIndex, and extending to the end of the input.
-    *   The input region is reset to include the entire input string.
-    *   A successful match must extend to the end of the input.
-    *    @param   startIndex The input string index at which to begin matching.
-    *    @param   status     A reference to a UErrorCode to receive any errors.
-    *    @return TRUE if there is a match
-    *    @stable ICU 2.8
-    */
-    virtual UBool matches(int32_t startIndex, UErrorCode &status);
-
-
-
-
-   /**
-    *   Attempts to match the input string, starting from the beginning of the region,
-    *   against the pattern.  Like the matches() method, this function 
-    *   always starts at the beginning of the input region;
-    *   unlike that function, it does not require that the entire region be matched.
-    *
-    *   <p>If the match succeeds then more information can be obtained via the <code>start()</code>,
-    *     <code>end()</code>, and <code>group()</code> functions.</p>
-    *
-    *    @param   status     A reference to a UErrorCode to receive any errors.
-    *    @return  TRUE if there is a match at the start of the input string.
-    *    @stable ICU 2.4
-    */
-    virtual UBool lookingAt(UErrorCode &status);
-
-
-  /**
-    *   Attempts to match the input string, starting from the specified index, against the pattern.
-    *   The match may be of any length, and is not required to extend to the end
-    *   of the input string.  Contrast with match().
-    *
-    *   <p>If the match succeeds then more information can be obtained via the <code>start()</code>,
-    *     <code>end()</code>, and <code>group()</code> functions.</p>
-    *
-    *    @param   startIndex The input string index at which to begin matching.
-    *    @param   status     A reference to a UErrorCode to receive any errors.
-    *    @return  TRUE if there is a match.
-    *    @stable ICU 2.8
-    */
-    virtual UBool lookingAt(int32_t startIndex, UErrorCode &status);
-
-   /**
-    *  Find the next pattern match in the input string.
-    *  The find begins searching the input at the location following the end of
-    *  the previous match, or at the start of the string if there is no previous match.
-    *  If a match is found, <code>start(), end()</code> and <code>group()</code>
-    *  will provide more information regarding the match.
-    *  <p>Note that if the input string is changed by the application,
-    *     use find(startPos, status) instead of find(), because the saved starting
-    *     position may not be valid with the altered input string.</p>
-    *  @return  TRUE if a match is found.
-    *  @stable ICU 2.4
-    */
-    virtual UBool find();
-
-
-   /**
-    *   Resets this RegexMatcher and then attempts to find the next substring of the
-    *   input string that matches the pattern, starting at the specified index.
-    *
-    *   @param   start     the position in the input string to begin the search
-    *   @param   status    A reference to a UErrorCode to receive any errors.
-    *   @return  TRUE if a match is found.
-    *   @stable ICU 2.4
-    */
-    virtual UBool find(int32_t start, UErrorCode &status);
-
-
-   /**
-    *   Returns a string containing the text matched by the previous match.
-    *   If the pattern can match an empty string, an empty string may be returned.
-    *   @param   status      A reference to a UErrorCode to receive any errors.
-    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
-    *                        has been attempted or the last match failed.
-    *   @return  a string containing the matched input text.
-    *   @stable ICU 2.4
-    */
-    virtual UnicodeString group(UErrorCode &status) const;
-
-
-   /**
-    *    Returns a string containing the text captured by the given group
-    *    during the previous match operation.  Group(0) is the entire match.
-    *
-    *    @param groupNum the capture group number
-    *    @param   status     A reference to a UErrorCode to receive any errors.
-    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
-    *                        has been attempted or the last match failed and
-    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
-    *    @return the captured text
-    *    @stable ICU 2.4
-    */
-    virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
-
-
-   /**
-    *   Returns the number of capturing groups in this matcher's pattern.
-    *   @return the number of capture groups
-    *   @stable ICU 2.4
-    */
-    virtual int32_t groupCount() const;
-
-
-   /**
-    *   Returns the index in the input string of the start of the text matched
-    *   during the previous match operation.
-    *    @param   status      a reference to a UErrorCode to receive any errors.
-    *    @return              The position in the input string of the start of the last match.
-    *    @stable ICU 2.4
-    */
-    virtual int32_t start(UErrorCode &status) const;
-
-
-   /**
-    *   Returns the index in the input string of the start of the text matched by the
-    *    specified capture group during the previous match operation.  Return -1 if
-    *    the capture group exists in the pattern, but was not part of the last match.
-    *
-    *    @param  group       the capture group number
-    *    @param  status      A reference to a UErrorCode to receive any errors.  Possible
-    *                        errors are  U_REGEX_INVALID_STATE if no match has been
-    *                        attempted or the last match failed, and
-    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
-    *    @return the start position of substring matched by the specified group.
-    *    @stable ICU 2.4
-    */
-    virtual int32_t start(int32_t group, UErrorCode &status) const;
-
-
-   /**
-    *    Returns the index in the input string of the first character following the
-    *    text matched during the previous match operation.
-    *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
-    *                        errors are  U_REGEX_INVALID_STATE if no match has been
-    *                        attempted or the last match failed.
-    *    @return the index of the last character matched, plus one.
-    *   @stable ICU 2.4
-    */
-    virtual int32_t end(UErrorCode &status) const;
-
-
-   /**
-    *    Returns the index in the input string of the character following the
-    *    text matched by the specified capture group during the previous match operation.
-    *    @param group  the capture group number
-    *    @param   status      A reference to a UErrorCode to receive any errors.  Possible
-    *                        errors are  U_REGEX_INVALID_STATE if no match has been
-    *                        attempted or the last match failed and
-    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
-    *    @return  the index of the first character following the text
-    *              captured by the specifed group during the previous match operation.
-    *              Return -1 if the capture group exists in the pattern but was not part of the match.
-    *    @stable ICU 2.4
-    */
-    virtual int32_t end(int32_t group, UErrorCode &status) const;
-
-
-   /**
-    *   Resets this matcher.  The effect is to remove any memory of previous matches,
-    *       and to cause subsequent find() operations to begin at the beginning of
-    *       the input string.
-    *
-    *   @return this RegexMatcher.
-    *   @stable ICU 2.4
-    */
-    virtual RegexMatcher &reset();
-
-
-   /**
-    *   Resets this matcher, and set the current input position.
-    *   The effect is to remove any memory of previous matches,
-    *       and to cause subsequent find() operations to begin at
-    *       the specified position in the input string.
-    * <p>
-    *   The matcher's region is reset to its default, which is the entire
-    *   input string.
-    * <p>
-    *   An alternative to this function is to set a match region
-    *   beginning at the desired index.
-    *
-    *   @return this RegexMatcher.
-    *   @stable ICU 2.8
-    */
-    virtual RegexMatcher &reset(int32_t index, UErrorCode &status);
-
-
-   /**
-    *   Resets this matcher with a new input string.  This allows instances of RegexMatcher
-    *     to be reused, which is more efficient than creating a new RegexMatcher for
-    *     each input string to be processed.
-    *   @param input The new string on which subsequent pattern matches will operate.
-    *                The matcher retains a reference to the callers string, and operates
-    *                directly on that.  Ownership of the string remains with the caller.
-    *                Because no copy of the string is made, it is essential that the
-    *                caller not delete the string until after regexp operations on it
-    *                are done.
-    *   @return this RegexMatcher.
-    *   @stable ICU 2.4
-    */
-    virtual RegexMatcher &reset(const UnicodeString &input);
-
-private:
-    /**
-     * Cause a compilation error if an application accidently attempts to
-     *   reset a matcher with a (UChar *) string as input rather than
-     *   a UnicodeString.    Avoids a dangling reference to a temporary string.
-     * <p>
-     * To efficiently work with UChar *strings, wrap the data in a UnicodeString
-     * using one of the aliasing constructors, such as
-     * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code>
-     *
-     * @internal
-     */
-    RegexMatcher &reset(const UChar *input);
-public:
-
-   /**
-    *   Returns the input string being matched.  The returned string is not a copy,
-    *   but the live input string.  It should not be altered or deleted.
-    *   @return the input string
-    *   @stable ICU 2.4
-    */
-    virtual const UnicodeString &input() const;
-    
-    
-
-   /** Sets the limits of this matcher's region.
-     * The region is the part of the input string that will be searched to find a match.
-     * Invoking this method resets the matcher, and then sets the region to start
-     * at the index specified by the start parameter and end at the index specified
-     * by the end parameter.
-     *
-     * Depending on the transparency and anchoring being used (see useTransparentBounds
-     * and useAnchoringBounds), certain constructs such as anchors may behave differently
-     * at or around the boundaries of the region
-     *
-     * The function will fail if start is greater than limit, or if either index
-     *  is less than zero or greater than the length of the string being matched.
-     *
-     * @param start  The index to begin searches at.
-     * @param limit  The index to end searches at (exclusive).
-     * @param status A reference to a UErrorCode to receive any errors.
-     * @draft ICU 4.0
-     */
-     virtual RegexMatcher &region(int32_t start, int32_t limit, UErrorCode &status);
-
-
-   /**
-     * Reports the start index of this matcher's region. The searches this matcher
-     * conducts are limited to finding matches within regionStart (inclusive) and
-     * regionEnd (exclusive).
-     *
-     * @return The starting index of this matcher's region.
-     * @draft ICU 4.0
-     */
-     virtual int32_t regionStart() const;
-
-
-    /**
-      * Reports the end (limit) index (exclusive) of this matcher's region. The searches
-      * this matcher conducts are limited to finding matches within regionStart
-      * (inclusive) and regionEnd (exclusive).
-      *
-      * @return The ending point of this matcher's region.
-      * @draft ICU 4.0
-      */
-      virtual int32_t regionEnd() const;
-
-    /**
-      * Queries the transparency of region bounds for this matcher.
-      * See useTransparentBounds for a description of transparent and opaque bounds.
-      * By default, a matcher uses opaque region boundaries.
-      *
-      * @return TRUE if this matcher is using opaque bounds, false if it is not.
-      * @draft ICU 4.0
-      */
-      virtual UBool hasTransparentBounds() const;
-
-    /**
-      * Sets the transparency of region bounds for this matcher.
-      * Invoking this function with an argument of true will set this matcher to use transparent bounds.
-      * If the boolean argument is false, then opaque bounds will be used.
-      *
-      * Using transparent bounds, the boundaries of this matcher's region are transparent
-      * to lookahead, lookbehind, and boundary matching constructs. Those constructs can
-      * see text beyond the boundaries of the region while checking for a match.
-      *
-      * With opaque bounds, no text outside of the matcher's region is visible to lookahead,
-      * lookbehind, and boundary matching constructs.
-      *
-      * By default, a matcher uses opaque bounds.
-      *
-      * @param   b TRUE for transparent bounds; FALSE for opaque bounds
-      * @return  This Matcher;
-      * @draft   ICU 4.0
-      **/
-      virtual RegexMatcher &useTransparentBounds(UBool b);
-
-     
-    /**
-      * Return true if this matcher is using anchoring bounds.
-      * By default, matchers use anchoring region boounds.
-      *
-      * @return TRUE if this matcher is using anchoring bounds.
-      * @draft  ICU 4.0
-      */    
-      virtual UBool hasAnchoringBounds() const;
-
-    /**
-      * Set whether this matcher is using Anchoring Bounds for its region.
-      * With anchoring bounds, pattern anchors such as ^ and $ will match at the start
-      * and end of the region.  Without Anchoring Bounds, anchors will only match at
-      * the positions they would in the complete text.
-      *
-      * Anchoring Bounds are the default for regions.
-      *
-      * @param b TRUE if to enable anchoring bounds; FALSE to disable them.
-      * @return  This Matcher
-      * @draft   ICU 4.0
-      */
-      virtual RegexMatcher &useAnchoringBounds(UBool b);
-
-    /**
-      * Return TRUE if the most recent matching operation touched the
-      *  end of the text being processed.  In this case, additional input text could
-      *  change the results of that match.
-      *
-      *  hitEnd() is defined for both successful and unsuccessful matches.
-      *  In either case hitEnd() will return TRUE if if the end of the text was
-      *  reached at any point during the matching process.
-      *
-      *  @return  TRUE if the most recent match hit the end of input
-      *  @draft   ICU 4.0
-      */
-      virtual UBool hitEnd() const;
-
-    /**
-      * Return TRUE the most recent match succeeded and additional input could cause
-      * it to fail. If this method returns false and a match was found, then more input
-      * might change the match but the match won't be lost. If a match was not found,
-      * then requireEnd has no meaning.
-      *
-      * @return TRUE if more input could cause the most recent match to no longer match.
-      * @draft  ICU 4.0
-      */
-      virtual UBool requireEnd() const;
-
-
-
-
-
-   /**
-    *    Returns the pattern that is interpreted by this matcher.
-    *    @return  the RegexPattern for this RegexMatcher
-    *    @stable ICU 2.4
-    */
-    virtual const RegexPattern &pattern() const;
-
-
-   /**
-    *    Replaces every substring of the input that matches the pattern
-    *    with the given replacement string.  This is a convenience function that
-    *    provides a complete find-and-replace-all operation.
-    *
-    *    This method first resets this matcher. It then scans the input string
-    *    looking for matches of the pattern. Input that is not part of any
-    *    match is left unchanged; each match is replaced in the result by the
-    *    replacement string. The replacement string may contain references to
-    *    capture groups.
-    *
-    *    @param   replacement a string containing the replacement text.
-    *    @param   status      a reference to a UErrorCode to receive any errors.
-    *    @return              a string containing the results of the find and replace.
-    *    @stable ICU 2.4
-    */
-    virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
-
-
-   /**
-    * Replaces the first substring of the input that matches
-    * the pattern with the replacement string.   This is a convenience
-    * function that provides a complete find-and-replace operation.
-    *
-    * <p>This function first resets this RegexMatcher. It then scans the input string
-    * looking for a match of the pattern. Input that is not part
-    * of the match is appended directly to the result string; the match is replaced
-    * in the result by the replacement string. The replacement string may contain
-    * references to captured groups.</p>
-    *
-    * <p>The state of the matcher (the position at which a subsequent find()
-    *    would begin) after completing a replaceFirst() is not specified.  The
-    *    RegexMatcher should be reset before doing additional find() operations.</p>
-    *
-    *    @param   replacement a string containing the replacement text.
-    *    @param   status      a reference to a UErrorCode to receive any errors.
-    *    @return              a string containing the results of the find and replace.
-    *    @stable ICU 2.4
-    */
-    virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
-
-   /**
-    *   Implements a replace operation intended to be used as part of an
-    *   incremental find-and-replace.
-    *
-    *   <p>The input string, starting from the end of the previous replacement and ending at
-    *   the start of the current match, is appended to the destination string.  Then the
-    *   replacement string is appended to the output string,
-    *   including handling any substitutions of captured text.</p>
-    *
-    *   <p>For simple, prepackaged, non-incremental find-and-replace
-    *   operations, see replaceFirst() or replaceAll().</p>
-    *
-    *   @param   dest        A UnicodeString to which the results of the find-and-replace are appended.
-    *   @param   replacement A UnicodeString that provides the text to be substituted for
-    *                        the input text that matched the regexp pattern.  The replacement
-    *                        text may contain references to captured text from the
-    *                        input.
-    *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
-    *                        errors are  U_REGEX_INVALID_STATE if no match has been
-    *                        attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR
-    *                        if the replacement text specifies a capture group that
-    *                        does not exist in the pattern.
-    *
-    *   @return  this  RegexMatcher
-    *   @stable ICU 2.4
-    *
-    */
-    virtual RegexMatcher &appendReplacement(UnicodeString &dest,
-        const UnicodeString &replacement, UErrorCode &status);
-
-
-   /**
-    * As the final step in a find-and-replace operation, append the remainder
-    * of the input string, starting at the position following the last appendReplacement(),
-    * to the destination string. <code>appendTail()</code> is intended to be invoked after one
-    * or more invocations of the <code>RegexMatcher::appendReplacement()</code>.
-    *
-    *  @param dest A UnicodeString to which the results of the find-and-replace are appended.
-    *  @return  the destination string.
-    *  @stable ICU 2.4
-    */
-    virtual UnicodeString &appendTail(UnicodeString &dest);
-
-
-
-    /**
-     * Split a string into fields.  Somewhat like split() from Perl.
-     * The pattern matches identify delimiters that separate the input
-     *  into fields.  The input data between the matches becomes the
-     *  fields themselves.
-     * <p>
-     *
-     * @param input   The string to be split into fields.  The field delimiters
-     *                match the pattern (in the "this" object).  This matcher
-     *                will be reset to this input string.
-     * @param dest    An array of UnicodeStrings to receive the results of the split.
-     *                This is an array of actual UnicodeString objects, not an
-     *                array of pointers to strings.  Local (stack based) arrays can
-     *                work well here.
-     * @param destCapacity  The number of elements in the destination array.
-     *                If the number of fields found is less than destCapacity, the
-     *                extra strings in the destination array are not altered.
-     *                If the number of destination strings is less than the number
-     *                of fields, the trailing part of the input string, including any
-     *                field delimiters, is placed in the last destination string.
-     * @param status  A reference to a UErrorCode to receive any errors.
-     * @return        The number of fields into which the input string was split.
-     * @stable ICU 2.6
-     */
-    virtual int32_t  split(const UnicodeString &input,
-        UnicodeString    dest[],
-        int32_t          destCapacity,
-        UErrorCode       &status);
-
-  /**
-    *   Set a processing time limit for match operations with this Matcher.
-    *  
-    *   Some patterns, when matching certain strings, can run in exponential time.
-    *   For practical purposes, the match operation may appear to be in an
-    *   infinite loop.
-    *   When a limit is set a match operation will fail with an error if the
-    *   limit is exceeded.
-    *   <p>
-    *   The units of the limit are steps of the match engine.
-    *   Correspondence with actual processor time will depend on the speed
-    *   of the processor and the details of the specific pattern, but will
-    *   typically be on the order of milliseconds.
-    *   <p>
-    *   By default, the matching time is not limited.
-    *   <p>
-    *
-    *   @param   limit       The limit value, or 0 for no limit.
-    *   @param   status      A reference to a UErrorCode to receive any errors.
-    *   @draft ICU 4.0
-    */
-    virtual void setTimeLimit(int32_t limit, UErrorCode &status);
-
-  /**
-    * Get the time limit, if any, for match operations made with this Matcher.
-    *
-    *   @return the maximum allowed time for a match, in units of processing steps.
-    *   @draft ICU 4.0
-    */
-    virtual int32_t getTimeLimit() const;
-
-  /**
-    *  Set the amount of heap storage avaliable for use by the match backtracking stack.
-    *  The matcher is also reset, discarding any results from previous matches.
-    *  <p>
-    *  ICU uses a backtracking regular expression engine, with the backtrack stack
-    *  maintained on the heap.  This function sets the limit to the amount of memory
-    *  that can be used  for this purpose.  A backtracking stack overflow will
-    *  result in an error from the match operation that caused it.
-    *  <p>
-    *  A limit is desirable because a malicious or poorly designed pattern can use
-    *  excessive memory, potentially crashing the process.  A limit is enabled
-    *  by default.
-    *  <p>
-    *  @param limit  The maximum size, in bytes, of the matching backtrack stack.
-    *                A value of zero means no limit.
-    *                The limit must be greater or equal to zero.
-    *
-    *  @param status   A reference to a UErrorCode to receive any errors.
-    *
-    *  @draft ICU 4.0
-    */
-    virtual void setStackLimit(int32_t  limit, UErrorCode &status);
-    
-  /**
-    *  Get the size of the heap storage available for use by the back tracking stack.
-    *
-    *  @return  the maximum backtracking stack size, in bytes, or zero if the
-    *           stack size is unlimited.
-    *  @draft ICU 4.0
-    */
-    virtual int32_t  getStackLimit() const;
-
-
-  /**
-    * Set a callback function for use with this Matcher.
-    * During matching operations the function will be called periodically,
-    * giving the application the opportunity to terminate a long-running
-    * match.
-    *
-    *    @param   callback    A pointer to the user-supplied callback function.
-    *    @param   context     User context pointer.  The value supplied at the
-    *                         time the callback function is set will be saved
-    *                         and passed to the callback each time that it is called.
-    *    @param   status      A reference to a UErrorCode to receive any errors.
-    *  @draft ICU 4.0
-    */
-    virtual void setMatchCallback(URegexMatchCallback     *callback,
-                                  const void              *context,
-                                  UErrorCode              &status);
-
-
-
-  /**
-    *  Get the callback function for this URegularExpression.
-    *
-    *    @param   callback    Out paramater, receives a pointer to the user-supplied 
-    *                         callback function.
-    *    @param   context     Out parameter, receives the user context pointer that
-    *                         was set when uregex_setMatchCallback() was called.
-    *    @param   status      A reference to a UErrorCode to receive any errors.
-    *    @draft ICU 4.0
-    */
-    virtual void getMatchCallback(URegexMatchCallback     *&callback,
-                                  const void              *&context,
-                                  UErrorCode              &status);
-
-
-   /**
-     *   setTrace   Debug function, enable/disable tracing of the matching engine.
-     *              For internal ICU development use only.  DO NO USE!!!!
-     *   @internal
-     */
-    void setTrace(UBool state);
-
-
-    /**
-    * ICU "poor man's RTTI", returns a UClassID for this class.
-    *
-    * @stable ICU 2.2
-    */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.2
-     */
-    virtual UClassID getDynamicClassID() const;
-
-private:
-    // Constructors and other object boilerplate are private.
-    // Instances of RegexMatcher can not be assigned, copied, cloned, etc.
-    RegexMatcher();                  // default constructor not implemented
-    RegexMatcher(const RegexPattern *pat);
-    RegexMatcher(const RegexMatcher &other);
-    RegexMatcher &operator =(const RegexMatcher &rhs);
-    void init(UErrorCode &status);                      // Common initialization
-    void init2(const UnicodeString &s, UErrorCode &e);  // Common initialization, part 2.
-
-    friend class RegexPattern;
-    friend class RegexCImpl;
-public:
-    /** @internal  */
-    void resetPreserveRegion();  // Reset matcher state, but preserve any region.
-private:
-
-    //
-    //  MatchAt   This is the internal interface to the match engine itself.
-    //            Match status comes back in matcher member variables.
-    //
-    void                 MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
-    inline void          backTrack(int32_t &inputIdx, int32_t &patIdx);
-    UBool                isWordBoundary(int32_t pos);         // perform Perl-like  \b test
-    UBool                isUWordBoundary(int32_t pos);        // perform RBBI based \b test
-    REStackFrame        *resetStack();
-    inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx, UErrorCode &status);
-    void                 IncrementTime(UErrorCode &status);
-
-
-    const RegexPattern  *fPattern;
-    RegexPattern        *fPatternOwned;    // Non-NULL if this matcher owns the pattern, and
-                                           //   should delete it when through.
-
-    const UnicodeString *fInput;           // The text being matched. Is never NULL.
-    int32_t              fFrameSize;       // The size of a frame in the backtrack stack.
-    
-    int32_t              fRegionStart;     // Start of the input region, default = 0.
-    int32_t              fRegionLimit;     // End of input region, default to input.length.
-    
-    int32_t              fAnchorStart;     // Region bounds for anchoring operations (^ or $).
-    int32_t              fAnchorLimit;     //   See useAnchoringBounds
-    
-    int32_t              fLookStart;       // Region bounds for look-ahead/behind and
-    int32_t              fLookLimit;       //   and other boundary tests.  See
-                                           //   useTransparentBounds
-
-    int32_t              fActiveStart;     // Currently active bounds for matching.
-    int32_t              fActiveLimit;     //   Usually is the same as region, but
-                                           //   is changed to fLookStart/Limit when
-                                           //   entering look around regions.
-
-    UBool                fTransparentBounds;  // True if using transparent bounds.
-    UBool                fAnchoringBounds; // True if using anchoring bounds.
-
-    UBool                fMatch;           // True if the last attempted match was successful.
-    int32_t              fMatchStart;      // Position of the start of the most recent match
-    int32_t              fMatchEnd;        // First position after the end of the most recent match
-                                           //   Zero if no previous match, even when a region
-                                           //   is active.
-    int32_t              fLastMatchEnd;    // First position after the end of the previous match,
-                                           //   or -1 if there was no previous match.
-    int32_t              fAppendPosition;  // First position after the end of the previous
-                                           //   appendReplacement().  As described by the
-                                           //   JavaDoc for Java Matcher, where it is called 
-                                           //   "append position"
-    UBool                fHitEnd;          // True if the last match touched the end of input.
-    UBool                fRequireEnd;      // True if the last match required end-of-input
-                                           //    (matched $ or Z)
-
-    UVector32           *fStack;
-    REStackFrame        *fFrame;           // After finding a match, the last active stack frame,
-                                           //   which will contain the capture group results.
-                                           //   NOT valid while match engine is running.
-
-    int32_t             *fData;            // Data area for use by the compiled pattern.
-    int32_t             fSmallData[8];     //   Use this for data if it's enough.
-
-    int32_t             fTimeLimit;        // Max time (in arbitrary steps) to let the
-                                           //   match engine run.  Zero for unlimited.
-    
-    int32_t             fTime;             // Match time, accumulates while matching.
-    int32_t             fTickCounter;      // Low bits counter for time.  Counts down StateSaves.
-                                           //   Kept separately from fTime to keep as much
-                                           //   code as possible out of the inline
-                                           //   StateSave function.
-
-    int32_t             fStackLimit;       // Maximum memory size to use for the backtrack
-                                           //   stack, in bytes.  Zero for unlimited.
-
-    URegexMatchCallback *fCallbackFn;       // Pointer to match progress callback funct.
-                                           //   NULL if there is no callback.
-    const void         *fCallbackContext;  // User Context ptr for callback function.
-
-    UBool               fTraceDebug;       // Set true for debug tracing of match engine.
-
-    UErrorCode          fDeferredStatus;   // Save error state that cannot be immediately
-                                           //   reported, or that permanently disables this matcher.
-
-    RuleBasedBreakIterator  *fWordBreakItr;
-
-
-};
-
-U_NAMESPACE_END
-#endif  // UCONFIG_NO_REGULAR_EXPRESSIONS
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/regex.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/regex.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/regex.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/regex.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1232 @@
+/*
+**********************************************************************
+*   Copyright (C) 2002-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  regex.h
+*   encoding:   US-ASCII
+*   indentation:4
+*
+*   created on: 2002oct22
+*   created by: Andy Heninger
+*
+*   ICU Regular Expressions, API for C++
+*/
+
+#ifndef REGEX_H
+#define REGEX_H
+
+//#define REGEX_DEBUG
+
+/**
+ * \file
+ * \brief  C++ API:  Regular Expressions
+ *
+ * <h2>Regular Expression API</h2>
+ *
+ * <p>The ICU API for processing regular expressions consists of two classes,
+ *  <code>RegexPattern</code> and <code>RegexMatcher</code>.
+ *  <code>RegexPattern</code> objects represent a pre-processed, or compiled
+ *  regular expression.  They are created from a regular expression pattern string,
+ *  and can be used to create <code>RegexMatcher</code> objects for the pattern.</p>
+ *
+ * <p>Class <code>RegexMatcher</code> bundles together a regular expression
+ *  pattern and a target string to which the search pattern will be applied.
+ *  <code>RegexMatcher</code> includes API for doing plain find or search
+ *  operations, for search and replace operations, and for obtaining detailed
+ *  information about bounds of a match. </p>
+ *
+ * <p>Note that by constructing <code>RegexMatcher</code> objects directly from regular
+ * expression pattern strings application code can be simplified and the explicit
+ * need for <code>RegexPattern</code> objects can usually be eliminated.
+ * </p>
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/parseerr.h"
+
+#include "unicode/uregex.h"
+
+U_NAMESPACE_BEGIN
+
+
+// Forward Declarations...
+
+class RegexMatcher;
+class RegexPattern;
+class UVector;
+class UVector32;
+class UnicodeSet;
+struct REStackFrame;
+struct Regex8BitSet;
+class  RuleBasedBreakIterator;
+class  RegexCImpl;
+
+
+
+
+/**
+ *   RBBIPatternDump   Debug function, displays the compiled form of a pattern.
+ *   @internal
+ */
+#ifdef REGEX_DEBUG
+U_INTERNAL void U_EXPORT2
+    RegexPatternDump(const RegexPattern *pat);
+#else
+    #define RegexPatternDump(pat)
+#endif
+
+
+
+/**
+  * Class <code>RegexPattern</code> represents a compiled regular expression.  It includes
+  * factory methods for creating a RegexPattern object from the source (string) form
+  * of a regular expression, methods for creating RegexMatchers that allow the pattern
+  * to be applied to input text, and a few convenience methods for simple common
+  * uses of regular expressions.
+  *
+  * <p>Class RegexPattern is not intended to be subclassed.</p>
+  *
+  * @stable ICU 2.4
+  */
+class U_I18N_API RegexPattern: public UObject {
+public:
+
+    /**
+     * default constructor.  Create a RegexPattern object that refers to no actual
+     *   pattern.  Not normally needed; RegexPattern objects are usually
+     *   created using the factory method <code>compile()</code>.
+     *
+     * @stable ICU 2.4
+     */
+    RegexPattern();
+
+    /**
+     * Copy Constructor.  Create a new RegexPattern object that is equivalent
+     *                    to the source object.
+     * @param source the pattern object to be copied.
+     * @stable ICU 2.4
+     */
+    RegexPattern(const RegexPattern &source);
+
+    /**
+     * Destructor.  Note that a RegexPattern object must persist so long as any
+     *  RegexMatcher objects that were created from the RegexPattern are active.
+     * @stable ICU 2.4
+     */
+    virtual ~RegexPattern();
+
+    /**
+     * Comparison operator.  Two RegexPattern objects are considered equal if they
+     * were constructed from identical source patterns using the same match flag
+     * settings.
+     * @param that a RegexPattern object to compare with "this".
+     * @return TRUE if the objects are equivalent.
+     * @stable ICU 2.4
+     */
+    UBool           operator==(const RegexPattern& that) const;
+
+    /**
+     * Comparison operator.  Two RegexPattern objects are considered equal if they
+     * were constructed from identical source patterns using the same match flag
+     * settings.
+     * @param that a RegexPattern object to compare with "this".
+     * @return TRUE if the objects are different.
+     * @stable ICU 2.4
+     */
+    inline UBool    operator!=(const RegexPattern& that) const {return ! operator ==(that);};
+
+    /**
+     * Assignment operator.  After assignment, this RegexPattern will behave identically
+     *     to the source object.
+     * @stable ICU 2.4
+     */
+    RegexPattern  &operator =(const RegexPattern &source);
+
+    /**
+     * Create an exact copy of this RegexPattern object.  Since RegexPattern is not
+     * intended to be subclasses, <code>clone()</code> and the copy construction are
+     * equivalent operations.
+     * @return the copy of this RegexPattern
+     * @stable ICU 2.4
+     */
+    virtual RegexPattern  *clone() const;
+
+
+   /**
+    * Compiles the regular expression in string form into a RegexPattern
+    * object.  These compile methods, rather than the constructors, are the usual
+    * way that RegexPattern objects are created.
+    *
+    * <p>Note that RegexPattern objects must not be deleted while RegexMatcher
+    * objects created from the pattern are active.  RegexMatchers keep a pointer
+    * back to their pattern, so premature deletion of the pattern is a
+    * catastrophic error.</p>
+    *
+    * <p>All pattern match mode flags are set to their default values.</p>
+    *
+    * <p>Note that it is often more convenient to construct a RegexMatcher directly
+    *    from a pattern string rather than separately compiling the pattern and
+    *    then creating a RegexMatcher object from the pattern.</p>
+    *
+    * @param regex The regular expression to be compiled.
+    * @param pe    Receives the position (line and column nubers) of any error
+    *              within the regular expression.)
+    * @param status A reference to a UErrorCode to receive any errors.
+    * @return      A regexPattern object for the compiled pattern.
+    *
+    * @stable ICU 2.4
+    */
+    static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
+        UParseError          &pe,
+        UErrorCode           &status);
+
+   /**
+    * Compiles the regular expression in string form into a RegexPattern
+    * object using the specified match mode flags.  These compile methods,
+    * rather than the constructors, are the usual way that RegexPattern objects
+    * are created.
+    *
+    * <p>Note that RegexPattern objects must not be deleted while RegexMatcher
+    * objects created from the pattern are active.  RegexMatchers keep a pointer
+    * back to their pattern, so premature deletion of the pattern is a
+    * catastrophic error.</p>
+    *
+    * <p>Note that it is often more convenient to construct a RegexMatcher directly
+    *    from a pattern string instead of than separately compiling the pattern and
+    *    then creating a RegexMatcher object from the pattern.</p>
+    *
+    * @param regex The regular expression to be compiled.
+    * @param flags The match mode flags to be used.
+    * @param pe    Receives the position (line and column nubers) of any error
+    *              within the regular expression.)
+    * @param status   A reference to a UErrorCode to receive any errors.
+    * @return      A regexPattern object for the compiled pattern.
+    *
+    * @stable ICU 2.4
+    */
+    static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
+        uint32_t             flags,
+        UParseError          &pe,
+        UErrorCode           &status);
+
+
+   /**
+    * Compiles the regular expression in string form into a RegexPattern
+    * object using the specified match mode flags.  These compile methods,
+    * rather than the constructors, are the usual way that RegexPattern objects
+    * are created.
+    *
+    * <p>Note that RegexPattern objects must not be deleted while RegexMatcher
+    * objects created from the pattern are active.  RegexMatchers keep a pointer
+    * back to their pattern, so premature deletion of the pattern is a
+    * catastrophic error.</p>
+    *
+    * <p>Note that it is often more convenient to construct a RegexMatcher directly
+    *    from a pattern string instead of than separately compiling the pattern and
+    *    then creating a RegexMatcher object from the pattern.</p>
+    *
+    * @param regex The regular expression to be compiled.
+    * @param flags The match mode flags to be used.
+    * @param status   A reference to a UErrorCode to receive any errors.
+    * @return      A regexPattern object for the compiled pattern.
+    *
+    * @stable ICU 2.6
+    */
+    static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
+        uint32_t             flags,
+        UErrorCode           &status);
+
+
+   /**
+    * Get the match mode flags that were used when compiling this pattern.
+    * @return  the match mode flags
+    * @stable ICU 2.4
+    */
+    virtual uint32_t flags() const;
+
+   /**
+    * Creates a RegexMatcher that will match the given input against this pattern.  The
+    * RegexMatcher can then be used to perform match, find or replace operations
+    * on the input.  Note that a RegexPattern object must not be deleted while
+    * RegexMatchers created from it still exist and might possibly be used again.
+    * <p>
+    * The matcher will retain a reference to the supplied input string, and all regexp
+    * pattern matching operations happen directly on this original string.  It is
+    * critical that the string not be altered or deleted before use by the regular
+    * expression operations is complete.
+    *
+    * @param input    The input string to which the regular expression will be applied.
+    * @param status   A reference to a UErrorCode to receive any errors.
+    * @return         A RegexMatcher object for this pattern and input.
+    *
+    * @stable ICU 2.4
+    */
+    virtual RegexMatcher *matcher(const UnicodeString &input,
+        UErrorCode          &status) const;
+
+private:
+    /**
+     * Cause a compilation error if an application accidently attempts to
+     *   create a matcher with a (UChar *) string as input rather than
+     *   a UnicodeString.  Avoids a dangling reference to a temporary string.
+     * <p>
+     * To efficiently work with UChar *strings, wrap the data in a UnicodeString
+     * using one of the aliasing constructors, such as
+     * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code>
+     *
+     * @internal
+     */
+    RegexMatcher *matcher(const UChar *input,
+        UErrorCode          &status) const;
+public:
+
+
+   /**
+    * Creates a RegexMatcher that will match against this pattern.  The
+    * RegexMatcher can be used to perform match, find or replace operations.
+    * Note that a RegexPattern object must not be deleted while
+    * RegexMatchers created from it still exist and might possibly be used again.
+    *
+    * @param status   A reference to a UErrorCode to receive any errors.
+    * @return      A RegexMatcher object for this pattern and input.
+    *
+    * @stable ICU 2.6
+    */
+    virtual RegexMatcher *matcher(UErrorCode  &status) const;
+
+
+   /**
+    * Test whether a string matches a regular expression.  This convenience function
+    * both compiles the reguluar expression and applies it in a single operation.
+    * Note that if the same pattern needs to be applied repeatedly, this method will be
+    * less efficient than creating and reusing a RegexMatcher object.
+    *
+    * @param regex The regular expression
+    * @param input The string data to be matched
+    * @param pe Receives the position of any syntax errors within the regular expression
+    * @param status A reference to a UErrorCode to receive any errors.
+    * @return True if the regular expression exactly matches the full input string.
+    *
+    * @stable ICU 2.4
+    */
+    static UBool U_EXPORT2 matches(const UnicodeString   &regex,
+        const UnicodeString   &input,
+        UParseError     &pe,
+        UErrorCode      &status);
+
+
+   /**
+    *    Returns the regular expression from which this pattern was compiled.
+    *    @stable ICU 2.4
+    */
+    virtual UnicodeString pattern() const;
+
+
+    /**
+     * Split a string into fields.  Somewhat like split() from Perl.
+     * The pattern matches identify delimiters that separate the input
+     *  into fields.  The input data between the matches becomes the
+     *  fields themselves.
+     * <p>
+     *  For the best performance on split() operations,
+     *  <code>RegexMatcher::split</code> is perferable to this function
+     *
+     * @param input   The string to be split into fields.  The field delimiters
+     *                match the pattern (in the "this" object)
+     * @param dest    An array of UnicodeStrings to receive the results of the split.
+     *                This is an array of actual UnicodeString objects, not an
+     *                array of pointers to strings.  Local (stack based) arrays can
+     *                work well here.
+     * @param destCapacity  The number of elements in the destination array.
+     *                If the number of fields found is less than destCapacity, the
+     *                extra strings in the destination array are not altered.
+     *                If the number of destination strings is less than the number
+     *                of fields, the trailing part of the input string, including any
+     *                field delimiters, is placed in the last destination string.
+     * @param status  A reference to a UErrorCode to receive any errors.
+     * @return        The number of fields into which the input string was split.
+     * @stable ICU 2.4
+     */
+    virtual int32_t  split(const UnicodeString &input,
+        UnicodeString    dest[],
+        int32_t          destCapacity,
+        UErrorCode       &status) const;
+
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.4
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.4
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+    //
+    //  Implementation Data
+    //
+    UnicodeString   fPattern;      // The original pattern string.
+    uint32_t        fFlags;        // The flags used when compiling the pattern.
+                                   //
+    UVector32       *fCompiledPat; // The compiled pattern p-code.
+    UnicodeString   fLiteralText;  // Any literal string data from the pattern,
+                                   //   after un-escaping, for use during the match.
+
+    UVector         *fSets;        // Any UnicodeSets referenced from the pattern.
+    Regex8BitSet    *fSets8;       //      (and fast sets for latin-1 range.)
+
+
+    UErrorCode      fDeferredStatus; // status if some prior error has left this
+                                   //  RegexPattern in an unusable state.
+
+    int32_t         fMinMatchLen;  // Minimum Match Length.  All matches will have length
+                                   //   >= this value.  For some patterns, this calculated
+                                   //   value may be less than the true shortest
+                                   //   possible match.
+
+    int32_t         fFrameSize;    // Size of a state stack frame in the
+                                   //   execution engine.
+
+    int32_t         fDataSize;     // The size of the data needed by the pattern that
+                                   //   does not go on the state stack, but has just
+                                   //   a single copy per matcher.
+
+    UVector32       *fGroupMap;    // Map from capture group number to position of
+                                   //   the group's variables in the matcher stack frame.
+
+    int32_t         fMaxCaptureDigits;
+
+    UnicodeSet     **fStaticSets;  // Ptr to static (shared) sets for predefined
+                                   //   regex character classes, e.g. Word.
+
+    Regex8BitSet   *fStaticSets8;  // Ptr to the static (shared) latin-1 only
+                                   //  sets for predefined regex classes.
+
+    int32_t         fStartType;    // Info on how a match must start.
+    int32_t         fInitialStringIdx;     //
+    int32_t         fInitialStringLen;
+    UnicodeSet     *fInitialChars;
+    UChar32         fInitialChar;
+    Regex8BitSet   *fInitialChars8;
+
+    friend class RegexCompile;
+    friend class RegexMatcher;
+    friend class RegexCImpl;
+
+    //
+    //  Implementation Methods
+    //
+    void        init();            // Common initialization, for use by constructors.
+    void        zap();             // Common cleanup
+#ifdef REGEX_DEBUG
+    void        dumpOp(int32_t index) const;
+    friend     void U_EXPORT2 RegexPatternDump(const RegexPattern *);
+#endif
+
+};
+
+
+
+/**
+ *  class RegexMatcher bundles together a reular expression pattern and
+ *  input text to which the expression can be applied.  It includes methods
+ *  for testing for matches, and for find and replace operations.
+ *
+ * <p>Class RegexMatcher is not intended to be subclassed.</p>
+ *
+ * @stable ICU 2.4
+ */
+class U_I18N_API RegexMatcher: public UObject {
+public:
+
+    /**
+      * Construct a RegexMatcher for a regular expression.
+      * This is a convenience method that avoids the need to explicitly create
+      * a RegexPattern object.  Note that if several RegexMatchers need to be
+      * created for the same expression, it will be more efficient to
+      * separately create and cache a RegexPattern object, and use
+      * its matcher() method to create the RegexMatcher objects.
+      *
+      *  @param regexp The Regular Expression to be compiled.
+      *  @param flags  Regular expression options, such as case insensitive matching.
+      *                @see UREGEX_CASE_INSENSITIVE
+      *  @param status Any errors are reported by setting this UErrorCode variable.
+      *  @stable ICU 2.6
+      */
+    RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status);
+
+    /**
+      * Construct a RegexMatcher for a regular expression.
+      * This is a convenience method that avoids the need to explicitly create
+      * a RegexPattern object.  Note that if several RegexMatchers need to be
+      * created for the same expression, it will be more efficient to
+      * separately create and cache a RegexPattern object, and use
+      * its matcher() method to create the RegexMatcher objects.
+      * <p>
+      * The matcher will retain a reference to the supplied input string, and all regexp
+      * pattern matching operations happen directly on the original string.  It is
+      * critical that the string not be altered or deleted before use by the regular
+      * expression operations is complete.
+      *
+      *  @param regexp The Regular Expression to be compiled.
+      *  @param input  The string to match.  The matcher retains a reference to the
+      *                caller's string; mo copy is made.
+      *  @param flags  Regular expression options, such as case insensitive matching.
+      *                @see UREGEX_CASE_INSENSITIVE
+      *  @param status Any errors are reported by setting this UErrorCode variable.
+      *  @stable ICU 2.6
+      */
+    RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
+        uint32_t flags, UErrorCode &status);
+
+private:
+    /**
+     * Cause a compilation error if an application accidently attempts to
+     *   create a matcher with a (UChar *) string as input rather than
+     *   a UnicodeString.    Avoids a dangling reference to a temporary string.
+     * <p>
+     * To efficiently work with UChar *strings, wrap the data in a UnicodeString
+     * using one of the aliasing constructors, such as
+     * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code>
+     *
+     * @internal
+     */
+    RegexMatcher(const UnicodeString &regexp, const UChar *input,
+        uint32_t flags, UErrorCode &status);
+public:
+
+
+   /**
+    *   Destructor.
+    *
+    *  @stable ICU 2.4
+    */
+    virtual ~RegexMatcher();
+
+
+   /**
+    *   Attempts to match the entire input region against the pattern.
+    *    @param   status     A reference to a UErrorCode to receive any errors.
+    *    @return TRUE if there is a match
+    *    @stable ICU 2.4
+    */
+    virtual UBool matches(UErrorCode &status);
+
+   /**
+    *   Resets the matcher, then attempts to match the input beginning 
+    *   at the specified startIndex, and extending to the end of the input.
+    *   The input region is reset to include the entire input string.
+    *   A successful match must extend to the end of the input.
+    *    @param   startIndex The input string index at which to begin matching.
+    *    @param   status     A reference to a UErrorCode to receive any errors.
+    *    @return TRUE if there is a match
+    *    @stable ICU 2.8
+    */
+    virtual UBool matches(int32_t startIndex, UErrorCode &status);
+
+
+
+
+   /**
+    *   Attempts to match the input string, starting from the beginning of the region,
+    *   against the pattern.  Like the matches() method, this function 
+    *   always starts at the beginning of the input region;
+    *   unlike that function, it does not require that the entire region be matched.
+    *
+    *   <p>If the match succeeds then more information can be obtained via the <code>start()</code>,
+    *     <code>end()</code>, and <code>group()</code> functions.</p>
+    *
+    *    @param   status     A reference to a UErrorCode to receive any errors.
+    *    @return  TRUE if there is a match at the start of the input string.
+    *    @stable ICU 2.4
+    */
+    virtual UBool lookingAt(UErrorCode &status);
+
+
+  /**
+    *   Attempts to match the input string, starting from the specified index, against the pattern.
+    *   The match may be of any length, and is not required to extend to the end
+    *   of the input string.  Contrast with match().
+    *
+    *   <p>If the match succeeds then more information can be obtained via the <code>start()</code>,
+    *     <code>end()</code>, and <code>group()</code> functions.</p>
+    *
+    *    @param   startIndex The input string index at which to begin matching.
+    *    @param   status     A reference to a UErrorCode to receive any errors.
+    *    @return  TRUE if there is a match.
+    *    @stable ICU 2.8
+    */
+    virtual UBool lookingAt(int32_t startIndex, UErrorCode &status);
+
+   /**
+    *  Find the next pattern match in the input string.
+    *  The find begins searching the input at the location following the end of
+    *  the previous match, or at the start of the string if there is no previous match.
+    *  If a match is found, <code>start(), end()</code> and <code>group()</code>
+    *  will provide more information regarding the match.
+    *  <p>Note that if the input string is changed by the application,
+    *     use find(startPos, status) instead of find(), because the saved starting
+    *     position may not be valid with the altered input string.</p>
+    *  @return  TRUE if a match is found.
+    *  @stable ICU 2.4
+    */
+    virtual UBool find();
+
+
+   /**
+    *   Resets this RegexMatcher and then attempts to find the next substring of the
+    *   input string that matches the pattern, starting at the specified index.
+    *
+    *   @param   start     the position in the input string to begin the search
+    *   @param   status    A reference to a UErrorCode to receive any errors.
+    *   @return  TRUE if a match is found.
+    *   @stable ICU 2.4
+    */
+    virtual UBool find(int32_t start, UErrorCode &status);
+
+
+   /**
+    *   Returns a string containing the text matched by the previous match.
+    *   If the pattern can match an empty string, an empty string may be returned.
+    *   @param   status      A reference to a UErrorCode to receive any errors.
+    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
+    *                        has been attempted or the last match failed.
+    *   @return  a string containing the matched input text.
+    *   @stable ICU 2.4
+    */
+    virtual UnicodeString group(UErrorCode &status) const;
+
+
+   /**
+    *    Returns a string containing the text captured by the given group
+    *    during the previous match operation.  Group(0) is the entire match.
+    *
+    *    @param groupNum the capture group number
+    *    @param   status     A reference to a UErrorCode to receive any errors.
+    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
+    *                        has been attempted or the last match failed and
+    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
+    *    @return the captured text
+    *    @stable ICU 2.4
+    */
+    virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
+
+
+   /**
+    *   Returns the number of capturing groups in this matcher's pattern.
+    *   @return the number of capture groups
+    *   @stable ICU 2.4
+    */
+    virtual int32_t groupCount() const;
+
+
+   /**
+    *   Returns the index in the input string of the start of the text matched
+    *   during the previous match operation.
+    *    @param   status      a reference to a UErrorCode to receive any errors.
+    *    @return              The position in the input string of the start of the last match.
+    *    @stable ICU 2.4
+    */
+    virtual int32_t start(UErrorCode &status) const;
+
+
+   /**
+    *   Returns the index in the input string of the start of the text matched by the
+    *    specified capture group during the previous match operation.  Return -1 if
+    *    the capture group exists in the pattern, but was not part of the last match.
+    *
+    *    @param  group       the capture group number
+    *    @param  status      A reference to a UErrorCode to receive any errors.  Possible
+    *                        errors are  U_REGEX_INVALID_STATE if no match has been
+    *                        attempted or the last match failed, and
+    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
+    *    @return the start position of substring matched by the specified group.
+    *    @stable ICU 2.4
+    */
+    virtual int32_t start(int32_t group, UErrorCode &status) const;
+
+
+   /**
+    *    Returns the index in the input string of the first character following the
+    *    text matched during the previous match operation.
+    *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
+    *                        errors are  U_REGEX_INVALID_STATE if no match has been
+    *                        attempted or the last match failed.
+    *    @return the index of the last character matched, plus one.
+    *   @stable ICU 2.4
+    */
+    virtual int32_t end(UErrorCode &status) const;
+
+
+   /**
+    *    Returns the index in the input string of the character following the
+    *    text matched by the specified capture group during the previous match operation.
+    *    @param group  the capture group number
+    *    @param   status      A reference to a UErrorCode to receive any errors.  Possible
+    *                        errors are  U_REGEX_INVALID_STATE if no match has been
+    *                        attempted or the last match failed and
+    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
+    *    @return  the index of the first character following the text
+    *              captured by the specifed group during the previous match operation.
+    *              Return -1 if the capture group exists in the pattern but was not part of the match.
+    *    @stable ICU 2.4
+    */
+    virtual int32_t end(int32_t group, UErrorCode &status) const;
+
+
+   /**
+    *   Resets this matcher.  The effect is to remove any memory of previous matches,
+    *       and to cause subsequent find() operations to begin at the beginning of
+    *       the input string.
+    *
+    *   @return this RegexMatcher.
+    *   @stable ICU 2.4
+    */
+    virtual RegexMatcher &reset();
+
+
+   /**
+    *   Resets this matcher, and set the current input position.
+    *   The effect is to remove any memory of previous matches,
+    *       and to cause subsequent find() operations to begin at
+    *       the specified position in the input string.
+    * <p>
+    *   The matcher's region is reset to its default, which is the entire
+    *   input string.
+    * <p>
+    *   An alternative to this function is to set a match region
+    *   beginning at the desired index.
+    *
+    *   @return this RegexMatcher.
+    *   @stable ICU 2.8
+    */
+    virtual RegexMatcher &reset(int32_t index, UErrorCode &status);
+
+
+   /**
+    *   Resets this matcher with a new input string.  This allows instances of RegexMatcher
+    *     to be reused, which is more efficient than creating a new RegexMatcher for
+    *     each input string to be processed.
+    *   @param input The new string on which subsequent pattern matches will operate.
+    *                The matcher retains a reference to the callers string, and operates
+    *                directly on that.  Ownership of the string remains with the caller.
+    *                Because no copy of the string is made, it is essential that the
+    *                caller not delete the string until after regexp operations on it
+    *                are done.
+    *   @return this RegexMatcher.
+    *   @stable ICU 2.4
+    */
+    virtual RegexMatcher &reset(const UnicodeString &input);
+
+private:
+    /**
+     * Cause a compilation error if an application accidently attempts to
+     *   reset a matcher with a (UChar *) string as input rather than
+     *   a UnicodeString.    Avoids a dangling reference to a temporary string.
+     * <p>
+     * To efficiently work with UChar *strings, wrap the data in a UnicodeString
+     * using one of the aliasing constructors, such as
+     * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code>
+     *
+     * @internal
+     */
+    RegexMatcher &reset(const UChar *input);
+public:
+
+   /**
+    *   Returns the input string being matched.  The returned string is not a copy,
+    *   but the live input string.  It should not be altered or deleted.
+    *   @return the input string
+    *   @stable ICU 2.4
+    */
+    virtual const UnicodeString &input() const;
+    
+    
+
+   /** Sets the limits of this matcher's region.
+     * The region is the part of the input string that will be searched to find a match.
+     * Invoking this method resets the matcher, and then sets the region to start
+     * at the index specified by the start parameter and end at the index specified
+     * by the end parameter.
+     *
+     * Depending on the transparency and anchoring being used (see useTransparentBounds
+     * and useAnchoringBounds), certain constructs such as anchors may behave differently
+     * at or around the boundaries of the region
+     *
+     * The function will fail if start is greater than limit, or if either index
+     *  is less than zero or greater than the length of the string being matched.
+     *
+     * @param start  The index to begin searches at.
+     * @param limit  The index to end searches at (exclusive).
+     * @param status A reference to a UErrorCode to receive any errors.
+     * @draft ICU 4.0
+     */
+     virtual RegexMatcher &region(int32_t start, int32_t limit, UErrorCode &status);
+
+
+   /**
+     * Reports the start index of this matcher's region. The searches this matcher
+     * conducts are limited to finding matches within regionStart (inclusive) and
+     * regionEnd (exclusive).
+     *
+     * @return The starting index of this matcher's region.
+     * @draft ICU 4.0
+     */
+     virtual int32_t regionStart() const;
+
+
+    /**
+      * Reports the end (limit) index (exclusive) of this matcher's region. The searches
+      * this matcher conducts are limited to finding matches within regionStart
+      * (inclusive) and regionEnd (exclusive).
+      *
+      * @return The ending point of this matcher's region.
+      * @draft ICU 4.0
+      */
+      virtual int32_t regionEnd() const;
+
+    /**
+      * Queries the transparency of region bounds for this matcher.
+      * See useTransparentBounds for a description of transparent and opaque bounds.
+      * By default, a matcher uses opaque region boundaries.
+      *
+      * @return TRUE if this matcher is using opaque bounds, false if it is not.
+      * @draft ICU 4.0
+      */
+      virtual UBool hasTransparentBounds() const;
+
+    /**
+      * Sets the transparency of region bounds for this matcher.
+      * Invoking this function with an argument of true will set this matcher to use transparent bounds.
+      * If the boolean argument is false, then opaque bounds will be used.
+      *
+      * Using transparent bounds, the boundaries of this matcher's region are transparent
+      * to lookahead, lookbehind, and boundary matching constructs. Those constructs can
+      * see text beyond the boundaries of the region while checking for a match.
+      *
+      * With opaque bounds, no text outside of the matcher's region is visible to lookahead,
+      * lookbehind, and boundary matching constructs.
+      *
+      * By default, a matcher uses opaque bounds.
+      *
+      * @param   b TRUE for transparent bounds; FALSE for opaque bounds
+      * @return  This Matcher;
+      * @draft   ICU 4.0
+      **/
+      virtual RegexMatcher &useTransparentBounds(UBool b);
+
+     
+    /**
+      * Return true if this matcher is using anchoring bounds.
+      * By default, matchers use anchoring region boounds.
+      *
+      * @return TRUE if this matcher is using anchoring bounds.
+      * @draft  ICU 4.0
+      */    
+      virtual UBool hasAnchoringBounds() const;
+
+    /**
+      * Set whether this matcher is using Anchoring Bounds for its region.
+      * With anchoring bounds, pattern anchors such as ^ and $ will match at the start
+      * and end of the region.  Without Anchoring Bounds, anchors will only match at
+      * the positions they would in the complete text.
+      *
+      * Anchoring Bounds are the default for regions.
+      *
+      * @param b TRUE if to enable anchoring bounds; FALSE to disable them.
+      * @return  This Matcher
+      * @draft   ICU 4.0
+      */
+      virtual RegexMatcher &useAnchoringBounds(UBool b);
+
+    /**
+      * Return TRUE if the most recent matching operation touched the
+      *  end of the text being processed.  In this case, additional input text could
+      *  change the results of that match.
+      *
+      *  hitEnd() is defined for both successful and unsuccessful matches.
+      *  In either case hitEnd() will return TRUE if if the end of the text was
+      *  reached at any point during the matching process.
+      *
+      *  @return  TRUE if the most recent match hit the end of input
+      *  @draft   ICU 4.0
+      */
+      virtual UBool hitEnd() const;
+
+    /**
+      * Return TRUE the most recent match succeeded and additional input could cause
+      * it to fail. If this method returns false and a match was found, then more input
+      * might change the match but the match won't be lost. If a match was not found,
+      * then requireEnd has no meaning.
+      *
+      * @return TRUE if more input could cause the most recent match to no longer match.
+      * @draft  ICU 4.0
+      */
+      virtual UBool requireEnd() const;
+
+
+
+
+
+   /**
+    *    Returns the pattern that is interpreted by this matcher.
+    *    @return  the RegexPattern for this RegexMatcher
+    *    @stable ICU 2.4
+    */
+    virtual const RegexPattern &pattern() const;
+
+
+   /**
+    *    Replaces every substring of the input that matches the pattern
+    *    with the given replacement string.  This is a convenience function that
+    *    provides a complete find-and-replace-all operation.
+    *
+    *    This method first resets this matcher. It then scans the input string
+    *    looking for matches of the pattern. Input that is not part of any
+    *    match is left unchanged; each match is replaced in the result by the
+    *    replacement string. The replacement string may contain references to
+    *    capture groups.
+    *
+    *    @param   replacement a string containing the replacement text.
+    *    @param   status      a reference to a UErrorCode to receive any errors.
+    *    @return              a string containing the results of the find and replace.
+    *    @stable ICU 2.4
+    */
+    virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
+
+
+   /**
+    * Replaces the first substring of the input that matches
+    * the pattern with the replacement string.   This is a convenience
+    * function that provides a complete find-and-replace operation.
+    *
+    * <p>This function first resets this RegexMatcher. It then scans the input string
+    * looking for a match of the pattern. Input that is not part
+    * of the match is appended directly to the result string; the match is replaced
+    * in the result by the replacement string. The replacement string may contain
+    * references to captured groups.</p>
+    *
+    * <p>The state of the matcher (the position at which a subsequent find()
+    *    would begin) after completing a replaceFirst() is not specified.  The
+    *    RegexMatcher should be reset before doing additional find() operations.</p>
+    *
+    *    @param   replacement a string containing the replacement text.
+    *    @param   status      a reference to a UErrorCode to receive any errors.
+    *    @return              a string containing the results of the find and replace.
+    *    @stable ICU 2.4
+    */
+    virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
+
+   /**
+    *   Implements a replace operation intended to be used as part of an
+    *   incremental find-and-replace.
+    *
+    *   <p>The input string, starting from the end of the previous replacement and ending at
+    *   the start of the current match, is appended to the destination string.  Then the
+    *   replacement string is appended to the output string,
+    *   including handling any substitutions of captured text.</p>
+    *
+    *   <p>For simple, prepackaged, non-incremental find-and-replace
+    *   operations, see replaceFirst() or replaceAll().</p>
+    *
+    *   @param   dest        A UnicodeString to which the results of the find-and-replace are appended.
+    *   @param   replacement A UnicodeString that provides the text to be substituted for
+    *                        the input text that matched the regexp pattern.  The replacement
+    *                        text may contain references to captured text from the
+    *                        input.
+    *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
+    *                        errors are  U_REGEX_INVALID_STATE if no match has been
+    *                        attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR
+    *                        if the replacement text specifies a capture group that
+    *                        does not exist in the pattern.
+    *
+    *   @return  this  RegexMatcher
+    *   @stable ICU 2.4
+    *
+    */
+    virtual RegexMatcher &appendReplacement(UnicodeString &dest,
+        const UnicodeString &replacement, UErrorCode &status);
+
+
+   /**
+    * As the final step in a find-and-replace operation, append the remainder
+    * of the input string, starting at the position following the last appendReplacement(),
+    * to the destination string. <code>appendTail()</code> is intended to be invoked after one
+    * or more invocations of the <code>RegexMatcher::appendReplacement()</code>.
+    *
+    *  @param dest A UnicodeString to which the results of the find-and-replace are appended.
+    *  @return  the destination string.
+    *  @stable ICU 2.4
+    */
+    virtual UnicodeString &appendTail(UnicodeString &dest);
+
+
+
+    /**
+     * Split a string into fields.  Somewhat like split() from Perl.
+     * The pattern matches identify delimiters that separate the input
+     *  into fields.  The input data between the matches becomes the
+     *  fields themselves.
+     * <p>
+     *
+     * @param input   The string to be split into fields.  The field delimiters
+     *                match the pattern (in the "this" object).  This matcher
+     *                will be reset to this input string.
+     * @param dest    An array of UnicodeStrings to receive the results of the split.
+     *                This is an array of actual UnicodeString objects, not an
+     *                array of pointers to strings.  Local (stack based) arrays can
+     *                work well here.
+     * @param destCapacity  The number of elements in the destination array.
+     *                If the number of fields found is less than destCapacity, the
+     *                extra strings in the destination array are not altered.
+     *                If the number of destination strings is less than the number
+     *                of fields, the trailing part of the input string, including any
+     *                field delimiters, is placed in the last destination string.
+     * @param status  A reference to a UErrorCode to receive any errors.
+     * @return        The number of fields into which the input string was split.
+     * @stable ICU 2.6
+     */
+    virtual int32_t  split(const UnicodeString &input,
+        UnicodeString    dest[],
+        int32_t          destCapacity,
+        UErrorCode       &status);
+
+  /**
+    *   Set a processing time limit for match operations with this Matcher.
+    *  
+    *   Some patterns, when matching certain strings, can run in exponential time.
+    *   For practical purposes, the match operation may appear to be in an
+    *   infinite loop.
+    *   When a limit is set a match operation will fail with an error if the
+    *   limit is exceeded.
+    *   <p>
+    *   The units of the limit are steps of the match engine.
+    *   Correspondence with actual processor time will depend on the speed
+    *   of the processor and the details of the specific pattern, but will
+    *   typically be on the order of milliseconds.
+    *   <p>
+    *   By default, the matching time is not limited.
+    *   <p>
+    *
+    *   @param   limit       The limit value, or 0 for no limit.
+    *   @param   status      A reference to a UErrorCode to receive any errors.
+    *   @draft ICU 4.0
+    */
+    virtual void setTimeLimit(int32_t limit, UErrorCode &status);
+
+  /**
+    * Get the time limit, if any, for match operations made with this Matcher.
+    *
+    *   @return the maximum allowed time for a match, in units of processing steps.
+    *   @draft ICU 4.0
+    */
+    virtual int32_t getTimeLimit() const;
+
+  /**
+    *  Set the amount of heap storage avaliable for use by the match backtracking stack.
+    *  The matcher is also reset, discarding any results from previous matches.
+    *  <p>
+    *  ICU uses a backtracking regular expression engine, with the backtrack stack
+    *  maintained on the heap.  This function sets the limit to the amount of memory
+    *  that can be used  for this purpose.  A backtracking stack overflow will
+    *  result in an error from the match operation that caused it.
+    *  <p>
+    *  A limit is desirable because a malicious or poorly designed pattern can use
+    *  excessive memory, potentially crashing the process.  A limit is enabled
+    *  by default.
+    *  <p>
+    *  @param limit  The maximum size, in bytes, of the matching backtrack stack.
+    *                A value of zero means no limit.
+    *                The limit must be greater or equal to zero.
+    *
+    *  @param status   A reference to a UErrorCode to receive any errors.
+    *
+    *  @draft ICU 4.0
+    */
+    virtual void setStackLimit(int32_t  limit, UErrorCode &status);
+    
+  /**
+    *  Get the size of the heap storage available for use by the back tracking stack.
+    *
+    *  @return  the maximum backtracking stack size, in bytes, or zero if the
+    *           stack size is unlimited.
+    *  @draft ICU 4.0
+    */
+    virtual int32_t  getStackLimit() const;
+
+
+  /**
+    * Set a callback function for use with this Matcher.
+    * During matching operations the function will be called periodically,
+    * giving the application the opportunity to terminate a long-running
+    * match.
+    *
+    *    @param   callback    A pointer to the user-supplied callback function.
+    *    @param   context     User context pointer.  The value supplied at the
+    *                         time the callback function is set will be saved
+    *                         and passed to the callback each time that it is called.
+    *    @param   status      A reference to a UErrorCode to receive any errors.
+    *  @draft ICU 4.0
+    */
+    virtual void setMatchCallback(URegexMatchCallback     *callback,
+                                  const void              *context,
+                                  UErrorCode              &status);
+
+
+
+  /**
+    *  Get the callback function for this URegularExpression.
+    *
+    *    @param   callback    Out paramater, receives a pointer to the user-supplied 
+    *                         callback function.
+    *    @param   context     Out parameter, receives the user context pointer that
+    *                         was set when uregex_setMatchCallback() was called.
+    *    @param   status      A reference to a UErrorCode to receive any errors.
+    *    @draft ICU 4.0
+    */
+    virtual void getMatchCallback(URegexMatchCallback     *&callback,
+                                  const void              *&context,
+                                  UErrorCode              &status);
+
+
+   /**
+     *   setTrace   Debug function, enable/disable tracing of the matching engine.
+     *              For internal ICU development use only.  DO NO USE!!!!
+     *   @internal
+     */
+    void setTrace(UBool state);
+
+
+    /**
+    * ICU "poor man's RTTI", returns a UClassID for this class.
+    *
+    * @stable ICU 2.2
+    */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const;
+
+private:
+    // Constructors and other object boilerplate are private.
+    // Instances of RegexMatcher can not be assigned, copied, cloned, etc.
+    RegexMatcher();                  // default constructor not implemented
+    RegexMatcher(const RegexPattern *pat);
+    RegexMatcher(const RegexMatcher &other);
+    RegexMatcher &operator =(const RegexMatcher &rhs);
+    void init(UErrorCode &status);                      // Common initialization
+    void init2(const UnicodeString &s, UErrorCode &e);  // Common initialization, part 2.
+
+    friend class RegexPattern;
+    friend class RegexCImpl;
+public:
+    /** @internal  */
+    void resetPreserveRegion();  // Reset matcher state, but preserve any region.
+private:
+
+    //
+    //  MatchAt   This is the internal interface to the match engine itself.
+    //            Match status comes back in matcher member variables.
+    //
+    void                 MatchAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
+    inline void          backTrack(int32_t &inputIdx, int32_t &patIdx);
+    UBool                isWordBoundary(int32_t pos);         // perform Perl-like  \b test
+    UBool                isUWordBoundary(int32_t pos);        // perform RBBI based \b test
+    REStackFrame        *resetStack();
+    inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx, UErrorCode &status);
+    void                 IncrementTime(UErrorCode &status);
+
+
+    const RegexPattern  *fPattern;
+    RegexPattern        *fPatternOwned;    // Non-NULL if this matcher owns the pattern, and
+                                           //   should delete it when through.
+
+    const UnicodeString *fInput;           // The text being matched. Is never NULL.
+    int32_t              fFrameSize;       // The size of a frame in the backtrack stack.
+    
+    int32_t              fRegionStart;     // Start of the input region, default = 0.
+    int32_t              fRegionLimit;     // End of input region, default to input.length.
+    
+    int32_t              fAnchorStart;     // Region bounds for anchoring operations (^ or $).
+    int32_t              fAnchorLimit;     //   See useAnchoringBounds
+    
+    int32_t              fLookStart;       // Region bounds for look-ahead/behind and
+    int32_t              fLookLimit;       //   and other boundary tests.  See
+                                           //   useTransparentBounds
+
+    int32_t              fActiveStart;     // Currently active bounds for matching.
+    int32_t              fActiveLimit;     //   Usually is the same as region, but
+                                           //   is changed to fLookStart/Limit when
+                                           //   entering look around regions.
+
+    UBool                fTransparentBounds;  // True if using transparent bounds.
+    UBool                fAnchoringBounds; // True if using anchoring bounds.
+
+    UBool                fMatch;           // True if the last attempted match was successful.
+    int32_t              fMatchStart;      // Position of the start of the most recent match
+    int32_t              fMatchEnd;        // First position after the end of the most recent match
+                                           //   Zero if no previous match, even when a region
+                                           //   is active.
+    int32_t              fLastMatchEnd;    // First position after the end of the previous match,
+                                           //   or -1 if there was no previous match.
+    int32_t              fAppendPosition;  // First position after the end of the previous
+                                           //   appendReplacement().  As described by the
+                                           //   JavaDoc for Java Matcher, where it is called 
+                                           //   "append position"
+    UBool                fHitEnd;          // True if the last match touched the end of input.
+    UBool                fRequireEnd;      // True if the last match required end-of-input
+                                           //    (matched $ or Z)
+
+    UVector32           *fStack;
+    REStackFrame        *fFrame;           // After finding a match, the last active stack frame,
+                                           //   which will contain the capture group results.
+                                           //   NOT valid while match engine is running.
+
+    int32_t             *fData;            // Data area for use by the compiled pattern.
+    int32_t             fSmallData[8];     //   Use this for data if it's enough.
+
+    int32_t             fTimeLimit;        // Max time (in arbitrary steps) to let the
+                                           //   match engine run.  Zero for unlimited.
+    
+    int32_t             fTime;             // Match time, accumulates while matching.
+    int32_t             fTickCounter;      // Low bits counter for time.  Counts down StateSaves.
+                                           //   Kept separately from fTime to keep as much
+                                           //   code as possible out of the inline
+                                           //   StateSave function.
+
+    int32_t             fStackLimit;       // Maximum memory size to use for the backtrack
+                                           //   stack, in bytes.  Zero for unlimited.
+
+    URegexMatchCallback *fCallbackFn;       // Pointer to match progress callback funct.
+                                           //   NULL if there is no callback.
+    const void         *fCallbackContext;  // User Context ptr for callback function.
+
+    UBool               fTraceDebug;       // Set true for debug tracing of match engine.
+
+    UErrorCode          fDeferredStatus;   // Save error state that cannot be immediately
+                                           //   reported, or that permanently disables this matcher.
+
+    RuleBasedBreakIterator  *fWordBreakItr;
+
+
+};
+
+U_NAMESPACE_END
+#endif  // UCONFIG_NO_REGULAR_EXPRESSIONS
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/rep.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/rep.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/rep.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,259 +0,0 @@
-/*
-**************************************************************************
-* Copyright (C) 1999-2005, International Business Machines Corporation and
-* others. All Rights Reserved.
-**************************************************************************
-*   Date        Name        Description
-*   11/17/99    aliu        Creation.  Ported from java.  Modified to
-*                           match current UnicodeString API.  Forced
-*                           to use name "handleReplaceBetween" because
-*                           of existing methods in UnicodeString.
-**************************************************************************
-*/
-
-#ifndef REP_H
-#define REP_H
-
-#include "unicode/uobject.h"
-
-/**
- * \file 
- * \brief C++ API: Replaceable String
- */
- 
-U_NAMESPACE_BEGIN
-
-class UnicodeString;
-
-/**
- * <code>Replaceable</code> is an abstract base class representing a
- * string of characters that supports the replacement of a range of
- * itself with a new string of characters.  It is used by APIs that
- * change a piece of text while retaining metadata.  Metadata is data
- * other than the Unicode characters returned by char32At().  One
- * example of metadata is style attributes; another is an edit
- * history, marking each character with an author and revision number.
- *
- * <p>An implicit aspect of the <code>Replaceable</code> API is that
- * during a replace operation, new characters take on the metadata of
- * the old characters.  For example, if the string "the <b>bold</b>
- * font" has range (4, 8) replaced with "strong", then it becomes "the
- * <b>strong</b> font".
- *
- * <p><code>Replaceable</code> specifies ranges using a start
- * offset and a limit offset.  The range of characters thus specified
- * includes the characters at offset start..limit-1.  That is, the
- * start offset is inclusive, and the limit offset is exclusive.
- *
- * <p><code>Replaceable</code> also includes API to access characters
- * in the string: <code>length()</code>, <code>charAt()</code>,
- * <code>char32At()</code>, and <code>extractBetween()</code>.
- *
- * <p>For a subclass to support metadata, typical behavior of
- * <code>replace()</code> is the following:
- * <ul>
- *   <li>Set the metadata of the new text to the metadata of the first
- *   character replaced</li>
- *   <li>If no characters are replaced, use the metadata of the
- *   previous character</li>
- *   <li>If there is no previous character (i.e. start == 0), use the
- *   following character</li>
- *   <li>If there is no following character (i.e. the replaceable was
- *   empty), use default metadata.<br>
- *   <li>If the code point U+FFFF is seen, it should be interpreted as
- *   a special marker having no metadata<li>
- *   </li>
- * </ul>
- * If this is not the behavior, the subclass should document any differences.
- * @author Alan Liu
- * @stable ICU 2.0
- */
-class U_COMMON_API Replaceable : public UObject {
-
-public:
-    /**
-     * Destructor.
-     * @stable ICU 2.0
-     */
-    virtual ~Replaceable();
-
-    /**
-     * Returns the number of 16-bit code units in the text.
-     * @return number of 16-bit code units in text
-     * @stable ICU 1.8
-     */ 
-    inline int32_t length() const;
-
-    /**
-     * Returns the 16-bit code unit at the given offset into the text.
-     * @param offset an integer between 0 and <code>length()</code>-1
-     * inclusive
-     * @return 16-bit code unit of text at given offset
-     * @stable ICU 1.8
-     */
-    inline UChar charAt(int32_t offset) const;
-
-    /**
-     * Returns the 32-bit code point at the given 16-bit offset into
-     * the text.  This assumes the text is stored as 16-bit code units
-     * with surrogate pairs intermixed.  If the offset of a leading or
-     * trailing code unit of a surrogate pair is given, return the
-     * code point of the surrogate pair.
-     *
-     * @param offset an integer between 0 and <code>length()</code>-1
-     * inclusive
-     * @return 32-bit code point of text at given offset
-     * @stable ICU 1.8
-     */
-    inline UChar32 char32At(int32_t offset) const;
-
-    /**
-     * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>) 
-     * into the UnicodeString <tt>target</tt>.
-     * @param start offset of first character which will be copied
-     * @param limit offset immediately following the last character to
-     * be copied
-     * @param target UnicodeString into which to copy characters.
-     * @return A reference to <TT>target</TT>
-     * @stable ICU 2.1
-     */
-    virtual void extractBetween(int32_t start,
-                                int32_t limit,
-                                UnicodeString& target) const = 0;
-
-    /**
-     * Replaces a substring of this object with the given text.  If the
-     * characters being replaced have metadata, the new characters
-     * that replace them should be given the same metadata.
-     *
-     * <p>Subclasses must ensure that if the text between start and
-     * limit is equal to the replacement text, that replace has no
-     * effect. That is, any metadata
-     * should be unaffected. In addition, subclasses are encouraged to
-     * check for initial and trailing identical characters, and make a
-     * smaller replacement if possible. This will preserve as much
-     * metadata as possible.
-     * @param start the beginning index, inclusive; <code>0 <= start
-     * <= limit</code>.
-     * @param limit the ending index, exclusive; <code>start <= limit
-     * <= length()</code>.
-     * @param text the text to replace characters <code>start</code>
-     * to <code>limit - 1</code> 
-     * @stable ICU 2.0
-     */
-    virtual void handleReplaceBetween(int32_t start,
-                                      int32_t limit,
-                                      const UnicodeString& text) = 0;
-    // Note: All other methods in this class take the names of
-    // existing UnicodeString methods.  This method is the exception.
-    // It is named differently because all replace methods of
-    // UnicodeString return a UnicodeString&.  The 'between' is
-    // required in order to conform to the UnicodeString naming
-    // convention; API taking start/length are named <operation>, and
-    // those taking start/limit are named <operationBetween>.  The
-    // 'handle' is added because 'replaceBetween' and
-    // 'doReplaceBetween' are already taken.
-
-    /**
-     * Copies a substring of this object, retaining metadata.
-     * This method is used to duplicate or reorder substrings.
-     * The destination index must not overlap the source range.
-     * 
-     * @param start the beginning index, inclusive; <code>0 <= start <=
-     * limit</code>.
-     * @param limit the ending index, exclusive; <code>start <= limit <=
-     * length()</code>.
-     * @param dest the destination index.  The characters from
-     * <code>start..limit-1</code> will be copied to <code>dest</code>.
-     * Implementations of this method may assume that <code>dest <= start ||
-     * dest >= limit</code>.
-     * @stable ICU 2.0
-     */
-    virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
-
-    /**
-     * Returns true if this object contains metadata.  If a
-     * Replaceable object has metadata, calls to the Replaceable API
-     * must be made so as to preserve metadata.  If it does not, calls
-     * to the Replaceable API may be optimized to improve performance.
-     * The default implementation returns true.
-     * @return true if this object contains metadata
-     * @stable ICU 2.2
-     */
-    virtual UBool hasMetaData() const;
-
-    /**
-     * Clone this object, an instance of a subclass of Replaceable.
-     * Clones can be used concurrently in multiple threads.
-     * If a subclass does not implement clone(), or if an error occurs,
-     * then NULL is returned.
-     * The clone functions in all subclasses return a pointer to a Replaceable
-     * because some compilers do not support covariant (same-as-this)
-     * return types; cast to the appropriate subclass if necessary.
-     * The caller must delete the clone.
-     *
-     * @return a clone of this object
-     *
-     * @see getDynamicClassID
-     * @stable ICU 2.6
-     */
-    virtual Replaceable *clone() const;
-
-protected:
-
-    /**
-     * Default constructor.
-     * @stable ICU 2.4
-     */
-    Replaceable();
-
-    /*
-     * Assignment operator not declared. The compiler will provide one
-     * which does nothing since this class does not contain any data members.
-     * API/code coverage may show the assignment operator as present and
-     * untested - ignore.
-     * Subclasses need this assignment operator if they use compiler-provided
-     * assignment operators of their own. An alternative to not declaring one
-     * here would be to declare and empty-implement a protected or public one.
-    Replaceable &Replaceable::operator=(const Replaceable &);
-     */
-
-    /**
-     * Virtual version of length().
-     * @stable ICU 2.4
-     */ 
-    virtual int32_t getLength() const = 0;
-
-    /**
-     * Virtual version of charAt().
-     * @stable ICU 2.4
-     */
-    virtual UChar getCharAt(int32_t offset) const = 0;
-
-    /**
-     * Virtual version of char32At().
-     * @stable ICU 2.4
-     */
-    virtual UChar32 getChar32At(int32_t offset) const = 0;
-};
-
-inline int32_t
-Replaceable::length() const {
-    return getLength();
-}
-
-inline UChar
-Replaceable::charAt(int32_t offset) const {
-    return getCharAt(offset);
-}
-
-inline UChar32
-Replaceable::char32At(int32_t offset) const {
-    return getChar32At(offset);
-}
-
-// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
-
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/rep.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/rep.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/rep.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/rep.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,259 @@
+/*
+**************************************************************************
+* Copyright (C) 1999-2005, International Business Machines Corporation and
+* others. All Rights Reserved.
+**************************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.  Ported from java.  Modified to
+*                           match current UnicodeString API.  Forced
+*                           to use name "handleReplaceBetween" because
+*                           of existing methods in UnicodeString.
+**************************************************************************
+*/
+
+#ifndef REP_H
+#define REP_H
+
+#include "unicode/uobject.h"
+
+/**
+ * \file 
+ * \brief C++ API: Replaceable String
+ */
+ 
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * <code>Replaceable</code> is an abstract base class representing a
+ * string of characters that supports the replacement of a range of
+ * itself with a new string of characters.  It is used by APIs that
+ * change a piece of text while retaining metadata.  Metadata is data
+ * other than the Unicode characters returned by char32At().  One
+ * example of metadata is style attributes; another is an edit
+ * history, marking each character with an author and revision number.
+ *
+ * <p>An implicit aspect of the <code>Replaceable</code> API is that
+ * during a replace operation, new characters take on the metadata of
+ * the old characters.  For example, if the string "the <b>bold</b>
+ * font" has range (4, 8) replaced with "strong", then it becomes "the
+ * <b>strong</b> font".
+ *
+ * <p><code>Replaceable</code> specifies ranges using a start
+ * offset and a limit offset.  The range of characters thus specified
+ * includes the characters at offset start..limit-1.  That is, the
+ * start offset is inclusive, and the limit offset is exclusive.
+ *
+ * <p><code>Replaceable</code> also includes API to access characters
+ * in the string: <code>length()</code>, <code>charAt()</code>,
+ * <code>char32At()</code>, and <code>extractBetween()</code>.
+ *
+ * <p>For a subclass to support metadata, typical behavior of
+ * <code>replace()</code> is the following:
+ * <ul>
+ *   <li>Set the metadata of the new text to the metadata of the first
+ *   character replaced</li>
+ *   <li>If no characters are replaced, use the metadata of the
+ *   previous character</li>
+ *   <li>If there is no previous character (i.e. start == 0), use the
+ *   following character</li>
+ *   <li>If there is no following character (i.e. the replaceable was
+ *   empty), use default metadata.<br>
+ *   <li>If the code point U+FFFF is seen, it should be interpreted as
+ *   a special marker having no metadata<li>
+ *   </li>
+ * </ul>
+ * If this is not the behavior, the subclass should document any differences.
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+class U_COMMON_API Replaceable : public UObject {
+
+public:
+    /**
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~Replaceable();
+
+    /**
+     * Returns the number of 16-bit code units in the text.
+     * @return number of 16-bit code units in text
+     * @stable ICU 1.8
+     */ 
+    inline int32_t length() const;
+
+    /**
+     * Returns the 16-bit code unit at the given offset into the text.
+     * @param offset an integer between 0 and <code>length()</code>-1
+     * inclusive
+     * @return 16-bit code unit of text at given offset
+     * @stable ICU 1.8
+     */
+    inline UChar charAt(int32_t offset) const;
+
+    /**
+     * Returns the 32-bit code point at the given 16-bit offset into
+     * the text.  This assumes the text is stored as 16-bit code units
+     * with surrogate pairs intermixed.  If the offset of a leading or
+     * trailing code unit of a surrogate pair is given, return the
+     * code point of the surrogate pair.
+     *
+     * @param offset an integer between 0 and <code>length()</code>-1
+     * inclusive
+     * @return 32-bit code point of text at given offset
+     * @stable ICU 1.8
+     */
+    inline UChar32 char32At(int32_t offset) const;
+
+    /**
+     * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>) 
+     * into the UnicodeString <tt>target</tt>.
+     * @param start offset of first character which will be copied
+     * @param limit offset immediately following the last character to
+     * be copied
+     * @param target UnicodeString into which to copy characters.
+     * @return A reference to <TT>target</TT>
+     * @stable ICU 2.1
+     */
+    virtual void extractBetween(int32_t start,
+                                int32_t limit,
+                                UnicodeString& target) const = 0;
+
+    /**
+     * Replaces a substring of this object with the given text.  If the
+     * characters being replaced have metadata, the new characters
+     * that replace them should be given the same metadata.
+     *
+     * <p>Subclasses must ensure that if the text between start and
+     * limit is equal to the replacement text, that replace has no
+     * effect. That is, any metadata
+     * should be unaffected. In addition, subclasses are encouraged to
+     * check for initial and trailing identical characters, and make a
+     * smaller replacement if possible. This will preserve as much
+     * metadata as possible.
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= length()</code>.
+     * @param text the text to replace characters <code>start</code>
+     * to <code>limit - 1</code> 
+     * @stable ICU 2.0
+     */
+    virtual void handleReplaceBetween(int32_t start,
+                                      int32_t limit,
+                                      const UnicodeString& text) = 0;
+    // Note: All other methods in this class take the names of
+    // existing UnicodeString methods.  This method is the exception.
+    // It is named differently because all replace methods of
+    // UnicodeString return a UnicodeString&.  The 'between' is
+    // required in order to conform to the UnicodeString naming
+    // convention; API taking start/length are named <operation>, and
+    // those taking start/limit are named <operationBetween>.  The
+    // 'handle' is added because 'replaceBetween' and
+    // 'doReplaceBetween' are already taken.
+
+    /**
+     * Copies a substring of this object, retaining metadata.
+     * This method is used to duplicate or reorder substrings.
+     * The destination index must not overlap the source range.
+     * 
+     * @param start the beginning index, inclusive; <code>0 <= start <=
+     * limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit <=
+     * length()</code>.
+     * @param dest the destination index.  The characters from
+     * <code>start..limit-1</code> will be copied to <code>dest</code>.
+     * Implementations of this method may assume that <code>dest <= start ||
+     * dest >= limit</code>.
+     * @stable ICU 2.0
+     */
+    virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
+
+    /**
+     * Returns true if this object contains metadata.  If a
+     * Replaceable object has metadata, calls to the Replaceable API
+     * must be made so as to preserve metadata.  If it does not, calls
+     * to the Replaceable API may be optimized to improve performance.
+     * The default implementation returns true.
+     * @return true if this object contains metadata
+     * @stable ICU 2.2
+     */
+    virtual UBool hasMetaData() const;
+
+    /**
+     * Clone this object, an instance of a subclass of Replaceable.
+     * Clones can be used concurrently in multiple threads.
+     * If a subclass does not implement clone(), or if an error occurs,
+     * then NULL is returned.
+     * The clone functions in all subclasses return a pointer to a Replaceable
+     * because some compilers do not support covariant (same-as-this)
+     * return types; cast to the appropriate subclass if necessary.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @stable ICU 2.6
+     */
+    virtual Replaceable *clone() const;
+
+protected:
+
+    /**
+     * Default constructor.
+     * @stable ICU 2.4
+     */
+    Replaceable();
+
+    /*
+     * Assignment operator not declared. The compiler will provide one
+     * which does nothing since this class does not contain any data members.
+     * API/code coverage may show the assignment operator as present and
+     * untested - ignore.
+     * Subclasses need this assignment operator if they use compiler-provided
+     * assignment operators of their own. An alternative to not declaring one
+     * here would be to declare and empty-implement a protected or public one.
+    Replaceable &Replaceable::operator=(const Replaceable &);
+     */
+
+    /**
+     * Virtual version of length().
+     * @stable ICU 2.4
+     */ 
+    virtual int32_t getLength() const = 0;
+
+    /**
+     * Virtual version of charAt().
+     * @stable ICU 2.4
+     */
+    virtual UChar getCharAt(int32_t offset) const = 0;
+
+    /**
+     * Virtual version of char32At().
+     * @stable ICU 2.4
+     */
+    virtual UChar32 getChar32At(int32_t offset) const = 0;
+};
+
+inline int32_t
+Replaceable::length() const {
+    return getLength();
+}
+
+inline UChar
+Replaceable::charAt(int32_t offset) const {
+    return getCharAt(offset);
+}
+
+inline UChar32
+Replaceable::char32At(int32_t offset) const {
+    return getChar32At(offset);
+}
+
+// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
+
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/resbund.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/resbund.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/resbund.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,485 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 1996-2007, International Business Machines Corporation
-*   and others.  All Rights Reserved.
-*
-******************************************************************************
-*
-* File resbund.h
-*
-*   CREATED BY
-*       Richard Gillam
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   2/5/97      aliu        Added scanForLocaleInFile.  Added
-*                           constructor which attempts to read resource bundle
-*                           from a specific file, without searching other files.
-*   2/11/97     aliu        Added UErrorCode return values to constructors.  Fixed
-*                           infinite loops in scanForFile and scanForLocale.
-*                           Modified getRawResourceData to not delete storage
-*                           in localeData and resourceData which it doesn't own.
-*                           Added Mac compatibility #ifdefs for tellp() and
-*                           ios::nocreate.
-*   2/18/97     helena      Updated with 100% documentation coverage.
-*   3/13/97     aliu        Rewrote to load in entire resource bundle and store
-*                           it as a Hashtable of ResourceBundleData objects.
-*                           Added state table to govern parsing of files.
-*                           Modified to load locale index out of new file
-*                           distinct from default.txt.
-*   3/25/97     aliu        Modified to support 2-d arrays, needed for timezone
-*                           data. Added support for custom file suffixes.  Again,
-*                           needed to support timezone data.
-*   4/7/97      aliu        Cleaned up.
-* 03/02/99      stephen     Removed dependency on FILE*.
-* 03/29/99      helena      Merged Bertrand and Stephen's changes.
-* 06/11/99      stephen     Removed parsing of .txt files.
-*                           Reworked to use new binary format.
-*                           Cleaned up.
-* 06/14/99      stephen     Removed methods taking a filename suffix.
-* 11/09/99      weiv        Added getLocale(), fRealLocale, removed fRealLocaleID
-******************************************************************************
-*/
-
-#ifndef RESBUND_H
-#define RESBUND_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "unicode/ures.h"
-#include "unicode/unistr.h"
-#include "unicode/locid.h"
-
-/**
- * \file 
- * \brief C++ API: Resource Bundle
- */
- 
-U_NAMESPACE_BEGIN
-
-/**
- * A class representing a collection of resource information pertaining to a given
- * locale. A resource bundle provides a way of accessing locale- specfic information in
- * a data file. You create a resource bundle that manages the resources for a given
- * locale and then ask it for individual resources.
- * <P>
- * Resource bundles in ICU4C are currently defined using text files which conform to the following
- * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/bnf_rb.txt">BNF definition</a>.
- * More on resource bundle concepts and syntax can be found in the
- * <a href="http://icu-project.org/userguide/ResourceManagement.html">Users Guide</a>.
- * <P>
- *
- * The ResourceBundle class is not suitable for subclassing.
- *
- * @stable ICU 2.0
- */
-class U_COMMON_API ResourceBundle : public UObject {
-public:
-    /**
-     * Constructor
-     *
-     * @param packageName   The packageName and locale together point to an ICU udata object, 
-     *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
-     *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
-     *                      a package registered with udata_setAppData(). Using a full file or directory
-     *                      pathname for packageName is deprecated.
-     * @param locale  This is the locale this resource bundle is for. To get resources
-     *                for the French locale, for example, you would create a
-     *                ResourceBundle passing Locale::FRENCH for the "locale" parameter,
-     *                and all subsequent calls to that resource bundle will return
-     *                resources that pertain to the French locale. If the caller doesn't
-     *                pass a locale parameter, the default locale for the system (as
-     *                returned by Locale::getDefault()) will be used.
-     * @param err     The Error Code.
-     * The UErrorCode& err parameter is used to return status information to the user. To
-     * check whether the construction succeeded or not, you should check the value of
-     * U_SUCCESS(err). If you wish more detailed information, you can check for
-     * informational error results which still indicate success. U_USING_FALLBACK_WARNING
-     * indicates that a fall back locale was used. For example, 'de_CH' was requested,
-     * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
-     * the default locale data was used; neither the requested locale nor any of its
-     * fall back locales could be found.
-     * @stable ICU 2.0
-     */
-    ResourceBundle(const UnicodeString&    packageName,
-                   const Locale&           locale,
-                   UErrorCode&              err);
-
-    /**
-     * Construct a resource bundle for the default bundle in the specified package.
-     *
-     * @param packageName   The packageName and locale together point to an ICU udata object, 
-     *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
-     *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
-     *                      a package registered with udata_setAppData(). Using a full file or directory
-     *                      pathname for packageName is deprecated.
-     * @param err A UErrorCode value
-     * @stable ICU 2.0
-     */
-    ResourceBundle(const UnicodeString&    packageName,
-                   UErrorCode&              err);
-
-    /**
-     * Construct a resource bundle for the ICU default bundle.
-     *
-     * @param err A UErrorCode value
-     * @stable ICU 2.0
-     */
-    ResourceBundle(UErrorCode &err);
-
-    /**
-     * Standard constructor, onstructs a resource bundle for the locale-specific
-     * bundle in the specified package.
-     *
-     * @param packageName   The packageName and locale together point to an ICU udata object, 
-     *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
-     *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
-     *                      a package registered with udata_setAppData(). Using a full file or directory
-     *                      pathname for packageName is deprecated.
-     *                      NULL is used to refer to ICU data.
-     * @param locale The locale for which to open a resource bundle.
-     * @param err A UErrorCode value
-     * @stable ICU 2.0
-     */
-    ResourceBundle(const char* packageName,
-                   const Locale& locale,
-                   UErrorCode& err);
-
-    /**
-     * Copy constructor.
-     *
-     * @param original The resource bundle to copy.
-     * @stable ICU 2.0
-     */
-    ResourceBundle(const ResourceBundle &original);
-
-    /**
-     * Constructor from a C UResourceBundle. The resource bundle is
-     * copied and not adopted. ures_close will still need to be used on the
-     * original resource bundle.
-     *
-     * @param res A pointer to the C resource bundle.
-     * @param status A UErrorCode value.
-     * @stable ICU 2.0
-     */
-    ResourceBundle(UResourceBundle *res,
-                   UErrorCode &status);
-
-    /**
-     * Assignment operator.
-     *
-     * @param other The resource bundle to copy.
-     * @stable ICU 2.0
-     */
-    ResourceBundle&
-      operator=(const ResourceBundle& other);
-
-    /** Destructor.
-     * @stable ICU 2.0
-     */
-    virtual ~ResourceBundle();
-
-    /**
-     * Clone this object.
-     * Clones can be used concurrently in multiple threads.
-     * If an error occurs, then NULL is returned.
-     * The caller must delete the clone.
-     *
-     * @return a clone of this object
-     *
-     * @see getDynamicClassID
-     * @stable ICU 2.8
-     */
-    ResourceBundle *clone() const;
-
-    /**
-     * Returns the size of a resource. Size for scalar types is always 1, and for vector/table types is
-     * the number of child resources.
-     * @warning Integer array is treated as a scalar type. There are no
-     *          APIs to access individual members of an integer array. It
-     *          is always returned as a whole.
-     *
-     * @return number of resources in a given resource.
-     * @stable ICU 2.0
-     */
-    int32_t
-      getSize(void) const;
-
-    /**
-     * returns a string from a string resource type
-     *
-     * @param status  fills in the outgoing error code
-     *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
-     *                could be a warning
-     *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
-     * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
-     * @stable ICU 2.0
-     */
-    UnicodeString
-      getString(UErrorCode& status) const;
-
-    /**
-     * returns a binary data from a resource. Can be used at most primitive resource types (binaries,
-     * strings, ints)
-     *
-     * @param len     fills in the length of resulting byte chunk
-     * @param status  fills in the outgoing error code
-     *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
-     *                could be a warning
-     *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
-     * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
-     * @stable ICU 2.0
-     */
-    const uint8_t*
-      getBinary(int32_t& len, UErrorCode& status) const;
-
-
-    /**
-     * returns an integer vector from a resource.
-     *
-     * @param len     fills in the length of resulting integer vector
-     * @param status  fills in the outgoing error code
-     *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
-     *                could be a warning
-     *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
-     * @return a pointer to a vector of integers that lives in a memory mapped/DLL file.
-     * @stable ICU 2.0
-     */
-    const int32_t*
-      getIntVector(int32_t& len, UErrorCode& status) const;
-
-    /**
-     * returns an unsigned integer from a resource.
-     * This integer is originally 28 bits.
-     *
-     * @param status  fills in the outgoing error code
-     *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
-     *                could be a warning
-     *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
-     * @return an unsigned integer value
-     * @stable ICU 2.0
-     */
-    uint32_t
-      getUInt(UErrorCode& status) const;
-
-    /**
-     * returns a signed integer from a resource.
-     * This integer is originally 28 bit and the sign gets propagated.
-     *
-     * @param status  fills in the outgoing error code
-     *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
-     *                could be a warning
-     *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
-     * @return a signed integer value
-     * @stable ICU 2.0
-     */
-    int32_t
-      getInt(UErrorCode& status) const;
-
-    /**
-     * Checks whether the resource has another element to iterate over.
-     *
-     * @return TRUE if there are more elements, FALSE if there is no more elements
-     * @stable ICU 2.0
-     */
-    UBool
-      hasNext(void) const;
-
-    /**
-     * Resets the internal context of a resource so that iteration starts from the first element.
-     *
-     * @stable ICU 2.0
-     */
-    void
-      resetIterator(void);
-
-    /**
-     * Returns the key associated with this resource. Not all the resources have a key - only
-     * those that are members of a table.
-     *
-     * @return a key associated to this resource, or NULL if it doesn't have a key
-     * @stable ICU 2.0
-     */
-    const char*
-      getKey(void) const;
-
-    /**
-     * Gets the locale ID of the resource bundle as a string.
-     * Same as getLocale().getName() .
-     *
-     * @return the locale ID of the resource bundle as a string
-     * @stable ICU 2.0
-     */
-    const char*
-      getName(void) const;
-
-
-    /**
-     * Returns the type of a resource. Available types are defined in enum UResType
-     *
-     * @return type of the given resource.
-     * @stable ICU 2.0
-     */
-    UResType
-      getType(void) const;
-
-    /**
-     * Returns the next resource in a given resource or NULL if there are no more resources
-     *
-     * @param status            fills in the outgoing error code
-     * @return                  ResourceBundle object.
-     * @stable ICU 2.0
-     */
-    ResourceBundle
-      getNext(UErrorCode& status);
-
-    /**
-     * Returns the next string in a resource or NULL if there are no more resources
-     * to iterate over.
-     *
-     * @param status            fills in the outgoing error code
-     * @return an UnicodeString object.
-     * @stable ICU 2.0
-     */
-    UnicodeString
-      getNextString(UErrorCode& status);
-
-    /**
-     * Returns the next string in a resource or NULL if there are no more resources
-     * to iterate over.
-     *
-     * @param key               fill in for key associated with this string
-     * @param status            fills in the outgoing error code
-     * @return an UnicodeString object.
-     * @stable ICU 2.0
-     */
-    UnicodeString
-      getNextString(const char ** key,
-                    UErrorCode& status);
-
-    /**
-     * Returns the resource in a resource at the specified index.
-     *
-     * @param index             an index to the wanted resource.
-     * @param status            fills in the outgoing error code
-     * @return                  ResourceBundle object. If there is an error, resource is invalid.
-     * @stable ICU 2.0
-     */
-    ResourceBundle
-      get(int32_t index,
-          UErrorCode& status) const;
-
-    /**
-     * Returns the string in a given resource at the specified index.
-     *
-     * @param index             an index to the wanted string.
-     * @param status            fills in the outgoing error code
-     * @return                  an UnicodeString object. If there is an error, string is bogus
-     * @stable ICU 2.0
-     */
-    UnicodeString
-      getStringEx(int32_t index,
-                  UErrorCode& status) const;
-
-    /**
-     * Returns a resource in a resource that has a given key. This procedure works only with table
-     * resources.
-     *
-     * @param key               a key associated with the wanted resource
-     * @param status            fills in the outgoing error code.
-     * @return                  ResourceBundle object. If there is an error, resource is invalid.
-     * @stable ICU 2.0
-     */
-    ResourceBundle
-      get(const char* key,
-          UErrorCode& status) const;
-
-    /**
-     * Returns a string in a resource that has a given key. This procedure works only with table
-     * resources.
-     *
-     * @param key               a key associated with the wanted string
-     * @param status            fills in the outgoing error code
-     * @return                  an UnicodeString object. If there is an error, string is bogus
-     * @stable ICU 2.0
-     */
-    UnicodeString
-      getStringEx(const char* key,
-                  UErrorCode& status) const;
-
-    /**
-     * Return the version number associated with this ResourceBundle as a string. Please
-     * use getVersion, as this method is going to be deprecated.
-     *
-     * @return  A version number string as specified in the resource bundle or its parent.
-     *          The caller does not own this string.
-     * @see getVersion
-     * @deprecated ICU 2.8 Use getVersion instead.
-     */
-    const char*
-      getVersionNumber(void) const;
-
-    /**
-     * Return the version number associated with this ResourceBundle as a UVersionInfo array.
-     *
-     * @param versionInfo A UVersionInfo array that is filled with the version number
-     *                    as specified in the resource bundle or its parent.
-     * @stable ICU 2.0
-     */
-    void
-      getVersion(UVersionInfo versionInfo) const;
-
-    /**
-     * Return the Locale associated with this ResourceBundle.
-     *
-     * @return a Locale object
-     * @deprecated ICU 2.8 Use getLocale(ULocDataLocaleType type, UErrorCode &status) overload instead.
-     */
-    const Locale&
-      getLocale(void) const;
-
-    /**
-     * Return the Locale associated with this ResourceBundle.
-     * @param type You can choose between requested, valid and actual
-     *             locale. For description see the definition of
-     *             ULocDataLocaleType in uloc.h
-     * @param status just for catching illegal arguments
-     *
-     * @return a Locale object
-     * @stable ICU 2.8
-     */
-    const Locale
-      getLocale(ULocDataLocaleType type, UErrorCode &status) const;
-    /**
-     * This API implements multilevel fallback
-     * @internal
-     */
-    ResourceBundle
-        getWithFallback(const char* key, UErrorCode& status);
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.2
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.2
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-private:
-    ResourceBundle(); // default constructor not implemented
-
-    UResourceBundle *fResource;
-    void constructForLocale(const UnicodeString& path, const Locale& locale, UErrorCode& error);
-    Locale *fLocale;
-
-};
-
-U_NAMESPACE_END
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/resbund.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/resbund.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/resbund.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/resbund.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,485 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1996-2007, International Business Machines Corporation
+*   and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File resbund.h
+*
+*   CREATED BY
+*       Richard Gillam
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   2/5/97      aliu        Added scanForLocaleInFile.  Added
+*                           constructor which attempts to read resource bundle
+*                           from a specific file, without searching other files.
+*   2/11/97     aliu        Added UErrorCode return values to constructors.  Fixed
+*                           infinite loops in scanForFile and scanForLocale.
+*                           Modified getRawResourceData to not delete storage
+*                           in localeData and resourceData which it doesn't own.
+*                           Added Mac compatibility #ifdefs for tellp() and
+*                           ios::nocreate.
+*   2/18/97     helena      Updated with 100% documentation coverage.
+*   3/13/97     aliu        Rewrote to load in entire resource bundle and store
+*                           it as a Hashtable of ResourceBundleData objects.
+*                           Added state table to govern parsing of files.
+*                           Modified to load locale index out of new file
+*                           distinct from default.txt.
+*   3/25/97     aliu        Modified to support 2-d arrays, needed for timezone
+*                           data. Added support for custom file suffixes.  Again,
+*                           needed to support timezone data.
+*   4/7/97      aliu        Cleaned up.
+* 03/02/99      stephen     Removed dependency on FILE*.
+* 03/29/99      helena      Merged Bertrand and Stephen's changes.
+* 06/11/99      stephen     Removed parsing of .txt files.
+*                           Reworked to use new binary format.
+*                           Cleaned up.
+* 06/14/99      stephen     Removed methods taking a filename suffix.
+* 11/09/99      weiv        Added getLocale(), fRealLocale, removed fRealLocaleID
+******************************************************************************
+*/
+
+#ifndef RESBUND_H
+#define RESBUND_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "unicode/unistr.h"
+#include "unicode/locid.h"
+
+/**
+ * \file 
+ * \brief C++ API: Resource Bundle
+ */
+ 
+U_NAMESPACE_BEGIN
+
+/**
+ * A class representing a collection of resource information pertaining to a given
+ * locale. A resource bundle provides a way of accessing locale- specfic information in
+ * a data file. You create a resource bundle that manages the resources for a given
+ * locale and then ask it for individual resources.
+ * <P>
+ * Resource bundles in ICU4C are currently defined using text files which conform to the following
+ * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/bnf_rb.txt">BNF definition</a>.
+ * More on resource bundle concepts and syntax can be found in the
+ * <a href="http://icu-project.org/userguide/ResourceManagement.html">Users Guide</a>.
+ * <P>
+ *
+ * The ResourceBundle class is not suitable for subclassing.
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API ResourceBundle : public UObject {
+public:
+    /**
+     * Constructor
+     *
+     * @param packageName   The packageName and locale together point to an ICU udata object, 
+     *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
+     *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
+     *                      a package registered with udata_setAppData(). Using a full file or directory
+     *                      pathname for packageName is deprecated.
+     * @param locale  This is the locale this resource bundle is for. To get resources
+     *                for the French locale, for example, you would create a
+     *                ResourceBundle passing Locale::FRENCH for the "locale" parameter,
+     *                and all subsequent calls to that resource bundle will return
+     *                resources that pertain to the French locale. If the caller doesn't
+     *                pass a locale parameter, the default locale for the system (as
+     *                returned by Locale::getDefault()) will be used.
+     * @param err     The Error Code.
+     * The UErrorCode& err parameter is used to return status information to the user. To
+     * check whether the construction succeeded or not, you should check the value of
+     * U_SUCCESS(err). If you wish more detailed information, you can check for
+     * informational error results which still indicate success. U_USING_FALLBACK_WARNING
+     * indicates that a fall back locale was used. For example, 'de_CH' was requested,
+     * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
+     * the default locale data was used; neither the requested locale nor any of its
+     * fall back locales could be found.
+     * @stable ICU 2.0
+     */
+    ResourceBundle(const UnicodeString&    packageName,
+                   const Locale&           locale,
+                   UErrorCode&              err);
+
+    /**
+     * Construct a resource bundle for the default bundle in the specified package.
+     *
+     * @param packageName   The packageName and locale together point to an ICU udata object, 
+     *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
+     *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
+     *                      a package registered with udata_setAppData(). Using a full file or directory
+     *                      pathname for packageName is deprecated.
+     * @param err A UErrorCode value
+     * @stable ICU 2.0
+     */
+    ResourceBundle(const UnicodeString&    packageName,
+                   UErrorCode&              err);
+
+    /**
+     * Construct a resource bundle for the ICU default bundle.
+     *
+     * @param err A UErrorCode value
+     * @stable ICU 2.0
+     */
+    ResourceBundle(UErrorCode &err);
+
+    /**
+     * Standard constructor, onstructs a resource bundle for the locale-specific
+     * bundle in the specified package.
+     *
+     * @param packageName   The packageName and locale together point to an ICU udata object, 
+     *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
+     *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
+     *                      a package registered with udata_setAppData(). Using a full file or directory
+     *                      pathname for packageName is deprecated.
+     *                      NULL is used to refer to ICU data.
+     * @param locale The locale for which to open a resource bundle.
+     * @param err A UErrorCode value
+     * @stable ICU 2.0
+     */
+    ResourceBundle(const char* packageName,
+                   const Locale& locale,
+                   UErrorCode& err);
+
+    /**
+     * Copy constructor.
+     *
+     * @param original The resource bundle to copy.
+     * @stable ICU 2.0
+     */
+    ResourceBundle(const ResourceBundle &original);
+
+    /**
+     * Constructor from a C UResourceBundle. The resource bundle is
+     * copied and not adopted. ures_close will still need to be used on the
+     * original resource bundle.
+     *
+     * @param res A pointer to the C resource bundle.
+     * @param status A UErrorCode value.
+     * @stable ICU 2.0
+     */
+    ResourceBundle(UResourceBundle *res,
+                   UErrorCode &status);
+
+    /**
+     * Assignment operator.
+     *
+     * @param other The resource bundle to copy.
+     * @stable ICU 2.0
+     */
+    ResourceBundle&
+      operator=(const ResourceBundle& other);
+
+    /** Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~ResourceBundle();
+
+    /**
+     * Clone this object.
+     * Clones can be used concurrently in multiple threads.
+     * If an error occurs, then NULL is returned.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @stable ICU 2.8
+     */
+    ResourceBundle *clone() const;
+
+    /**
+     * Returns the size of a resource. Size for scalar types is always 1, and for vector/table types is
+     * the number of child resources.
+     * @warning Integer array is treated as a scalar type. There are no
+     *          APIs to access individual members of an integer array. It
+     *          is always returned as a whole.
+     *
+     * @return number of resources in a given resource.
+     * @stable ICU 2.0
+     */
+    int32_t
+      getSize(void) const;
+
+    /**
+     * returns a string from a string resource type
+     *
+     * @param status  fills in the outgoing error code
+     *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+     *                could be a warning
+     *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+     * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+     * @stable ICU 2.0
+     */
+    UnicodeString
+      getString(UErrorCode& status) const;
+
+    /**
+     * returns a binary data from a resource. Can be used at most primitive resource types (binaries,
+     * strings, ints)
+     *
+     * @param len     fills in the length of resulting byte chunk
+     * @param status  fills in the outgoing error code
+     *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+     *                could be a warning
+     *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+     * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
+     * @stable ICU 2.0
+     */
+    const uint8_t*
+      getBinary(int32_t& len, UErrorCode& status) const;
+
+
+    /**
+     * returns an integer vector from a resource.
+     *
+     * @param len     fills in the length of resulting integer vector
+     * @param status  fills in the outgoing error code
+     *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+     *                could be a warning
+     *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+     * @return a pointer to a vector of integers that lives in a memory mapped/DLL file.
+     * @stable ICU 2.0
+     */
+    const int32_t*
+      getIntVector(int32_t& len, UErrorCode& status) const;
+
+    /**
+     * returns an unsigned integer from a resource.
+     * This integer is originally 28 bits.
+     *
+     * @param status  fills in the outgoing error code
+     *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+     *                could be a warning
+     *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+     * @return an unsigned integer value
+     * @stable ICU 2.0
+     */
+    uint32_t
+      getUInt(UErrorCode& status) const;
+
+    /**
+     * returns a signed integer from a resource.
+     * This integer is originally 28 bit and the sign gets propagated.
+     *
+     * @param status  fills in the outgoing error code
+     *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+     *                could be a warning
+     *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+     * @return a signed integer value
+     * @stable ICU 2.0
+     */
+    int32_t
+      getInt(UErrorCode& status) const;
+
+    /**
+     * Checks whether the resource has another element to iterate over.
+     *
+     * @return TRUE if there are more elements, FALSE if there is no more elements
+     * @stable ICU 2.0
+     */
+    UBool
+      hasNext(void) const;
+
+    /**
+     * Resets the internal context of a resource so that iteration starts from the first element.
+     *
+     * @stable ICU 2.0
+     */
+    void
+      resetIterator(void);
+
+    /**
+     * Returns the key associated with this resource. Not all the resources have a key - only
+     * those that are members of a table.
+     *
+     * @return a key associated to this resource, or NULL if it doesn't have a key
+     * @stable ICU 2.0
+     */
+    const char*
+      getKey(void) const;
+
+    /**
+     * Gets the locale ID of the resource bundle as a string.
+     * Same as getLocale().getName() .
+     *
+     * @return the locale ID of the resource bundle as a string
+     * @stable ICU 2.0
+     */
+    const char*
+      getName(void) const;
+
+
+    /**
+     * Returns the type of a resource. Available types are defined in enum UResType
+     *
+     * @return type of the given resource.
+     * @stable ICU 2.0
+     */
+    UResType
+      getType(void) const;
+
+    /**
+     * Returns the next resource in a given resource or NULL if there are no more resources
+     *
+     * @param status            fills in the outgoing error code
+     * @return                  ResourceBundle object.
+     * @stable ICU 2.0
+     */
+    ResourceBundle
+      getNext(UErrorCode& status);
+
+    /**
+     * Returns the next string in a resource or NULL if there are no more resources
+     * to iterate over.
+     *
+     * @param status            fills in the outgoing error code
+     * @return an UnicodeString object.
+     * @stable ICU 2.0
+     */
+    UnicodeString
+      getNextString(UErrorCode& status);
+
+    /**
+     * Returns the next string in a resource or NULL if there are no more resources
+     * to iterate over.
+     *
+     * @param key               fill in for key associated with this string
+     * @param status            fills in the outgoing error code
+     * @return an UnicodeString object.
+     * @stable ICU 2.0
+     */
+    UnicodeString
+      getNextString(const char ** key,
+                    UErrorCode& status);
+
+    /**
+     * Returns the resource in a resource at the specified index.
+     *
+     * @param index             an index to the wanted resource.
+     * @param status            fills in the outgoing error code
+     * @return                  ResourceBundle object. If there is an error, resource is invalid.
+     * @stable ICU 2.0
+     */
+    ResourceBundle
+      get(int32_t index,
+          UErrorCode& status) const;
+
+    /**
+     * Returns the string in a given resource at the specified index.
+     *
+     * @param index             an index to the wanted string.
+     * @param status            fills in the outgoing error code
+     * @return                  an UnicodeString object. If there is an error, string is bogus
+     * @stable ICU 2.0
+     */
+    UnicodeString
+      getStringEx(int32_t index,
+                  UErrorCode& status) const;
+
+    /**
+     * Returns a resource in a resource that has a given key. This procedure works only with table
+     * resources.
+     *
+     * @param key               a key associated with the wanted resource
+     * @param status            fills in the outgoing error code.
+     * @return                  ResourceBundle object. If there is an error, resource is invalid.
+     * @stable ICU 2.0
+     */
+    ResourceBundle
+      get(const char* key,
+          UErrorCode& status) const;
+
+    /**
+     * Returns a string in a resource that has a given key. This procedure works only with table
+     * resources.
+     *
+     * @param key               a key associated with the wanted string
+     * @param status            fills in the outgoing error code
+     * @return                  an UnicodeString object. If there is an error, string is bogus
+     * @stable ICU 2.0
+     */
+    UnicodeString
+      getStringEx(const char* key,
+                  UErrorCode& status) const;
+
+    /**
+     * Return the version number associated with this ResourceBundle as a string. Please
+     * use getVersion, as this method is going to be deprecated.
+     *
+     * @return  A version number string as specified in the resource bundle or its parent.
+     *          The caller does not own this string.
+     * @see getVersion
+     * @deprecated ICU 2.8 Use getVersion instead.
+     */
+    const char*
+      getVersionNumber(void) const;
+
+    /**
+     * Return the version number associated with this ResourceBundle as a UVersionInfo array.
+     *
+     * @param versionInfo A UVersionInfo array that is filled with the version number
+     *                    as specified in the resource bundle or its parent.
+     * @stable ICU 2.0
+     */
+    void
+      getVersion(UVersionInfo versionInfo) const;
+
+    /**
+     * Return the Locale associated with this ResourceBundle.
+     *
+     * @return a Locale object
+     * @deprecated ICU 2.8 Use getLocale(ULocDataLocaleType type, UErrorCode &status) overload instead.
+     */
+    const Locale&
+      getLocale(void) const;
+
+    /**
+     * Return the Locale associated with this ResourceBundle.
+     * @param type You can choose between requested, valid and actual
+     *             locale. For description see the definition of
+     *             ULocDataLocaleType in uloc.h
+     * @param status just for catching illegal arguments
+     *
+     * @return a Locale object
+     * @stable ICU 2.8
+     */
+    const Locale
+      getLocale(ULocDataLocaleType type, UErrorCode &status) const;
+    /**
+     * This API implements multilevel fallback
+     * @internal
+     */
+    ResourceBundle
+        getWithFallback(const char* key, UErrorCode& status);
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+    ResourceBundle(); // default constructor not implemented
+
+    UResourceBundle *fResource;
+    void constructForLocale(const UnicodeString& path, const Locale& locale, UErrorCode& error);
+    Locale *fLocale;
+
+};
+
+U_NAMESPACE_END
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/schriter.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/schriter.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/schriter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,187 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 1998-2005, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-******************************************************************************
-*
-* File schriter.h
-*
-* Modification History:
-*
-*   Date        Name        Description
-*  05/05/99     stephen     Cleaned up.
-******************************************************************************
-*/
-
-#ifndef SCHRITER_H
-#define SCHRITER_H
-
-#include "unicode/utypes.h"
-#include "unicode/chariter.h"
-#include "unicode/uchriter.h"
-
-/**
- * \file 
- * \brief C++ API: String Character Iterator
- */
- 
-U_NAMESPACE_BEGIN
-/**
- * A concrete subclass of CharacterIterator that iterates over the
- * characters (code units or code points) in a UnicodeString.
- * It's possible not only to create an
- * iterator that iterates over an entire UnicodeString, but also to
- * create one that iterates over only a subrange of a UnicodeString
- * (iterators over different subranges of the same UnicodeString don't
- * compare equal).
- * @see CharacterIterator
- * @see ForwardCharacterIterator
- * @stable ICU 2.0
- */
-class U_COMMON_API StringCharacterIterator : public UCharCharacterIterator {
-public:
-  /**
-   * Create an iterator over the UnicodeString referred to by "textStr".
-   * The UnicodeString object is copied.
-   * The iteration range is the whole string, and the starting position is 0.
-   * @param textStr The unicode string used to create an iterator
-   * @stable ICU 2.0
-   */
-  StringCharacterIterator(const UnicodeString& textStr);
-
-  /**
-   * Create an iterator over the UnicodeString referred to by "textStr".
-   * The iteration range is the whole string, and the starting
-   * position is specified by "textPos".  If "textPos" is outside the valid
-   * iteration range, the behavior of this object is undefined.
-   * @param textStr The unicode string used to create an iterator
-   * @param textPos The starting position of the iteration
-   * @stable ICU 2.0
-   */
-  StringCharacterIterator(const UnicodeString&    textStr,
-              int32_t              textPos);
-
-  /**
-   * Create an iterator over the UnicodeString referred to by "textStr".
-   * The UnicodeString object is copied.
-   * The iteration range begins with the code unit specified by
-   * "textBegin" and ends with the code unit BEFORE the code unit specfied
-   * by "textEnd".  The starting position is specified by "textPos".  If
-   * "textBegin" and "textEnd" don't form a valid range on "text" (i.e.,
-   * textBegin >= textEnd or either is negative or greater than text.size()),
-   * or "textPos" is outside the range defined by "textBegin" and "textEnd",
-   * the behavior of this iterator is undefined.
-   * @param textStr    The unicode string used to create the StringCharacterIterator
-   * @param textBegin  The begin position of the iteration range
-   * @param textEnd    The end position of the iteration range
-   * @param textPos    The starting position of the iteration
-   * @stable ICU 2.0
-   */
-  StringCharacterIterator(const UnicodeString&    textStr,
-              int32_t              textBegin,
-              int32_t              textEnd,
-              int32_t              textPos);
-
-  /**
-   * Copy constructor.  The new iterator iterates over the same range
-   * of the same string as "that", and its initial position is the
-   * same as "that"'s current position.
-   * The UnicodeString object in "that" is copied.
-   * @param that The StringCharacterIterator to be copied
-   * @stable ICU 2.0
-   */
-  StringCharacterIterator(const StringCharacterIterator&  that);
-
-  /**
-   * Destructor.
-   * @stable ICU 2.0
-   */
-  virtual ~StringCharacterIterator();
-
-  /**
-   * Assignment operator.  *this is altered to iterate over the same
-   * range of the same string as "that", and refers to the same
-   * character within that string as "that" does.
-   * @param that The object to be copied.
-   * @return the newly created object.
-   * @stable ICU 2.0
-   */
-  StringCharacterIterator&
-  operator=(const StringCharacterIterator&    that);
-
-  /**
-   * Returns true if the iterators iterate over the same range of the
-   * same string and are pointing at the same character.
-   * @param that The ForwardCharacterIterator to be compared for equality
-   * @return true if the iterators iterate over the same range of the
-   * same string and are pointing at the same character.
-   * @stable ICU 2.0
-   */
-  virtual UBool          operator==(const ForwardCharacterIterator& that) const;
-
-  /**
-   * Returns a new StringCharacterIterator referring to the same
-   * character in the same range of the same string as this one.  The
-   * caller must delete the new iterator.
-   * @return the newly cloned object.
-   * @stable ICU 2.0
-   */
-  virtual CharacterIterator* clone(void) const;
-
-  /**
-   * Sets the iterator to iterate over the provided string.
-   * @param newText The string to be iterated over
-   * @stable ICU 2.0
-   */
-  void setText(const UnicodeString& newText);
-
-  /**
-   * Copies the UnicodeString under iteration into the UnicodeString
-   * referred to by "result".  Even if this iterator iterates across
-   * only a part of this string, the whole string is copied.
-   * @param result Receives a copy of the text under iteration.
-   * @stable ICU 2.0
-   */
-  virtual void            getText(UnicodeString& result);
-
-  /**
-   * Return a class ID for this object (not really public)
-   * @return a class ID for this object.
-   * @stable ICU 2.0
-   */
-  virtual UClassID         getDynamicClassID(void) const;
-
-  /**
-   * Return a class ID for this class (not really public)
-   * @return a class ID for this class
-   * @stable ICU 2.0
-   */
-  static UClassID   U_EXPORT2 getStaticClassID(void);
-
-protected:
-  /**
-   * Default constructor, iteration over empty string.
-   * @stable ICU 2.0
-   */
-  StringCharacterIterator();
-
-  /**
-   * Sets the iterator to iterate over the provided string.
-   * @param newText The string to be iterated over
-   * @param newTextLength The length of the String
-   * @stable ICU 2.0
-   */
-  void setText(const UChar* newText, int32_t newTextLength);
-
-  /**
-   * Copy of the iterated string object.
-   * @stable ICU 2.0
-   */
-  UnicodeString            text;
-
-};
-
-U_NAMESPACE_END
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/schriter.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/schriter.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/schriter.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/schriter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,187 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1998-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File schriter.h
+*
+* Modification History:
+*
+*   Date        Name        Description
+*  05/05/99     stephen     Cleaned up.
+******************************************************************************
+*/
+
+#ifndef SCHRITER_H
+#define SCHRITER_H
+
+#include "unicode/utypes.h"
+#include "unicode/chariter.h"
+#include "unicode/uchriter.h"
+
+/**
+ * \file 
+ * \brief C++ API: String Character Iterator
+ */
+ 
+U_NAMESPACE_BEGIN
+/**
+ * A concrete subclass of CharacterIterator that iterates over the
+ * characters (code units or code points) in a UnicodeString.
+ * It's possible not only to create an
+ * iterator that iterates over an entire UnicodeString, but also to
+ * create one that iterates over only a subrange of a UnicodeString
+ * (iterators over different subranges of the same UnicodeString don't
+ * compare equal).
+ * @see CharacterIterator
+ * @see ForwardCharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API StringCharacterIterator : public UCharCharacterIterator {
+public:
+  /**
+   * Create an iterator over the UnicodeString referred to by "textStr".
+   * The UnicodeString object is copied.
+   * The iteration range is the whole string, and the starting position is 0.
+   * @param textStr The unicode string used to create an iterator
+   * @stable ICU 2.0
+   */
+  StringCharacterIterator(const UnicodeString& textStr);
+
+  /**
+   * Create an iterator over the UnicodeString referred to by "textStr".
+   * The iteration range is the whole string, and the starting
+   * position is specified by "textPos".  If "textPos" is outside the valid
+   * iteration range, the behavior of this object is undefined.
+   * @param textStr The unicode string used to create an iterator
+   * @param textPos The starting position of the iteration
+   * @stable ICU 2.0
+   */
+  StringCharacterIterator(const UnicodeString&    textStr,
+              int32_t              textPos);
+
+  /**
+   * Create an iterator over the UnicodeString referred to by "textStr".
+   * The UnicodeString object is copied.
+   * The iteration range begins with the code unit specified by
+   * "textBegin" and ends with the code unit BEFORE the code unit specfied
+   * by "textEnd".  The starting position is specified by "textPos".  If
+   * "textBegin" and "textEnd" don't form a valid range on "text" (i.e.,
+   * textBegin >= textEnd or either is negative or greater than text.size()),
+   * or "textPos" is outside the range defined by "textBegin" and "textEnd",
+   * the behavior of this iterator is undefined.
+   * @param textStr    The unicode string used to create the StringCharacterIterator
+   * @param textBegin  The begin position of the iteration range
+   * @param textEnd    The end position of the iteration range
+   * @param textPos    The starting position of the iteration
+   * @stable ICU 2.0
+   */
+  StringCharacterIterator(const UnicodeString&    textStr,
+              int32_t              textBegin,
+              int32_t              textEnd,
+              int32_t              textPos);
+
+  /**
+   * Copy constructor.  The new iterator iterates over the same range
+   * of the same string as "that", and its initial position is the
+   * same as "that"'s current position.
+   * The UnicodeString object in "that" is copied.
+   * @param that The StringCharacterIterator to be copied
+   * @stable ICU 2.0
+   */
+  StringCharacterIterator(const StringCharacterIterator&  that);
+
+  /**
+   * Destructor.
+   * @stable ICU 2.0
+   */
+  virtual ~StringCharacterIterator();
+
+  /**
+   * Assignment operator.  *this is altered to iterate over the same
+   * range of the same string as "that", and refers to the same
+   * character within that string as "that" does.
+   * @param that The object to be copied.
+   * @return the newly created object.
+   * @stable ICU 2.0
+   */
+  StringCharacterIterator&
+  operator=(const StringCharacterIterator&    that);
+
+  /**
+   * Returns true if the iterators iterate over the same range of the
+   * same string and are pointing at the same character.
+   * @param that The ForwardCharacterIterator to be compared for equality
+   * @return true if the iterators iterate over the same range of the
+   * same string and are pointing at the same character.
+   * @stable ICU 2.0
+   */
+  virtual UBool          operator==(const ForwardCharacterIterator& that) const;
+
+  /**
+   * Returns a new StringCharacterIterator referring to the same
+   * character in the same range of the same string as this one.  The
+   * caller must delete the new iterator.
+   * @return the newly cloned object.
+   * @stable ICU 2.0
+   */
+  virtual CharacterIterator* clone(void) const;
+
+  /**
+   * Sets the iterator to iterate over the provided string.
+   * @param newText The string to be iterated over
+   * @stable ICU 2.0
+   */
+  void setText(const UnicodeString& newText);
+
+  /**
+   * Copies the UnicodeString under iteration into the UnicodeString
+   * referred to by "result".  Even if this iterator iterates across
+   * only a part of this string, the whole string is copied.
+   * @param result Receives a copy of the text under iteration.
+   * @stable ICU 2.0
+   */
+  virtual void            getText(UnicodeString& result);
+
+  /**
+   * Return a class ID for this object (not really public)
+   * @return a class ID for this object.
+   * @stable ICU 2.0
+   */
+  virtual UClassID         getDynamicClassID(void) const;
+
+  /**
+   * Return a class ID for this class (not really public)
+   * @return a class ID for this class
+   * @stable ICU 2.0
+   */
+  static UClassID   U_EXPORT2 getStaticClassID(void);
+
+protected:
+  /**
+   * Default constructor, iteration over empty string.
+   * @stable ICU 2.0
+   */
+  StringCharacterIterator();
+
+  /**
+   * Sets the iterator to iterate over the provided string.
+   * @param newText The string to be iterated over
+   * @param newTextLength The length of the String
+   * @stable ICU 2.0
+   */
+  void setText(const UChar* newText, int32_t newTextLength);
+
+  /**
+   * Copy of the iterated string object.
+   * @stable ICU 2.0
+   */
+  UnicodeString            text;
+
+};
+
+U_NAMESPACE_END
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/search.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/search.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/search.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,569 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
-**********************************************************************
-*   Date        Name        Description
-*  03/22/2000   helena      Creation.
-**********************************************************************
-*/
-
-#ifndef SEARCH_H
-#define SEARCH_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: SearchIterator object.
- */
- 
-#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-#include "unicode/chariter.h"
-#include "unicode/brkiter.h"
-#include "unicode/usearch.h"
-
-/**
-* @stable ICU 2.0
-*/
-struct USearch;
-/**
-* @stable ICU 2.0
-*/
-typedef struct USearch USearch;
-
-U_NAMESPACE_BEGIN
-
-/**
- *
- * <tt>SearchIterator</tt> is an abstract base class that provides 
- * methods to search for a pattern within a text string. Instances of
- * <tt>SearchIterator</tt> maintain a current position and scans over the 
- * target text, returning the indices the pattern is matched and the length 
- * of each match.
- * <p>
- * <tt>SearchIterator</tt> defines a protocol for text searching. 
- * Subclasses provide concrete implementations of various search algorithms. 
- * For example, <tt>StringSearch</tt> implements language-sensitive pattern 
- * matching based on the comparison rules defined in a 
- * <tt>RuleBasedCollator</tt> object. 
- * <p> 
- * Other options for searching includes using a BreakIterator to restrict 
- * the points at which matches are detected.
- * <p>
- * <tt>SearchIterator</tt> provides an API that is similar to that of
- * other text iteration classes such as <tt>BreakIterator</tt>. Using 
- * this class, it is easy to scan through text looking for all occurances of 
- * a given pattern. The following example uses a <tt>StringSearch</tt> 
- * object to find all instances of "fox" in the target string. Any other 
- * subclass of <tt>SearchIterator</tt> can be used in an identical 
- * manner.
- * <pre><code>
- * UnicodeString target("The quick brown fox jumped over the lazy fox");
- * UnicodeString pattern("fox");
- *
- * SearchIterator *iter  = new StringSearch(pattern, target);
- * UErrorCode      error = U_ZERO_ERROR;
- * for (int pos = iter->first(error); pos != USEARCH_DONE; 
- *                               pos = iter->next(error)) {
- *     printf("Found match at %d pos, length is %d\n", pos, 
- *                                             iter.getMatchLength());
- * }
- * </code></pre>
- *
- * @see StringSearch
- * @see RuleBasedCollator
- */
-class U_I18N_API SearchIterator : public UObject {
-
-public:
-
-    // public constructors and destructors -------------------------------
-
-    /** 
-    * Copy constructor that creates a SearchIterator instance with the same 
-    * behavior, and iterating over the same text. 
-    * @param other the SearchIterator instance to be copied.
-    * @stable ICU 2.0
-    */
-    SearchIterator(const SearchIterator &other);
-
-    /**
-     * Destructor. Cleans up the search iterator data struct.
-     * @stable ICU 2.0
-     */
-    virtual ~SearchIterator();
-
-    // public get and set methods ----------------------------------------
-
-    /**
-     * Sets the index to point to the given position, and clears any state 
-     * that's affected.
-     * <p>
-     * This method takes the argument index and sets the position in the text 
-     * string accordingly without checking if the index is pointing to a 
-     * valid starting point to begin searching. 
-     * @param position within the text to be set. If position is less
-     *             than or greater than the text range for searching, 
-     *          an U_INDEX_OUTOFBOUNDS_ERROR will be returned
-     * @param status for errors if it occurs
-     * @stable ICU 2.0
-     */
-    virtual void setOffset(int32_t position, UErrorCode &status) = 0;
-
-    /**
-     * Return the current index in the text being searched.
-     * If the iteration has gone past the end of the text
-     * (or past the beginning for a backwards search), USEARCH_DONE
-     * is returned.
-     * @return current index in the text being searched.
-     * @stable ICU 2.0
-     */
-    virtual int32_t getOffset(void) const = 0;
-
-    /**
-    * Sets the text searching attributes located in the enum 
-    * USearchAttribute with values from the enum USearchAttributeValue.
-    * USEARCH_DEFAULT can be used for all attributes for resetting.
-    * @param attribute text attribute (enum USearchAttribute) to be set
-    * @param value text attribute value
-    * @param status for errors if it occurs
-    * @stable ICU 2.0
-    */
-    void setAttribute(USearchAttribute       attribute,
-                      USearchAttributeValue  value,
-                      UErrorCode            &status);
-
-    /**    
-    * Gets the text searching attributes
-    * @param attribute text attribute (enum USearchAttribute) to be retrieve
-    * @return text attribute value
-    * @stable ICU 2.0
-    */
-    USearchAttributeValue getAttribute(USearchAttribute  attribute) const;
-    
-    /**
-    * Returns the index to the match in the text string that was searched.
-    * This call returns a valid result only after a successful call to 
-    * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
-    * Just after construction, or after a searching method returns 
-    * <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
-    * <p>
-    * Use getMatchedLength to get the matched string length.
-    * @return index of a substring within the text string that is being 
-    *         searched.
-    * @see #first
-    * @see #next
-    * @see #previous
-    * @see #last
-    * @stable ICU 2.0
-    */
-    int32_t getMatchedStart(void) const;
-
-    /**
-     * Returns the length of text in the string which matches the search 
-     * pattern. This call returns a valid result only after a successful call 
-     * to <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
-     * Just after construction, or after a searching method returns 
-     * <tt>USEARCH_DONE</tt>, this method will return 0.
-     * @return The length of the match in the target text, or 0 if there
-     *         is no match currently.
-     * @see #first
-     * @see #next
-     * @see #previous
-     * @see #last
-     * @stable ICU 2.0
-     */
-    int32_t getMatchedLength(void) const;
-    
-    /**
-     * Returns the text that was matched by the most recent call to 
-     * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
-     * If the iterator is not pointing at a valid match (e.g. just after 
-     * construction or after <tt>USEARCH_DONE</tt> has been returned, 
-     * returns an empty string. 
-     * @param result stores the matched string or an empty string if a match
-     *        is not found.
-     * @see #first
-     * @see #next
-     * @see #previous
-     * @see #last
-     * @stable ICU 2.0
-     */
-    void getMatchedText(UnicodeString &result) const;
-    
-    /**
-     * Set the BreakIterator that will be used to restrict the points
-     * at which matches are detected. The user is responsible for deleting 
-     * the breakiterator.
-     * @param breakiter A BreakIterator that will be used to restrict the 
-     *                points at which matches are detected. If a match is 
-     *                found, but the match's start or end index is not a 
-     *                boundary as determined by the <tt>BreakIterator</tt>, 
-     *                the match will be rejected and another will be searched 
-     *                for. If this parameter is <tt>NULL</tt>, no break
-     *                detection is attempted.
-     * @param status for errors if it occurs
-     * @see BreakIterator
-     * @stable ICU 2.0
-     */
-    void setBreakIterator(BreakIterator *breakiter, UErrorCode &status);
-    
-    /**
-     * Returns the BreakIterator that is used to restrict the points at 
-     * which matches are detected.  This will be the same object that was 
-     * passed to the constructor or to <tt>setBreakIterator</tt>.
-     * Note that <tt>NULL</tt> is a legal value; it means that break
-     * detection should not be attempted.
-     * @return BreakIterator used to restrict matchings.
-     * @see #setBreakIterator
-     * @stable ICU 2.0
-     */
-    const BreakIterator * getBreakIterator(void) const;
-
-    /**
-     * Set the string text to be searched. Text iteration will hence begin at 
-     * the start of the text string. This method is useful if you want to 
-     * re-use an iterator to search for the same pattern within a different 
-     * body of text. The user is responsible for deleting the text.
-     * @param text string to be searched.
-     * @param status for errors. If the text length is 0, 
-     *        an U_ILLEGAL_ARGUMENT_ERROR is returned.
-     * @stable ICU 2.0
-     */
-    virtual void setText(const UnicodeString &text, UErrorCode &status);    
-
-    /**
-     * Set the string text to be searched. Text iteration will hence begin at 
-     * the start of the text string. This method is useful if you want to 
-     * re-use an iterator to search for the same pattern within a different 
-     * body of text.
-     * <p>
-     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
-     * will be done during searching for this version. The block of text 
-     * in <tt>CharacterIterator</tt> will be used as it is.
-     * The user is responsible for deleting the text.
-     * @param text string iterator to be searched.
-     * @param status for errors if any. If the text length is 0 then an 
-     *        U_ILLEGAL_ARGUMENT_ERROR is returned.
-     * @stable ICU 2.0
-     */
-    virtual void setText(CharacterIterator &text, UErrorCode &status);
-    
-    /**
-     * Return the string text to be searched.
-     * @return text string to be searched.
-     * @stable ICU 2.0
-     */
-    const UnicodeString & getText(void) const;
-
-    // operator overloading ----------------------------------------------
-
-    /**
-     * Equality operator. 
-     * @param that SearchIterator instance to be compared.
-     * @return TRUE if both BreakIterators are of the same class, have the 
-     *         same behavior, terates over the same text and have the same
-     *         attributes. FALSE otherwise.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const SearchIterator &that) const;
-
-    /**
-     * Not-equal operator. 
-     * @param that SearchIterator instance to be compared.
-     * @return FALSE if operator== returns TRUE, and vice versa.
-     * @stable ICU 2.0
-     */
-    UBool operator!=(const SearchIterator &that) const;
-
-    // public methods ----------------------------------------------------
-
-    /**
-     * Returns a copy of SearchIterator with the same behavior, and 
-     * iterating over the same text, as this one. Note that all data will be
-     * replicated, except for the text string to be searched.
-     * @return cloned object
-     * @stable ICU 2.0
-     */
-    virtual SearchIterator* safeClone(void) const = 0;
-
-    /**
-     * Returns the first index at which the string text matches the search 
-     * pattern. The iterator is adjusted so that its current index (as 
-     * returned by <tt>getOffset</tt>) is the match position if one 
-     * was found.
-     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
-     * the iterator will be adjusted to the index USEARCH_DONE
-     * @param  status for errors if it occurs
-     * @return The character index of the first match, or 
-     *         <tt>USEARCH_DONE</tt> if there are no matches.
-     * @see #getOffset
-     * @stable ICU 2.0
-     */
-    int32_t first(UErrorCode &status);
-
-    /**
-     * Returns the first index greater than <tt>position</tt> at which the 
-     * string text matches the search pattern. The iterator is adjusted so 
-     * that its current index (as returned by <tt>getOffset</tt>) is the 
-     * match position if one was found. If a match is not found, 
-     * <tt>USEARCH_DONE</tt> will be returned and the iterator will be 
-     * adjusted to the index USEARCH_DONE
-     * @param  position where search if to start from. If position is less
-     *             than or greater than the text range for searching, 
-     *          an U_INDEX_OUTOFBOUNDS_ERROR will be returned
-     * @param  status for errors if it occurs
-     * @return The character index of the first match following 
-     *         <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are no 
-     *         matches.
-     * @see #getOffset
-     * @stable ICU 2.0
-     */
-    int32_t following(int32_t position, UErrorCode &status);
-    
-    /**
-     * Returns the last index in the target text at which it matches the 
-     * search pattern. The iterator is adjusted so that its current index 
-     * (as returned by <tt>getOffset</tt>) is the match position if one was 
-     * found.
-     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
-     * the iterator will be adjusted to the index USEARCH_DONE.
-     * @param  status for errors if it occurs
-     * @return The index of the first match, or <tt>USEARCH_DONE</tt> if 
-     *         there are no matches.
-     * @see #getOffset
-     * @stable ICU 2.0
-     */
-    int32_t last(UErrorCode &status);
-
-    /**
-     * Returns the first index less than <tt>position</tt> at which the string 
-     * text matches the search pattern. The iterator is adjusted so that its 
-     * current index (as returned by <tt>getOffset</tt>) is the match 
-     * position if one was found. If a match is not found, 
-     * <tt>USEARCH_DONE</tt> will be returned and the iterator will be 
-     * adjusted to the index USEARCH_DONE
-     * @param  position where search is to start from. If position is less
-     *             than or greater than the text range for searching, 
-     *          an U_INDEX_OUTOFBOUNDS_ERROR will be returned
-     * @param  status for errors if it occurs
-     * @return The character index of the first match preceding 
-     *         <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are 
-     *         no matches.
-     * @see #getOffset
-     * @stable ICU 2.0
-     */
-    int32_t preceding(int32_t position, UErrorCode &status);
-
-    /**
-     * Returns the index of the next point at which the text matches the
-     * search pattern, starting from the current position
-     * The iterator is adjusted so that its current index (as returned by 
-     * <tt>getOffset</tt>) is the match position if one was found.
-     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
-     * the iterator will be adjusted to a position after the end of the text 
-     * string.
-     * @param  status for errors if it occurs
-     * @return The index of the next match after the current position,
-     *          or <tt>USEARCH_DONE</tt> if there are no more matches.
-     * @see #getOffset
-     * @stable ICU 2.0
-     */
-     int32_t next(UErrorCode &status);
-
-    /**
-     * Returns the index of the previous point at which the string text 
-     * matches the search pattern, starting at the current position.
-     * The iterator is adjusted so that its current index (as returned by 
-     * <tt>getOffset</tt>) is the match position if one was found.
-     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
-     * the iterator will be adjusted to the index USEARCH_DONE
-     * @param  status for errors if it occurs
-     * @return The index of the previous match before the current position,
-     *          or <tt>USEARCH_DONE</tt> if there are no more matches.
-     * @see #getOffset
-     * @stable ICU 2.0
-     */
-    int32_t previous(UErrorCode &status);
-
-    /** 
-    * Resets the iteration.
-    * Search will begin at the start of the text string if a forward 
-    * iteration is initiated before a backwards iteration. Otherwise if a 
-    * backwards iteration is initiated before a forwards iteration, the 
-    * search will begin at the end of the text string.    
-    * @stable ICU 2.0
-    */
-    virtual void reset();
-
-protected:
-    // protected data members ---------------------------------------------
-
-    /**
-    * C search data struct
-    * @stable ICU 2.0
-    */
-    USearch *m_search_;
-
-    /**
-    * Break iterator.
-    * Currently the C++ breakiterator does not have getRules etc to reproduce
-    * another in C. Hence we keep the original around and do the verification
-    * at the end of the match. The user is responsible for deleting this
-    * break iterator.
-    * @stable ICU 2.0
-    */
-    BreakIterator *m_breakiterator_;
-    
-    /**
-    * Unicode string version of the search text
-    * @stable ICU 2.0
-    */
-    UnicodeString  m_text_;
-
-    // protected constructors and destructors -----------------------------
-
-    /**
-    * Default constructor.
-    * Initializes data to the default values.
-    * @stable ICU 2.0
-    */
-    SearchIterator();
-
-    /**
-     * Constructor for use by subclasses.
-     * @param text The target text to be searched.
-     * @param breakiter A {@link BreakIterator} that is used to restrict the 
-     *                points at which matches are detected. If 
-     *                <tt>handleNext</tt> or <tt>handlePrev</tt> finds a 
-     *                match, but the match's start or end index is not a 
-     *                boundary as determined by the <tt>BreakIterator</tt>, 
-     *                the match is rejected and <tt>handleNext</tt> or 
-     *                <tt>handlePrev</tt> is called again. If this parameter 
-     *                is <tt>NULL</tt>, no break detection is attempted.  
-     * @see #handleNext
-     * @see #handlePrev
-     * @stable ICU 2.0
-     */
-    SearchIterator(const UnicodeString &text, 
-                         BreakIterator *breakiter = NULL);
-
-    /**
-     * Constructor for use by subclasses.
-     * <p>
-     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
-     * will be done during searching for this version. The block of text 
-     * in <tt>CharacterIterator</tt> will be used as it is.
-     * @param text The target text to be searched.
-     * @param breakiter A {@link BreakIterator} that is used to restrict the 
-     *                points at which matches are detected. If 
-     *                <tt>handleNext</tt> or <tt>handlePrev</tt> finds a 
-     *                match, but the match's start or end index is not a 
-     *                boundary as determined by the <tt>BreakIterator</tt>, 
-     *                the match is rejected and <tt>handleNext</tt> or 
-     *                <tt>handlePrev</tt> is called again. If this parameter 
-     *                is <tt>NULL</tt>, no break detection is attempted.
-     * @see #handleNext
-     * @see #handlePrev
-     * @stable ICU 2.0
-     */
-    SearchIterator(CharacterIterator &text, BreakIterator *breakiter = NULL);
-
-    // protected methods --------------------------------------------------
-
-    /**
-     * Assignment operator. Sets this iterator to have the same behavior,
-     * and iterate over the same text, as the one passed in.
-     * @param that instance to be copied.
-     * @stable ICU 2.0
-     */
-    SearchIterator & operator=(const SearchIterator &that);
-
-    /**
-     * Abstract method which subclasses override to provide the mechanism
-     * for finding the next match in the target text. This allows different
-     * subclasses to provide different search algorithms.
-     * <p>
-     * If a match is found, the implementation should return the index at
-     * which the match starts and should call 
-     * <tt>setMatchLength</tt> with the number of characters 
-     * in the target text that make up the match. If no match is found, the 
-     * method should return USEARCH_DONE.
-     * <p>
-     * @param position The index in the target text at which the search 
-     *                 should start.
-     * @param status for error codes if it occurs.
-     * @return index at which the match starts, else if match is not found 
-     *         USEARCH_DONE is returned
-     * @see #setMatchLength
-     * @stable ICU 2.0
-     */
-    virtual int32_t handleNext(int32_t position, UErrorCode &status) 
-                                                                         = 0;
-
-    /**
-     * Abstract method which subclasses override to provide the mechanism for
-     * finding the previous match in the target text. This allows different
-     * subclasses to provide different search algorithms.
-     * <p>
-     * If a match is found, the implementation should return the index at
-     * which the match starts and should call 
-     * <tt>setMatchLength</tt> with the number of characters 
-     * in the target text that make up the match. If no match is found, the 
-     * method should return USEARCH_DONE.
-     * <p>
-     * @param position The index in the target text at which the search 
-     *                 should start.
-     * @param status for error codes if it occurs.
-     * @return index at which the match starts, else if match is not found 
-     *         USEARCH_DONE is returned
-     * @see #setMatchLength
-     * @stable ICU 2.0
-     */
-     virtual int32_t handlePrev(int32_t position, UErrorCode &status) 
-                                                                         = 0;
-
-    /**
-     * Sets the length of the currently matched string in the text string to
-     * be searched.
-     * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt>
-     * methods should call this when they find a match in the target text.
-     * @param length length of the matched text.
-     * @see #handleNext
-     * @see #handlePrev
-     * @stable ICU 2.0
-     */
-    virtual void setMatchLength(int32_t length);
-
-    /**
-     * Sets the offset of the currently matched string in the text string to
-     * be searched.
-     * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt>
-     * methods should call this when they find a match in the target text.
-     * @param position start offset of the matched text.
-     * @see #handleNext
-     * @see #handlePrev
-     * @stable ICU 2.0
-     */
-    virtual void setMatchStart(int32_t position);
-
-    /**
-    * sets match not found 
-    * @stable ICU 2.0
-    */
-    void setMatchNotFound();
-};
-
-inline UBool SearchIterator::operator!=(const SearchIterator &that) const
-{
-   return !operator==(that); 
-}
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_COLLATION */
-
-#endif
-

Copied: MacRuby/trunk/icu-1060/unicode/search.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/search.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/search.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/search.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,569 @@
+/*
+**********************************************************************
+*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  03/22/2000   helena      Creation.
+**********************************************************************
+*/
+
+#ifndef SEARCH_H
+#define SEARCH_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: SearchIterator object.
+ */
+ 
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#include "unicode/brkiter.h"
+#include "unicode/usearch.h"
+
+/**
+* @stable ICU 2.0
+*/
+struct USearch;
+/**
+* @stable ICU 2.0
+*/
+typedef struct USearch USearch;
+
+U_NAMESPACE_BEGIN
+
+/**
+ *
+ * <tt>SearchIterator</tt> is an abstract base class that provides 
+ * methods to search for a pattern within a text string. Instances of
+ * <tt>SearchIterator</tt> maintain a current position and scans over the 
+ * target text, returning the indices the pattern is matched and the length 
+ * of each match.
+ * <p>
+ * <tt>SearchIterator</tt> defines a protocol for text searching. 
+ * Subclasses provide concrete implementations of various search algorithms. 
+ * For example, <tt>StringSearch</tt> implements language-sensitive pattern 
+ * matching based on the comparison rules defined in a 
+ * <tt>RuleBasedCollator</tt> object. 
+ * <p> 
+ * Other options for searching includes using a BreakIterator to restrict 
+ * the points at which matches are detected.
+ * <p>
+ * <tt>SearchIterator</tt> provides an API that is similar to that of
+ * other text iteration classes such as <tt>BreakIterator</tt>. Using 
+ * this class, it is easy to scan through text looking for all occurances of 
+ * a given pattern. The following example uses a <tt>StringSearch</tt> 
+ * object to find all instances of "fox" in the target string. Any other 
+ * subclass of <tt>SearchIterator</tt> can be used in an identical 
+ * manner.
+ * <pre><code>
+ * UnicodeString target("The quick brown fox jumped over the lazy fox");
+ * UnicodeString pattern("fox");
+ *
+ * SearchIterator *iter  = new StringSearch(pattern, target);
+ * UErrorCode      error = U_ZERO_ERROR;
+ * for (int pos = iter->first(error); pos != USEARCH_DONE; 
+ *                               pos = iter->next(error)) {
+ *     printf("Found match at %d pos, length is %d\n", pos, 
+ *                                             iter.getMatchLength());
+ * }
+ * </code></pre>
+ *
+ * @see StringSearch
+ * @see RuleBasedCollator
+ */
+class U_I18N_API SearchIterator : public UObject {
+
+public:
+
+    // public constructors and destructors -------------------------------
+
+    /** 
+    * Copy constructor that creates a SearchIterator instance with the same 
+    * behavior, and iterating over the same text. 
+    * @param other the SearchIterator instance to be copied.
+    * @stable ICU 2.0
+    */
+    SearchIterator(const SearchIterator &other);
+
+    /**
+     * Destructor. Cleans up the search iterator data struct.
+     * @stable ICU 2.0
+     */
+    virtual ~SearchIterator();
+
+    // public get and set methods ----------------------------------------
+
+    /**
+     * Sets the index to point to the given position, and clears any state 
+     * that's affected.
+     * <p>
+     * This method takes the argument index and sets the position in the text 
+     * string accordingly without checking if the index is pointing to a 
+     * valid starting point to begin searching. 
+     * @param position within the text to be set. If position is less
+     *             than or greater than the text range for searching, 
+     *          an U_INDEX_OUTOFBOUNDS_ERROR will be returned
+     * @param status for errors if it occurs
+     * @stable ICU 2.0
+     */
+    virtual void setOffset(int32_t position, UErrorCode &status) = 0;
+
+    /**
+     * Return the current index in the text being searched.
+     * If the iteration has gone past the end of the text
+     * (or past the beginning for a backwards search), USEARCH_DONE
+     * is returned.
+     * @return current index in the text being searched.
+     * @stable ICU 2.0
+     */
+    virtual int32_t getOffset(void) const = 0;
+
+    /**
+    * Sets the text searching attributes located in the enum 
+    * USearchAttribute with values from the enum USearchAttributeValue.
+    * USEARCH_DEFAULT can be used for all attributes for resetting.
+    * @param attribute text attribute (enum USearchAttribute) to be set
+    * @param value text attribute value
+    * @param status for errors if it occurs
+    * @stable ICU 2.0
+    */
+    void setAttribute(USearchAttribute       attribute,
+                      USearchAttributeValue  value,
+                      UErrorCode            &status);
+
+    /**    
+    * Gets the text searching attributes
+    * @param attribute text attribute (enum USearchAttribute) to be retrieve
+    * @return text attribute value
+    * @stable ICU 2.0
+    */
+    USearchAttributeValue getAttribute(USearchAttribute  attribute) const;
+    
+    /**
+    * Returns the index to the match in the text string that was searched.
+    * This call returns a valid result only after a successful call to 
+    * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
+    * Just after construction, or after a searching method returns 
+    * <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
+    * <p>
+    * Use getMatchedLength to get the matched string length.
+    * @return index of a substring within the text string that is being 
+    *         searched.
+    * @see #first
+    * @see #next
+    * @see #previous
+    * @see #last
+    * @stable ICU 2.0
+    */
+    int32_t getMatchedStart(void) const;
+
+    /**
+     * Returns the length of text in the string which matches the search 
+     * pattern. This call returns a valid result only after a successful call 
+     * to <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
+     * Just after construction, or after a searching method returns 
+     * <tt>USEARCH_DONE</tt>, this method will return 0.
+     * @return The length of the match in the target text, or 0 if there
+     *         is no match currently.
+     * @see #first
+     * @see #next
+     * @see #previous
+     * @see #last
+     * @stable ICU 2.0
+     */
+    int32_t getMatchedLength(void) const;
+    
+    /**
+     * Returns the text that was matched by the most recent call to 
+     * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
+     * If the iterator is not pointing at a valid match (e.g. just after 
+     * construction or after <tt>USEARCH_DONE</tt> has been returned, 
+     * returns an empty string. 
+     * @param result stores the matched string or an empty string if a match
+     *        is not found.
+     * @see #first
+     * @see #next
+     * @see #previous
+     * @see #last
+     * @stable ICU 2.0
+     */
+    void getMatchedText(UnicodeString &result) const;
+    
+    /**
+     * Set the BreakIterator that will be used to restrict the points
+     * at which matches are detected. The user is responsible for deleting 
+     * the breakiterator.
+     * @param breakiter A BreakIterator that will be used to restrict the 
+     *                points at which matches are detected. If a match is 
+     *                found, but the match's start or end index is not a 
+     *                boundary as determined by the <tt>BreakIterator</tt>, 
+     *                the match will be rejected and another will be searched 
+     *                for. If this parameter is <tt>NULL</tt>, no break
+     *                detection is attempted.
+     * @param status for errors if it occurs
+     * @see BreakIterator
+     * @stable ICU 2.0
+     */
+    void setBreakIterator(BreakIterator *breakiter, UErrorCode &status);
+    
+    /**
+     * Returns the BreakIterator that is used to restrict the points at 
+     * which matches are detected.  This will be the same object that was 
+     * passed to the constructor or to <tt>setBreakIterator</tt>.
+     * Note that <tt>NULL</tt> is a legal value; it means that break
+     * detection should not be attempted.
+     * @return BreakIterator used to restrict matchings.
+     * @see #setBreakIterator
+     * @stable ICU 2.0
+     */
+    const BreakIterator * getBreakIterator(void) const;
+
+    /**
+     * Set the string text to be searched. Text iteration will hence begin at 
+     * the start of the text string. This method is useful if you want to 
+     * re-use an iterator to search for the same pattern within a different 
+     * body of text. The user is responsible for deleting the text.
+     * @param text string to be searched.
+     * @param status for errors. If the text length is 0, 
+     *        an U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    virtual void setText(const UnicodeString &text, UErrorCode &status);    
+
+    /**
+     * Set the string text to be searched. Text iteration will hence begin at 
+     * the start of the text string. This method is useful if you want to 
+     * re-use an iterator to search for the same pattern within a different 
+     * body of text.
+     * <p>
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
+     * will be done during searching for this version. The block of text 
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * The user is responsible for deleting the text.
+     * @param text string iterator to be searched.
+     * @param status for errors if any. If the text length is 0 then an 
+     *        U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    virtual void setText(CharacterIterator &text, UErrorCode &status);
+    
+    /**
+     * Return the string text to be searched.
+     * @return text string to be searched.
+     * @stable ICU 2.0
+     */
+    const UnicodeString & getText(void) const;
+
+    // operator overloading ----------------------------------------------
+
+    /**
+     * Equality operator. 
+     * @param that SearchIterator instance to be compared.
+     * @return TRUE if both BreakIterators are of the same class, have the 
+     *         same behavior, terates over the same text and have the same
+     *         attributes. FALSE otherwise.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const SearchIterator &that) const;
+
+    /**
+     * Not-equal operator. 
+     * @param that SearchIterator instance to be compared.
+     * @return FALSE if operator== returns TRUE, and vice versa.
+     * @stable ICU 2.0
+     */
+    UBool operator!=(const SearchIterator &that) const;
+
+    // public methods ----------------------------------------------------
+
+    /**
+     * Returns a copy of SearchIterator with the same behavior, and 
+     * iterating over the same text, as this one. Note that all data will be
+     * replicated, except for the text string to be searched.
+     * @return cloned object
+     * @stable ICU 2.0
+     */
+    virtual SearchIterator* safeClone(void) const = 0;
+
+    /**
+     * Returns the first index at which the string text matches the search 
+     * pattern. The iterator is adjusted so that its current index (as 
+     * returned by <tt>getOffset</tt>) is the match position if one 
+     * was found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the iterator will be adjusted to the index USEARCH_DONE
+     * @param  status for errors if it occurs
+     * @return The character index of the first match, or 
+     *         <tt>USEARCH_DONE</tt> if there are no matches.
+     * @see #getOffset
+     * @stable ICU 2.0
+     */
+    int32_t first(UErrorCode &status);
+
+    /**
+     * Returns the first index greater than <tt>position</tt> at which the 
+     * string text matches the search pattern. The iterator is adjusted so 
+     * that its current index (as returned by <tt>getOffset</tt>) is the 
+     * match position if one was found. If a match is not found, 
+     * <tt>USEARCH_DONE</tt> will be returned and the iterator will be 
+     * adjusted to the index USEARCH_DONE
+     * @param  position where search if to start from. If position is less
+     *             than or greater than the text range for searching, 
+     *          an U_INDEX_OUTOFBOUNDS_ERROR will be returned
+     * @param  status for errors if it occurs
+     * @return The character index of the first match following 
+     *         <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are no 
+     *         matches.
+     * @see #getOffset
+     * @stable ICU 2.0
+     */
+    int32_t following(int32_t position, UErrorCode &status);
+    
+    /**
+     * Returns the last index in the target text at which it matches the 
+     * search pattern. The iterator is adjusted so that its current index 
+     * (as returned by <tt>getOffset</tt>) is the match position if one was 
+     * found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the iterator will be adjusted to the index USEARCH_DONE.
+     * @param  status for errors if it occurs
+     * @return The index of the first match, or <tt>USEARCH_DONE</tt> if 
+     *         there are no matches.
+     * @see #getOffset
+     * @stable ICU 2.0
+     */
+    int32_t last(UErrorCode &status);
+
+    /**
+     * Returns the first index less than <tt>position</tt> at which the string 
+     * text matches the search pattern. The iterator is adjusted so that its 
+     * current index (as returned by <tt>getOffset</tt>) is the match 
+     * position if one was found. If a match is not found, 
+     * <tt>USEARCH_DONE</tt> will be returned and the iterator will be 
+     * adjusted to the index USEARCH_DONE
+     * @param  position where search is to start from. If position is less
+     *             than or greater than the text range for searching, 
+     *          an U_INDEX_OUTOFBOUNDS_ERROR will be returned
+     * @param  status for errors if it occurs
+     * @return The character index of the first match preceding 
+     *         <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are 
+     *         no matches.
+     * @see #getOffset
+     * @stable ICU 2.0
+     */
+    int32_t preceding(int32_t position, UErrorCode &status);
+
+    /**
+     * Returns the index of the next point at which the text matches the
+     * search pattern, starting from the current position
+     * The iterator is adjusted so that its current index (as returned by 
+     * <tt>getOffset</tt>) is the match position if one was found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the iterator will be adjusted to a position after the end of the text 
+     * string.
+     * @param  status for errors if it occurs
+     * @return The index of the next match after the current position,
+     *          or <tt>USEARCH_DONE</tt> if there are no more matches.
+     * @see #getOffset
+     * @stable ICU 2.0
+     */
+     int32_t next(UErrorCode &status);
+
+    /**
+     * Returns the index of the previous point at which the string text 
+     * matches the search pattern, starting at the current position.
+     * The iterator is adjusted so that its current index (as returned by 
+     * <tt>getOffset</tt>) is the match position if one was found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the iterator will be adjusted to the index USEARCH_DONE
+     * @param  status for errors if it occurs
+     * @return The index of the previous match before the current position,
+     *          or <tt>USEARCH_DONE</tt> if there are no more matches.
+     * @see #getOffset
+     * @stable ICU 2.0
+     */
+    int32_t previous(UErrorCode &status);
+
+    /** 
+    * Resets the iteration.
+    * Search will begin at the start of the text string if a forward 
+    * iteration is initiated before a backwards iteration. Otherwise if a 
+    * backwards iteration is initiated before a forwards iteration, the 
+    * search will begin at the end of the text string.    
+    * @stable ICU 2.0
+    */
+    virtual void reset();
+
+protected:
+    // protected data members ---------------------------------------------
+
+    /**
+    * C search data struct
+    * @stable ICU 2.0
+    */
+    USearch *m_search_;
+
+    /**
+    * Break iterator.
+    * Currently the C++ breakiterator does not have getRules etc to reproduce
+    * another in C. Hence we keep the original around and do the verification
+    * at the end of the match. The user is responsible for deleting this
+    * break iterator.
+    * @stable ICU 2.0
+    */
+    BreakIterator *m_breakiterator_;
+    
+    /**
+    * Unicode string version of the search text
+    * @stable ICU 2.0
+    */
+    UnicodeString  m_text_;
+
+    // protected constructors and destructors -----------------------------
+
+    /**
+    * Default constructor.
+    * Initializes data to the default values.
+    * @stable ICU 2.0
+    */
+    SearchIterator();
+
+    /**
+     * Constructor for use by subclasses.
+     * @param text The target text to be searched.
+     * @param breakiter A {@link BreakIterator} that is used to restrict the 
+     *                points at which matches are detected. If 
+     *                <tt>handleNext</tt> or <tt>handlePrev</tt> finds a 
+     *                match, but the match's start or end index is not a 
+     *                boundary as determined by the <tt>BreakIterator</tt>, 
+     *                the match is rejected and <tt>handleNext</tt> or 
+     *                <tt>handlePrev</tt> is called again. If this parameter 
+     *                is <tt>NULL</tt>, no break detection is attempted.  
+     * @see #handleNext
+     * @see #handlePrev
+     * @stable ICU 2.0
+     */
+    SearchIterator(const UnicodeString &text, 
+                         BreakIterator *breakiter = NULL);
+
+    /**
+     * Constructor for use by subclasses.
+     * <p>
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
+     * will be done during searching for this version. The block of text 
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * @param text The target text to be searched.
+     * @param breakiter A {@link BreakIterator} that is used to restrict the 
+     *                points at which matches are detected. If 
+     *                <tt>handleNext</tt> or <tt>handlePrev</tt> finds a 
+     *                match, but the match's start or end index is not a 
+     *                boundary as determined by the <tt>BreakIterator</tt>, 
+     *                the match is rejected and <tt>handleNext</tt> or 
+     *                <tt>handlePrev</tt> is called again. If this parameter 
+     *                is <tt>NULL</tt>, no break detection is attempted.
+     * @see #handleNext
+     * @see #handlePrev
+     * @stable ICU 2.0
+     */
+    SearchIterator(CharacterIterator &text, BreakIterator *breakiter = NULL);
+
+    // protected methods --------------------------------------------------
+
+    /**
+     * Assignment operator. Sets this iterator to have the same behavior,
+     * and iterate over the same text, as the one passed in.
+     * @param that instance to be copied.
+     * @stable ICU 2.0
+     */
+    SearchIterator & operator=(const SearchIterator &that);
+
+    /**
+     * Abstract method which subclasses override to provide the mechanism
+     * for finding the next match in the target text. This allows different
+     * subclasses to provide different search algorithms.
+     * <p>
+     * If a match is found, the implementation should return the index at
+     * which the match starts and should call 
+     * <tt>setMatchLength</tt> with the number of characters 
+     * in the target text that make up the match. If no match is found, the 
+     * method should return USEARCH_DONE.
+     * <p>
+     * @param position The index in the target text at which the search 
+     *                 should start.
+     * @param status for error codes if it occurs.
+     * @return index at which the match starts, else if match is not found 
+     *         USEARCH_DONE is returned
+     * @see #setMatchLength
+     * @stable ICU 2.0
+     */
+    virtual int32_t handleNext(int32_t position, UErrorCode &status) 
+                                                                         = 0;
+
+    /**
+     * Abstract method which subclasses override to provide the mechanism for
+     * finding the previous match in the target text. This allows different
+     * subclasses to provide different search algorithms.
+     * <p>
+     * If a match is found, the implementation should return the index at
+     * which the match starts and should call 
+     * <tt>setMatchLength</tt> with the number of characters 
+     * in the target text that make up the match. If no match is found, the 
+     * method should return USEARCH_DONE.
+     * <p>
+     * @param position The index in the target text at which the search 
+     *                 should start.
+     * @param status for error codes if it occurs.
+     * @return index at which the match starts, else if match is not found 
+     *         USEARCH_DONE is returned
+     * @see #setMatchLength
+     * @stable ICU 2.0
+     */
+     virtual int32_t handlePrev(int32_t position, UErrorCode &status) 
+                                                                         = 0;
+
+    /**
+     * Sets the length of the currently matched string in the text string to
+     * be searched.
+     * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt>
+     * methods should call this when they find a match in the target text.
+     * @param length length of the matched text.
+     * @see #handleNext
+     * @see #handlePrev
+     * @stable ICU 2.0
+     */
+    virtual void setMatchLength(int32_t length);
+
+    /**
+     * Sets the offset of the currently matched string in the text string to
+     * be searched.
+     * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt>
+     * methods should call this when they find a match in the target text.
+     * @param position start offset of the matched text.
+     * @see #handleNext
+     * @see #handlePrev
+     * @stable ICU 2.0
+     */
+    virtual void setMatchStart(int32_t position);
+
+    /**
+    * sets match not found 
+    * @stable ICU 2.0
+    */
+    void setMatchNotFound();
+};
+
+inline UBool SearchIterator::operator!=(const SearchIterator &that) const
+{
+   return !operator==(that); 
+}
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif
+

Deleted: MacRuby/trunk/icu-1060/unicode/simpletz.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/simpletz.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/simpletz.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,927 +0,0 @@
-/*
- ********************************************************************************
- * Copyright (C) 1997-2008, International Business Machines                     *
- * Corporation and others. All Rights Reserved.                                 *
- ********************************************************************************
- *
- * File SIMPLETZ.H
- *
- * Modification History:
- *
- *   Date        Name        Description
- *   04/21/97    aliu        Overhauled header.
- *   08/10/98    stephen     JDK 1.2 sync
- *                           Added setStartRule() / setEndRule() overloads
- *                           Added hasSameRules()
- *   09/02/98    stephen     Added getOffset(monthLen)
- *                           Changed getOffset() to take UErrorCode
- *   07/09/99    stephen     Removed millisPerHour (unused, for HP compiler)
- *   12/02/99    aliu        Added TimeMode and constructor and setStart/EndRule
- *                           methods that take TimeMode. Added to docs.
- ********************************************************************************
- */
-
-#ifndef SIMPLETZ_H
-#define SIMPLETZ_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: SimpleTimeZone is a concrete subclass of TimeZone.
- */
- 
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/basictz.h"
-
-U_NAMESPACE_BEGIN
-
-// forward declaration
-class InitialTimeZoneRule;
-class TimeZoneTransition;
-class AnnualTimeZoneRule;
-
-/**
- * <code>SimpleTimeZone</code> is a concrete subclass of <code>TimeZone</code>
- * that represents a time zone for use with a Gregorian calendar. This
- * class does not handle historical changes.
- * <P>
- * When specifying daylight-savings-time begin and end dates, use a negative value for
- * <code>dayOfWeekInMonth</code> to indicate that <code>SimpleTimeZone</code> should
- * count from the end of the month backwards. For example, in the U.S., Daylight Savings
- * Time ends at the last (dayOfWeekInMonth = -1) Sunday in October, at 2 AM in standard
- * time.
- *
- * @see      Calendar
- * @see      GregorianCalendar
- * @see      TimeZone
- * @author   D. Goldsmith, Mark Davis, Chen-Lieh Huang, Alan Liu
- */
-class U_I18N_API SimpleTimeZone: public BasicTimeZone {
-public:
-
-    /**
-     * TimeMode is used, together with a millisecond offset after
-     * midnight, to specify a rule transition time.  Most rules
-     * transition at a local wall time, that is, according to the
-     * current time in effect, either standard, or DST.  However, some
-     * rules transition at local standard time, and some at a specific
-     * UTC time.  Although it might seem that all times could be
-     * converted to wall time, thus eliminating the need for this
-     * parameter, this is not the case.
-     * @stable ICU 2.0
-     */
-    enum TimeMode {
-        WALL_TIME = 0,
-        STANDARD_TIME,
-        UTC_TIME
-    };
-
-    /**
-     * Copy constructor
-     * @param source the object to be copied.
-     * @stable ICU 2.0
-     */
-    SimpleTimeZone(const SimpleTimeZone& source);
-
-    /**
-     * Default assignment operator
-     * @param right    the object to be copied.
-     * @stable ICU 2.0
-     */
-    SimpleTimeZone& operator=(const SimpleTimeZone& right);
-
-    /**
-     * Destructor
-     * @stable ICU 2.0
-     */
-    virtual ~SimpleTimeZone();
-
-    /**
-     * Returns true if the two TimeZone objects are equal; that is, they have
-     * the same ID, raw GMT offset, and DST rules.
-     *
-     * @param that  The SimpleTimeZone object to be compared with.
-     * @return      True if the given time zone is equal to this time zone; false
-     *              otherwise.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const TimeZone& that) const;
-
-    /**
-     * Constructs a SimpleTimeZone with the given raw GMT offset and time zone ID,
-     * and which doesn't observe daylight savings time.  Normally you should use
-     * TimeZone::createInstance() to create a TimeZone instead of creating a
-     * SimpleTimeZone directly with this constructor.
-     *
-     * @param rawOffsetGMT  The given base time zone offset to GMT.
-     * @param ID         The timezone ID which is obtained from
-     *                   TimeZone.getAvailableIDs.
-     * @stable ICU 2.0
-     */
-    SimpleTimeZone(int32_t rawOffsetGMT, const UnicodeString& ID);
-
-    /**
-     * Construct a SimpleTimeZone with the given raw GMT offset, time zone ID,
-     * and times to start and end daylight savings time. To create a TimeZone that
-     * doesn't observe daylight savings time, don't use this constructor; use
-     * SimpleTimeZone(rawOffset, ID) instead. Normally, you should use
-     * TimeZone.createInstance() to create a TimeZone instead of creating a
-     * SimpleTimeZone directly with this constructor.
-     * <P>
-     * Various types of daylight-savings time rules can be specfied by using different
-     * values for startDay and startDayOfWeek and endDay and endDayOfWeek.  For a
-     * complete explanation of how these parameters work, see the documentation for
-     * setStartRule().
-     *
-     * @param rawOffsetGMT      The new SimpleTimeZone's raw GMT offset
-     * @param ID                The new SimpleTimeZone's time zone ID.
-     * @param savingsStartMonth The daylight savings starting month. Month is
-     *                          0-based. eg, 0 for January.
-     * @param savingsStartDayOfWeekInMonth   The daylight savings starting
-     *                          day-of-week-in-month. See setStartRule() for a
-     *                          complete explanation.
-     * @param savingsStartDayOfWeek The daylight savings starting day-of-week.
-     *                          See setStartRule() for a complete explanation.
-     * @param savingsStartTime  The daylight savings starting time, expressed as the
-     *                          number of milliseconds after midnight.
-     * @param savingsEndMonth   The daylight savings ending month. Month is
-     *                          0-based. eg, 0 for January.
-     * @param savingsEndDayOfWeekInMonth     The daylight savings ending day-of-week-in-month.
-     *                          See setStartRule() for a complete explanation.
-     * @param savingsEndDayOfWeek The daylight savings ending day-of-week.
-     *                          See setStartRule() for a complete explanation.
-     * @param savingsEndTime    The daylight savings ending time, expressed as the
-     *                          number of milliseconds after midnight.
-     * @param status            An UErrorCode to receive the status.
-     * @stable ICU 2.0
-     */
-    SimpleTimeZone(int32_t rawOffsetGMT, const UnicodeString& ID,
-        int8_t savingsStartMonth, int8_t savingsStartDayOfWeekInMonth,
-        int8_t savingsStartDayOfWeek, int32_t savingsStartTime,
-        int8_t savingsEndMonth, int8_t savingsEndDayOfWeekInMonth,
-        int8_t savingsEndDayOfWeek, int32_t savingsEndTime,
-        UErrorCode& status);
-    /**
-     * Construct a SimpleTimeZone with the given raw GMT offset, time zone ID,
-     * and times to start and end daylight savings time. To create a TimeZone that
-     * doesn't observe daylight savings time, don't use this constructor; use
-     * SimpleTimeZone(rawOffset, ID) instead. Normally, you should use
-     * TimeZone.createInstance() to create a TimeZone instead of creating a
-     * SimpleTimeZone directly with this constructor.
-     * <P>
-     * Various types of daylight-savings time rules can be specfied by using different
-     * values for startDay and startDayOfWeek and endDay and endDayOfWeek.  For a
-     * complete explanation of how these parameters work, see the documentation for
-     * setStartRule().
-     *
-     * @param rawOffsetGMT      The new SimpleTimeZone's raw GMT offset
-     * @param ID                The new SimpleTimeZone's time zone ID.
-     * @param savingsStartMonth The daylight savings starting month. Month is
-     *                          0-based. eg, 0 for January.
-     * @param savingsStartDayOfWeekInMonth   The daylight savings starting
-     *                          day-of-week-in-month. See setStartRule() for a
-     *                          complete explanation.
-     * @param savingsStartDayOfWeek The daylight savings starting day-of-week.
-     *                          See setStartRule() for a complete explanation.
-     * @param savingsStartTime  The daylight savings starting time, expressed as the
-     *                          number of milliseconds after midnight.
-     * @param savingsEndMonth   The daylight savings ending month. Month is
-     *                          0-based. eg, 0 for January.
-     * @param savingsEndDayOfWeekInMonth     The daylight savings ending day-of-week-in-month.
-     *                          See setStartRule() for a complete explanation.
-     * @param savingsEndDayOfWeek The daylight savings ending day-of-week.
-     *                          See setStartRule() for a complete explanation.
-     * @param savingsEndTime    The daylight savings ending time, expressed as the
-     *                          number of milliseconds after midnight.
-     * @param savingsDST        The number of milliseconds added to standard time
-     *                          to get DST time. Default is one hour.
-     * @param status            An UErrorCode to receive the status.
-     * @stable ICU 2.0
-     */
-    SimpleTimeZone(int32_t rawOffsetGMT, const UnicodeString& ID,
-        int8_t savingsStartMonth, int8_t savingsStartDayOfWeekInMonth,
-        int8_t savingsStartDayOfWeek, int32_t savingsStartTime,
-        int8_t savingsEndMonth, int8_t savingsEndDayOfWeekInMonth,
-        int8_t savingsEndDayOfWeek, int32_t savingsEndTime,
-        int32_t savingsDST, UErrorCode& status);
-
-    /**
-     * Construct a SimpleTimeZone with the given raw GMT offset, time zone ID,
-     * and times to start and end daylight savings time. To create a TimeZone that
-     * doesn't observe daylight savings time, don't use this constructor; use
-     * SimpleTimeZone(rawOffset, ID) instead. Normally, you should use
-     * TimeZone.createInstance() to create a TimeZone instead of creating a
-     * SimpleTimeZone directly with this constructor.
-     * <P>
-     * Various types of daylight-savings time rules can be specfied by using different
-     * values for startDay and startDayOfWeek and endDay and endDayOfWeek.  For a
-     * complete explanation of how these parameters work, see the documentation for
-     * setStartRule().
-     *
-     * @param rawOffsetGMT      The new SimpleTimeZone's raw GMT offset
-     * @param ID                The new SimpleTimeZone's time zone ID.
-     * @param savingsStartMonth The daylight savings starting month. Month is
-     *                          0-based. eg, 0 for January.
-     * @param savingsStartDayOfWeekInMonth   The daylight savings starting
-     *                          day-of-week-in-month. See setStartRule() for a
-     *                          complete explanation.
-     * @param savingsStartDayOfWeek The daylight savings starting day-of-week.
-     *                          See setStartRule() for a complete explanation.
-     * @param savingsStartTime  The daylight savings starting time, expressed as the
-     *                          number of milliseconds after midnight.
-     * @param savingsStartTimeMode Whether the start time is local wall time, local
-     *                          standard time, or UTC time. Default is local wall time.
-     * @param savingsEndMonth   The daylight savings ending month. Month is
-     *                          0-based. eg, 0 for January.
-     * @param savingsEndDayOfWeekInMonth     The daylight savings ending day-of-week-in-month.
-     *                          See setStartRule() for a complete explanation.
-     * @param savingsEndDayOfWeek The daylight savings ending day-of-week.
-     *                          See setStartRule() for a complete explanation.
-     * @param savingsEndTime    The daylight savings ending time, expressed as the
-     *                          number of milliseconds after midnight.
-     * @param savingsEndTimeMode Whether the end time is local wall time, local
-     *                          standard time, or UTC time. Default is local wall time.
-     * @param savingsDST        The number of milliseconds added to standard time
-     *                          to get DST time. Default is one hour.
-     * @param status            An UErrorCode to receive the status.
-     * @stable ICU 2.0
-     */
-    SimpleTimeZone(int32_t rawOffsetGMT, const UnicodeString& ID,
-        int8_t savingsStartMonth, int8_t savingsStartDayOfWeekInMonth,
-        int8_t savingsStartDayOfWeek, int32_t savingsStartTime,
-        TimeMode savingsStartTimeMode,
-        int8_t savingsEndMonth, int8_t savingsEndDayOfWeekInMonth,
-        int8_t savingsEndDayOfWeek, int32_t savingsEndTime, TimeMode savingsEndTimeMode,
-        int32_t savingsDST, UErrorCode& status);
-
-    /**
-     * Sets the daylight savings starting year, that is, the year this time zone began
-     * observing its specified daylight savings time rules.  The time zone is considered
-     * not to observe daylight savings time prior to that year; SimpleTimeZone doesn't
-     * support historical daylight-savings-time rules.
-     * @param year the daylight savings starting year.
-     * @stable ICU 2.0
-     */
-    void setStartYear(int32_t year);
-
-    /**
-     * Sets the daylight savings starting rule. For example, in the U.S., Daylight Savings
-     * Time starts at the first Sunday in April, at 2 AM in standard time.
-     * Therefore, you can set the start rule by calling:
-     * setStartRule(TimeFields.APRIL, 1, TimeFields.SUNDAY, 2*60*60*1000);
-     * The dayOfWeekInMonth and dayOfWeek parameters together specify how to calculate
-     * the exact starting date.  Their exact meaning depend on their respective signs,
-     * allowing various types of rules to be constructed, as follows:
-     * <ul>
-     *   <li>If both dayOfWeekInMonth and dayOfWeek are positive, they specify the
-     *       day of week in the month (e.g., (2, WEDNESDAY) is the second Wednesday
-     *       of the month).</li>
-     *   <li>If dayOfWeek is positive and dayOfWeekInMonth is negative, they specify
-     *       the day of week in the month counting backward from the end of the month.
-     *       (e.g., (-1, MONDAY) is the last Monday in the month)</li>
-     *   <li>If dayOfWeek is zero and dayOfWeekInMonth is positive, dayOfWeekInMonth
-     *       specifies the day of the month, regardless of what day of the week it is.
-     *       (e.g., (10, 0) is the tenth day of the month)</li>
-     *   <li>If dayOfWeek is zero and dayOfWeekInMonth is negative, dayOfWeekInMonth
-     *       specifies the day of the month counting backward from the end of the
-     *       month, regardless of what day of the week it is (e.g., (-2, 0) is the
-     *       next-to-last day of the month).</li>
-     *   <li>If dayOfWeek is negative and dayOfWeekInMonth is positive, they specify the
-     *       first specified day of the week on or after the specfied day of the month.
-     *       (e.g., (15, -SUNDAY) is the first Sunday after the 15th of the month
-     *       [or the 15th itself if the 15th is a Sunday].)</li>
-     *   <li>If dayOfWeek and DayOfWeekInMonth are both negative, they specify the
-     *       last specified day of the week on or before the specified day of the month.
-     *       (e.g., (-20, -TUESDAY) is the last Tuesday before the 20th of the month
-     *       [or the 20th itself if the 20th is a Tuesday].)</li>
-     * </ul>
-     * @param month the daylight savings starting month. Month is 0-based.
-     * eg, 0 for January.
-     * @param dayOfWeekInMonth the daylight savings starting
-     * day-of-week-in-month. Please see the member description for an example.
-     * @param dayOfWeek the daylight savings starting day-of-week. Please see
-     * the member description for an example.
-     * @param time the daylight savings starting time. Please see the member
-     * description for an example.
-     * @param status An UErrorCode
-     * @stable ICU 2.0
-     */
-    void setStartRule(int32_t month, int32_t dayOfWeekInMonth, int32_t dayOfWeek,
-                      int32_t time, UErrorCode& status);
-    /**
-     * Sets the daylight savings starting rule. For example, in the U.S., Daylight Savings
-     * Time starts at the first Sunday in April, at 2 AM in standard time.
-     * Therefore, you can set the start rule by calling:
-     * setStartRule(TimeFields.APRIL, 1, TimeFields.SUNDAY, 2*60*60*1000);
-     * The dayOfWeekInMonth and dayOfWeek parameters together specify how to calculate
-     * the exact starting date.  Their exact meaning depend on their respective signs,
-     * allowing various types of rules to be constructed, as follows:
-     * <ul>
-     *   <li>If both dayOfWeekInMonth and dayOfWeek are positive, they specify the
-     *       day of week in the month (e.g., (2, WEDNESDAY) is the second Wednesday
-     *       of the month).</li>
-     *   <li>If dayOfWeek is positive and dayOfWeekInMonth is negative, they specify
-     *       the day of week in the month counting backward from the end of the month.
-     *       (e.g., (-1, MONDAY) is the last Monday in the month)</li>
-     *   <li>If dayOfWeek is zero and dayOfWeekInMonth is positive, dayOfWeekInMonth
-     *       specifies the day of the month, regardless of what day of the week it is.
-     *       (e.g., (10, 0) is the tenth day of the month)</li>
-     *   <li>If dayOfWeek is zero and dayOfWeekInMonth is negative, dayOfWeekInMonth
-     *       specifies the day of the month counting backward from the end of the
-     *       month, regardless of what day of the week it is (e.g., (-2, 0) is the
-     *       next-to-last day of the month).</li>
-     *   <li>If dayOfWeek is negative and dayOfWeekInMonth is positive, they specify the
-     *       first specified day of the week on or after the specfied day of the month.
-     *       (e.g., (15, -SUNDAY) is the first Sunday after the 15th of the month
-     *       [or the 15th itself if the 15th is a Sunday].)</li>
-     *   <li>If dayOfWeek and DayOfWeekInMonth are both negative, they specify the
-     *       last specified day of the week on or before the specified day of the month.
-     *       (e.g., (-20, -TUESDAY) is the last Tuesday before the 20th of the month
-     *       [or the 20th itself if the 20th is a Tuesday].)</li>
-     * </ul>
-     * @param month the daylight savings starting month. Month is 0-based.
-     * eg, 0 for January.
-     * @param dayOfWeekInMonth the daylight savings starting
-     * day-of-week-in-month. Please see the member description for an example.
-     * @param dayOfWeek the daylight savings starting day-of-week. Please see
-     * the member description for an example.
-     * @param time the daylight savings starting time. Please see the member
-     * description for an example.
-     * @param mode whether the time is local wall time, local standard time,
-     * or UTC time. Default is local wall time.
-     * @param status An UErrorCode
-     * @stable ICU 2.0
-     */
-    void setStartRule(int32_t month, int32_t dayOfWeekInMonth, int32_t dayOfWeek,
-                      int32_t time, TimeMode mode, UErrorCode& status);
-
-    /**
-     * Sets the DST start rule to a fixed date within a month.
-     *
-     * @param month         The month in which this rule occurs (0-based).
-     * @param dayOfMonth    The date in that month (1-based).
-     * @param time          The time of that day (number of millis after midnight)
-     *                      when DST takes effect in local wall time, which is
-     *                      standard time in this case.
-     * @param status An UErrorCode
-     * @stable ICU 2.0
-     */
-    void setStartRule(int32_t month, int32_t dayOfMonth, int32_t time,
-                      UErrorCode& status);
-    /**
-     * Sets the DST start rule to a fixed date within a month.
-     *
-     * @param month         The month in which this rule occurs (0-based).
-     * @param dayOfMonth    The date in that month (1-based).
-     * @param time          The time of that day (number of millis after midnight)
-     *                      when DST takes effect in local wall time, which is
-     *                      standard time in this case.
-     * @param mode whether the time is local wall time, local standard time,
-     * or UTC time. Default is local wall time.
-     * @param status An UErrorCode
-     * @stable ICU 2.0
-     */
-    void setStartRule(int32_t month, int32_t dayOfMonth, int32_t time,
-                      TimeMode mode, UErrorCode& status);
-
-    /**
-     * Sets the DST start rule to a weekday before or after a give date within
-     * a month, e.g., the first Monday on or after the 8th.
-     *
-     * @param month         The month in which this rule occurs (0-based).
-     * @param dayOfMonth    A date within that month (1-based).
-     * @param dayOfWeek     The day of the week on which this rule occurs.
-     * @param time          The time of that day (number of millis after midnight)
-     *                      when DST takes effect in local wall time, which is
-     *                      standard time in this case.
-     * @param after         If true, this rule selects the first dayOfWeek on
-     *                      or after dayOfMonth.  If false, this rule selects
-     *                      the last dayOfWeek on or before dayOfMonth.
-     * @param status An UErrorCode
-     * @stable ICU 2.0
-     */
-    void setStartRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
-                      int32_t time, UBool after, UErrorCode& status);
-    /**
-     * Sets the DST start rule to a weekday before or after a give date within
-     * a month, e.g., the first Monday on or after the 8th.
-     *
-     * @param month         The month in which this rule occurs (0-based).
-     * @param dayOfMonth    A date within that month (1-based).
-     * @param dayOfWeek     The day of the week on which this rule occurs.
-     * @param time          The time of that day (number of millis after midnight)
-     *                      when DST takes effect in local wall time, which is
-     *                      standard time in this case.
-     * @param mode whether the time is local wall time, local standard time,
-     * or UTC time. Default is local wall time.
-     * @param after         If true, this rule selects the first dayOfWeek on
-     *                      or after dayOfMonth.  If false, this rule selects
-     *                      the last dayOfWeek on or before dayOfMonth.
-     * @param status An UErrorCode
-     * @stable ICU 2.0
-     */
-    void setStartRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
-                      int32_t time, TimeMode mode, UBool after, UErrorCode& status);
-
-    /**
-     * Sets the daylight savings ending rule. For example, in the U.S., Daylight
-     * Savings Time ends at the last (-1) Sunday in October, at 2 AM in standard time.
-     * Therefore, you can set the end rule by calling:
-     * <pre>
-     * .   setEndRule(TimeFields.OCTOBER, -1, TimeFields.SUNDAY, 2*60*60*1000);
-     * </pre>
-     * Various other types of rules can be specified by manipulating the dayOfWeek
-     * and dayOfWeekInMonth parameters.  For complete details, see the documentation
-     * for setStartRule().
-     *
-     * @param month the daylight savings ending month. Month is 0-based.
-     * eg, 0 for January.
-     * @param dayOfWeekInMonth the daylight savings ending
-     * day-of-week-in-month. See setStartRule() for a complete explanation.
-     * @param dayOfWeek the daylight savings ending day-of-week. See setStartRule()
-     * for a complete explanation.
-     * @param time the daylight savings ending time. Please see the member
-     * description for an example.
-     * @param status An UErrorCode
-     * @stable ICU 2.0
-     */
-    void setEndRule(int32_t month, int32_t dayOfWeekInMonth, int32_t dayOfWeek,
-                    int32_t time, UErrorCode& status);
-
-    /**
-     * Sets the daylight savings ending rule. For example, in the U.S., Daylight
-     * Savings Time ends at the last (-1) Sunday in October, at 2 AM in standard time.
-     * Therefore, you can set the end rule by calling:
-     * <pre>
-     * .   setEndRule(TimeFields.OCTOBER, -1, TimeFields.SUNDAY, 2*60*60*1000);
-     * </pre>
-     * Various other types of rules can be specified by manipulating the dayOfWeek
-     * and dayOfWeekInMonth parameters.  For complete details, see the documentation
-     * for setStartRule().
-     *
-     * @param month the daylight savings ending month. Month is 0-based.
-     * eg, 0 for January.
-     * @param dayOfWeekInMonth the daylight savings ending
-     * day-of-week-in-month. See setStartRule() for a complete explanation.
-     * @param dayOfWeek the daylight savings ending day-of-week. See setStartRule()
-     * for a complete explanation.
-     * @param time the daylight savings ending time. Please see the member
-     * description for an example.
-     * @param mode whether the time is local wall time, local standard time,
-     * or UTC time. Default is local wall time.
-     * @param status An UErrorCode
-     * @stable ICU 2.0
-     */
-    void setEndRule(int32_t month, int32_t dayOfWeekInMonth, int32_t dayOfWeek,
-                    int32_t time, TimeMode mode, UErrorCode& status);
-
-    /**
-     * Sets the DST end rule to a fixed date within a month.
-     *
-     * @param month         The month in which this rule occurs (0-based).
-     * @param dayOfMonth    The date in that month (1-based).
-     * @param time          The time of that day (number of millis after midnight)
-     *                      when DST ends in local wall time, which is daylight
-     *                      time in this case.
-     * @param status An UErrorCode
-     * @stable ICU 2.0
-     */
-    void setEndRule(int32_t month, int32_t dayOfMonth, int32_t time, UErrorCode& status);
-
-    /**
-     * Sets the DST end rule to a fixed date within a month.
-     *
-     * @param month         The month in which this rule occurs (0-based).
-     * @param dayOfMonth    The date in that month (1-based).
-     * @param time          The time of that day (number of millis after midnight)
-     *                      when DST ends in local wall time, which is daylight
-     *                      time in this case.
-     * @param mode whether the time is local wall time, local standard time,
-     * or UTC time. Default is local wall time.
-     * @param status An UErrorCode
-     * @stable ICU 2.0
-     */
-    void setEndRule(int32_t month, int32_t dayOfMonth, int32_t time,
-                    TimeMode mode, UErrorCode& status);
-
-    /**
-     * Sets the DST end rule to a weekday before or after a give date within
-     * a month, e.g., the first Monday on or after the 8th.
-     *
-     * @param month         The month in which this rule occurs (0-based).
-     * @param dayOfMonth    A date within that month (1-based).
-     * @param dayOfWeek     The day of the week on which this rule occurs.
-     * @param time          The time of that day (number of millis after midnight)
-     *                      when DST ends in local wall time, which is daylight
-     *                      time in this case.
-     * @param after         If true, this rule selects the first dayOfWeek on
-     *                      or after dayOfMonth.  If false, this rule selects
-     *                      the last dayOfWeek on or before dayOfMonth.
-     * @param status An UErrorCode
-     * @stable ICU 2.0
-     */
-    void setEndRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
-                    int32_t time, UBool after, UErrorCode& status);
-
-    /**
-     * Sets the DST end rule to a weekday before or after a give date within
-     * a month, e.g., the first Monday on or after the 8th.
-     *
-     * @param month         The month in which this rule occurs (0-based).
-     * @param dayOfMonth    A date within that month (1-based).
-     * @param dayOfWeek     The day of the week on which this rule occurs.
-     * @param time          The time of that day (number of millis after midnight)
-     *                      when DST ends in local wall time, which is daylight
-     *                      time in this case.
-     * @param mode whether the time is local wall time, local standard time,
-     * or UTC time. Default is local wall time.
-     * @param after         If true, this rule selects the first dayOfWeek on
-     *                      or after dayOfMonth.  If false, this rule selects
-     *                      the last dayOfWeek on or before dayOfMonth.
-     * @param status An UErrorCode
-     * @stable ICU 2.0
-     */
-    void setEndRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
-                    int32_t time, TimeMode mode, UBool after, UErrorCode& status);
-
-    /**
-     * Returns the TimeZone's adjusted GMT offset (i.e., the number of milliseconds to add
-     * to GMT to get local time in this time zone, taking daylight savings time into
-     * account) as of a particular reference date.  The reference date is used to determine
-     * whether daylight savings time is in effect and needs to be figured into the offset
-     * that is returned (in other words, what is the adjusted GMT offset in this time zone
-     * at this particular date and time?).  For the time zones produced by createTimeZone(),
-     * the reference data is specified according to the Gregorian calendar, and the date
-     * and time fields are in GMT, NOT local time.
-     *
-     * @param era        The reference date's era
-     * @param year       The reference date's year
-     * @param month      The reference date's month (0-based; 0 is January)
-     * @param day        The reference date's day-in-month (1-based)
-     * @param dayOfWeek  The reference date's day-of-week (1-based; 1 is Sunday)
-     * @param millis     The reference date's milliseconds in day, UTT (NOT local time).
-     * @param status     An UErrorCode to receive the status.
-     * @return           The offset in milliseconds to add to GMT to get local time.
-     * @stable ICU 2.0
-     */
-    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
-                              uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const;
-
-    /**
-     * Gets the time zone offset, for current date, modified in case of
-     * daylight savings. This is the offset to add *to* UTC to get local time.
-     * @param era the era of the given date.
-     * @param year the year in the given date.
-     * @param month the month in the given date.
-     * Month is 0-based. e.g., 0 for January.
-     * @param day the day-in-month of the given date.
-     * @param dayOfWeek the day-of-week of the given date.
-     * @param milliseconds the millis in day in <em>standard</em> local time.
-     * @param monthLength the length of the given month in days.
-     * @param status     An UErrorCode to receive the status.
-     * @return the offset to add *to* GMT to get local time.
-     * @stable ICU 2.0
-     */
-    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
-                           uint8_t dayOfWeek, int32_t milliseconds,
-                           int32_t monthLength, UErrorCode& status) const;
-    /**
-     * Gets the time zone offset, for current date, modified in case of
-     * daylight savings. This is the offset to add *to* UTC to get local time.
-     * @param era the era of the given date.
-     * @param year the year in the given date.
-     * @param month the month in the given date.
-     * Month is 0-based. e.g., 0 for January.
-     * @param day the day-in-month of the given date.
-     * @param dayOfWeek the day-of-week of the given date.
-     * @param milliseconds the millis in day in <em>standard</em> local time.
-     * @param monthLength the length of the given month in days.
-     * @param prevMonthLength length of the previous month in days.
-     * @param status     An UErrorCode to receive the status.
-     * @return the offset to add *to* GMT to get local time.
-     * @stable ICU 2.0
-     */
-    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
-                              uint8_t dayOfWeek, int32_t milliseconds,
-                              int32_t monthLength, int32_t prevMonthLength,
-                              UErrorCode& status) const;
-
-    /**
-     * Redeclared TimeZone method.  This implementation simply calls
-     * the base class method, which otherwise would be hidden.
-     * @stable ICU 2.8
-     */
-    virtual void getOffset(UDate date, UBool local, int32_t& rawOffset,
-                           int32_t& dstOffset, UErrorCode& ec) const;
-
-    /**
-     * Get time zone offsets from local wall time.
-     * @internal
-     */
-    virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt,
-        int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) /*const*/;
-
-    /**
-     * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
-     * to GMT to get local time, before taking daylight savings time into account).
-     *
-     * @return   The TimeZone's raw GMT offset.
-     * @stable ICU 2.0
-     */
-    virtual int32_t getRawOffset(void) const;
-
-    /**
-     * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
-     * to GMT to get local time, before taking daylight savings time into account).
-     *
-     * @param offsetMillis  The new raw GMT offset for this time zone.
-     * @stable ICU 2.0
-     */
-    virtual void setRawOffset(int32_t offsetMillis);
-
-    /**
-     * Sets the amount of time in ms that the clock is advanced during DST.
-     * @param millisSavedDuringDST the number of milliseconds the time is
-     * advanced with respect to standard time when the daylight savings rules
-     * are in effect. A positive number, typically one hour (3600000).
-     * @param status  An UErrorCode to receive the status.
-     * @stable ICU 2.0
-     */
-    void setDSTSavings(int32_t millisSavedDuringDST, UErrorCode& status);
-
-    /**
-     * Returns the amount of time in ms that the clock is advanced during DST.
-     * @return the number of milliseconds the time is
-     * advanced with respect to standard time when the daylight savings rules
-     * are in effect. A positive number, typically one hour (3600000).
-     * @stable ICU 2.0
-     */
-    virtual int32_t getDSTSavings(void) const;
-
-    /**
-     * Queries if this TimeZone uses Daylight Savings Time.
-     *
-     * @return   True if this TimeZone uses Daylight Savings Time; false otherwise.
-     * @stable ICU 2.0
-     */
-    virtual UBool useDaylightTime(void) const;
-
-    /**
-     * Returns true if the given date is within the period when daylight savings time
-     * is in effect; false otherwise.  If the TimeZone doesn't observe daylight savings
-     * time, this functions always returns false.
-     * This method is wasteful since it creates a new GregorianCalendar and
-     * deletes it each time it is called. This is a deprecated method
-     * and provided only for Java compatibility.
-     *
-     * @param date The date to test.
-     * @param status  An UErrorCode to receive the status.
-     * @return true if the given date is in Daylight Savings Time;
-     * false otherwise.
-     * @deprecated ICU 2.4. Use Calendar::inDaylightTime() instead.
-     */
-    virtual UBool inDaylightTime(UDate date, UErrorCode& status) const;
-
-    /**
-     * Return true if this zone has the same rules and offset as another zone.
-     * @param other the TimeZone object to be compared with
-     * @return true if the given zone has the same rules and offset as this one
-     * @stable ICU 2.0
-     */
-    UBool hasSameRules(const TimeZone& other) const;
-
-    /**
-     * Clones TimeZone objects polymorphically. Clients are responsible for deleting
-     * the TimeZone object cloned.
-     *
-     * @return   A new copy of this TimeZone object.
-     * @stable ICU 2.0
-     */
-    virtual TimeZone* clone(void) const;
-
-    /**
-     * Gets the first time zone transition after the base time.
-     * @param base      The base time.
-     * @param inclusive Whether the base time is inclusive or not.
-     * @param result    Receives the first transition after the base time.
-     * @return  TRUE if the transition is found.
-     * @stable ICU 4.0
-     */
-    virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/;
-
-    /**
-     * Gets the most recent time zone transition before the base time.
-     * @param base      The base time.
-     * @param inclusive Whether the base time is inclusive or not.
-     * @param result    Receives the most recent transition before the base time.
-     * @return  TRUE if the transition is found.
-     * @stable ICU 4.0
-     */
-    virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/;
-
-    /**
-     * Returns the number of <code>TimeZoneRule</code>s which represents time transitions,
-     * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except
-     * <code>InitialTimeZoneRule</code>.  The return value range is 0 or any positive value.
-     * @param status    Receives error status code.
-     * @return The number of <code>TimeZoneRule</code>s representing time transitions.
-     * @stable ICU 4.0
-     */
-    virtual int32_t countTransitionRules(UErrorCode& status) /*const*/;
-
-    /**
-     * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code>
-     * which represent time transitions for this time zone.  On successful return,
-     * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and
-     * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code>
-     * instances up to the size specified by trscount.  The results are referencing the
-     * rule instance held by this time zone instance.  Therefore, after this time zone
-     * is destructed, they are no longer available.
-     * @param initial       Receives the initial timezone rule
-     * @param trsrules      Receives the timezone transition rules
-     * @param trscount      On input, specify the size of the array 'transitions' receiving
-     *                      the timezone transition rules.  On output, actual number of
-     *                      rules filled in the array will be set.
-     * @param status        Receives error status code.
-     * @stable ICU 4.0
-     */
-    virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial,
-        const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) /*const*/;
-
-
-public:
-
-    /**
-     * Override TimeZone Returns a unique class ID POLYMORPHICALLY. Pure virtual
-     * override. This method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone() methods call
-     * this method.
-     *
-     * @return   The class ID for this object. All objects of a given class have the
-     *           same class ID. Objects of other classes have different class IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-
-    /**
-     * Return the class ID for this class. This is useful only for comparing to a return
-     * value from getDynamicClassID(). For example:
-     * <pre>
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       Derived::getStaticClassID()) ...
-     * </pre>
-     * @return   The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-private:
-    /**
-     * Constants specifying values of startMode and endMode.
-     */
-    enum EMode
-    {
-        DOM_MODE = 1,
-        DOW_IN_MONTH_MODE,
-        DOW_GE_DOM_MODE,
-        DOW_LE_DOM_MODE
-    };
-
-    SimpleTimeZone(); // default constructor not implemented
-
-    /**
-     * Internal construction method.
-     * @param rawOffsetGMT    The new SimpleTimeZone's raw GMT offset
-     * @param startMonth      the month DST starts
-     * @param startDay        the day DST starts
-     * @param startDayOfWeek  the DOW DST starts
-     * @param startTime       the time DST starts
-     * @param startTimeMode   Whether the start time is local wall time, local
-     *                        standard time, or UTC time. Default is local wall time.
-     * @param endMonth        the month DST ends
-     * @param endDay          the day DST ends
-     * @param endDayOfWeek    the DOW DST ends
-     * @param endTime         the time DST ends
-     * @param endTimeMode     Whether the end time is local wall time, local
-     *                        standard time, or UTC time. Default is local wall time.
-     * @param savingsDST      The number of milliseconds added to standard time
-     *                        to get DST time. Default is one hour.
-     * @param status          An UErrorCode to receive the status.
-     */
-    void construct(int32_t rawOffsetGMT,
-                   int8_t startMonth, int8_t startDay, int8_t startDayOfWeek,
-                   int32_t startTime, TimeMode startTimeMode,
-                   int8_t endMonth, int8_t endDay, int8_t endDayOfWeek,
-                   int32_t endTime, TimeMode endTimeMode,
-                   int32_t dstSavings, UErrorCode& status);
-
-    /**
-     * Compare a given date in the year to a rule. Return 1, 0, or -1, depending
-     * on whether the date is after, equal to, or before the rule date. The
-     * millis are compared directly against the ruleMillis, so any
-     * standard-daylight adjustments must be handled by the caller.
-     *
-     * @return  1 if the date is after the rule date, -1 if the date is before
-     *          the rule date, or 0 if the date is equal to the rule date.
-     */
-    static int32_t compareToRule(int8_t month, int8_t monthLen, int8_t prevMonthLen,
-                                 int8_t dayOfMonth,
-                                 int8_t dayOfWeek, int32_t millis, int32_t millisDelta,
-                                 EMode ruleMode, int8_t ruleMonth, int8_t ruleDayOfWeek,
-                                 int8_t ruleDay, int32_t ruleMillis);
-
-    /**
-     * Given a set of encoded rules in startDay and startDayOfMonth, decode
-     * them and set the startMode appropriately.  Do the same for endDay and
-     * endDayOfMonth.
-     * <P>
-     * Upon entry, the day of week variables may be zero or
-     * negative, in order to indicate special modes.  The day of month
-     * variables may also be negative.
-     * <P>
-     * Upon exit, the mode variables will be
-     * set, and the day of week and day of month variables will be positive.
-     * <P>
-     * This method also recognizes a startDay or endDay of zero as indicating
-     * no DST.
-     */
-    void decodeRules(UErrorCode& status);
-    void decodeStartRule(UErrorCode& status);
-    void decodeEndRule(UErrorCode& status);
-
-    int8_t startMonth, startDay, startDayOfWeek;   // the month, day, DOW, and time DST starts
-    int32_t startTime;
-    TimeMode startTimeMode, endTimeMode; // Mode for startTime, endTime; see TimeMode
-    int8_t endMonth, endDay, endDayOfWeek; // the month, day, DOW, and time DST ends
-    int32_t endTime;
-    int32_t startYear;  // the year these DST rules took effect
-    int32_t rawOffset;  // the TimeZone's raw GMT offset
-    UBool useDaylight; // flag indicating whether this TimeZone uses DST
-    static const int8_t STATICMONTHLENGTH[12]; // lengths of the months
-    EMode startMode, endMode;   // flags indicating what kind of rules the DST rules are
-
-    /**
-     * A positive value indicating the amount of time saved during DST in ms.
-     * Typically one hour; sometimes 30 minutes.
-     */
-    int32_t dstSavings;
-
-    /* Private for BasicTimeZone implementation */
-    void initTransitionRules(UErrorCode& status);
-    void clearTransitionRules(void);
-    void deleteTransitionRules(void);
-    UBool   transitionRulesInitialized;
-    InitialTimeZoneRule*    initialRule;
-    TimeZoneTransition*     firstTransition;
-    AnnualTimeZoneRule*     stdRule;
-    AnnualTimeZoneRule*     dstRule;
-};
-
-inline void SimpleTimeZone::setStartRule(int32_t month, int32_t dayOfWeekInMonth,
-                                         int32_t dayOfWeek,
-                                         int32_t time, UErrorCode& status) {
-    setStartRule(month, dayOfWeekInMonth, dayOfWeek, time, WALL_TIME, status);
-}
-
-inline void SimpleTimeZone::setStartRule(int32_t month, int32_t dayOfMonth,
-                                         int32_t time,
-                                         UErrorCode& status) {
-    setStartRule(month, dayOfMonth, time, WALL_TIME, status);
-}
-
-inline void SimpleTimeZone::setStartRule(int32_t month, int32_t dayOfMonth,
-                                         int32_t dayOfWeek,
-                                         int32_t time, UBool after, UErrorCode& status) {
-    setStartRule(month, dayOfMonth, dayOfWeek, time, WALL_TIME, after, status);
-}
-
-inline void SimpleTimeZone::setEndRule(int32_t month, int32_t dayOfWeekInMonth,
-                                       int32_t dayOfWeek,
-                                       int32_t time, UErrorCode& status) {
-    setEndRule(month, dayOfWeekInMonth, dayOfWeek, time, WALL_TIME, status);
-}
-
-inline void SimpleTimeZone::setEndRule(int32_t month, int32_t dayOfMonth,
-                                       int32_t time, UErrorCode& status) {
-    setEndRule(month, dayOfMonth, time, WALL_TIME, status);
-}
-
-inline void SimpleTimeZone::setEndRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
-                                       int32_t time, UBool after, UErrorCode& status) {
-    setEndRule(month, dayOfMonth, dayOfWeek, time, WALL_TIME, after, status);
-}
-
-inline void
-SimpleTimeZone::getOffset(UDate date, UBool local, int32_t& rawOffsetRef,
-                          int32_t& dstOffsetRef, UErrorCode& ec) const {
-    TimeZone::getOffset(date, local, rawOffsetRef, dstOffsetRef, ec);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _SIMPLETZ

Copied: MacRuby/trunk/icu-1060/unicode/simpletz.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/simpletz.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/simpletz.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/simpletz.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,927 @@
+/*
+ ********************************************************************************
+ * Copyright (C) 1997-2008, International Business Machines                     *
+ * Corporation and others. All Rights Reserved.                                 *
+ ********************************************************************************
+ *
+ * File SIMPLETZ.H
+ *
+ * Modification History:
+ *
+ *   Date        Name        Description
+ *   04/21/97    aliu        Overhauled header.
+ *   08/10/98    stephen     JDK 1.2 sync
+ *                           Added setStartRule() / setEndRule() overloads
+ *                           Added hasSameRules()
+ *   09/02/98    stephen     Added getOffset(monthLen)
+ *                           Changed getOffset() to take UErrorCode
+ *   07/09/99    stephen     Removed millisPerHour (unused, for HP compiler)
+ *   12/02/99    aliu        Added TimeMode and constructor and setStart/EndRule
+ *                           methods that take TimeMode. Added to docs.
+ ********************************************************************************
+ */
+
+#ifndef SIMPLETZ_H
+#define SIMPLETZ_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: SimpleTimeZone is a concrete subclass of TimeZone.
+ */
+ 
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/basictz.h"
+
+U_NAMESPACE_BEGIN
+
+// forward declaration
+class InitialTimeZoneRule;
+class TimeZoneTransition;
+class AnnualTimeZoneRule;
+
+/**
+ * <code>SimpleTimeZone</code> is a concrete subclass of <code>TimeZone</code>
+ * that represents a time zone for use with a Gregorian calendar. This
+ * class does not handle historical changes.
+ * <P>
+ * When specifying daylight-savings-time begin and end dates, use a negative value for
+ * <code>dayOfWeekInMonth</code> to indicate that <code>SimpleTimeZone</code> should
+ * count from the end of the month backwards. For example, in the U.S., Daylight Savings
+ * Time ends at the last (dayOfWeekInMonth = -1) Sunday in October, at 2 AM in standard
+ * time.
+ *
+ * @see      Calendar
+ * @see      GregorianCalendar
+ * @see      TimeZone
+ * @author   D. Goldsmith, Mark Davis, Chen-Lieh Huang, Alan Liu
+ */
+class U_I18N_API SimpleTimeZone: public BasicTimeZone {
+public:
+
+    /**
+     * TimeMode is used, together with a millisecond offset after
+     * midnight, to specify a rule transition time.  Most rules
+     * transition at a local wall time, that is, according to the
+     * current time in effect, either standard, or DST.  However, some
+     * rules transition at local standard time, and some at a specific
+     * UTC time.  Although it might seem that all times could be
+     * converted to wall time, thus eliminating the need for this
+     * parameter, this is not the case.
+     * @stable ICU 2.0
+     */
+    enum TimeMode {
+        WALL_TIME = 0,
+        STANDARD_TIME,
+        UTC_TIME
+    };
+
+    /**
+     * Copy constructor
+     * @param source the object to be copied.
+     * @stable ICU 2.0
+     */
+    SimpleTimeZone(const SimpleTimeZone& source);
+
+    /**
+     * Default assignment operator
+     * @param right    the object to be copied.
+     * @stable ICU 2.0
+     */
+    SimpleTimeZone& operator=(const SimpleTimeZone& right);
+
+    /**
+     * Destructor
+     * @stable ICU 2.0
+     */
+    virtual ~SimpleTimeZone();
+
+    /**
+     * Returns true if the two TimeZone objects are equal; that is, they have
+     * the same ID, raw GMT offset, and DST rules.
+     *
+     * @param that  The SimpleTimeZone object to be compared with.
+     * @return      True if the given time zone is equal to this time zone; false
+     *              otherwise.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const TimeZone& that) const;
+
+    /**
+     * Constructs a SimpleTimeZone with the given raw GMT offset and time zone ID,
+     * and which doesn't observe daylight savings time.  Normally you should use
+     * TimeZone::createInstance() to create a TimeZone instead of creating a
+     * SimpleTimeZone directly with this constructor.
+     *
+     * @param rawOffsetGMT  The given base time zone offset to GMT.
+     * @param ID         The timezone ID which is obtained from
+     *                   TimeZone.getAvailableIDs.
+     * @stable ICU 2.0
+     */
+    SimpleTimeZone(int32_t rawOffsetGMT, const UnicodeString& ID);
+
+    /**
+     * Construct a SimpleTimeZone with the given raw GMT offset, time zone ID,
+     * and times to start and end daylight savings time. To create a TimeZone that
+     * doesn't observe daylight savings time, don't use this constructor; use
+     * SimpleTimeZone(rawOffset, ID) instead. Normally, you should use
+     * TimeZone.createInstance() to create a TimeZone instead of creating a
+     * SimpleTimeZone directly with this constructor.
+     * <P>
+     * Various types of daylight-savings time rules can be specfied by using different
+     * values for startDay and startDayOfWeek and endDay and endDayOfWeek.  For a
+     * complete explanation of how these parameters work, see the documentation for
+     * setStartRule().
+     *
+     * @param rawOffsetGMT      The new SimpleTimeZone's raw GMT offset
+     * @param ID                The new SimpleTimeZone's time zone ID.
+     * @param savingsStartMonth The daylight savings starting month. Month is
+     *                          0-based. eg, 0 for January.
+     * @param savingsStartDayOfWeekInMonth   The daylight savings starting
+     *                          day-of-week-in-month. See setStartRule() for a
+     *                          complete explanation.
+     * @param savingsStartDayOfWeek The daylight savings starting day-of-week.
+     *                          See setStartRule() for a complete explanation.
+     * @param savingsStartTime  The daylight savings starting time, expressed as the
+     *                          number of milliseconds after midnight.
+     * @param savingsEndMonth   The daylight savings ending month. Month is
+     *                          0-based. eg, 0 for January.
+     * @param savingsEndDayOfWeekInMonth     The daylight savings ending day-of-week-in-month.
+     *                          See setStartRule() for a complete explanation.
+     * @param savingsEndDayOfWeek The daylight savings ending day-of-week.
+     *                          See setStartRule() for a complete explanation.
+     * @param savingsEndTime    The daylight savings ending time, expressed as the
+     *                          number of milliseconds after midnight.
+     * @param status            An UErrorCode to receive the status.
+     * @stable ICU 2.0
+     */
+    SimpleTimeZone(int32_t rawOffsetGMT, const UnicodeString& ID,
+        int8_t savingsStartMonth, int8_t savingsStartDayOfWeekInMonth,
+        int8_t savingsStartDayOfWeek, int32_t savingsStartTime,
+        int8_t savingsEndMonth, int8_t savingsEndDayOfWeekInMonth,
+        int8_t savingsEndDayOfWeek, int32_t savingsEndTime,
+        UErrorCode& status);
+    /**
+     * Construct a SimpleTimeZone with the given raw GMT offset, time zone ID,
+     * and times to start and end daylight savings time. To create a TimeZone that
+     * doesn't observe daylight savings time, don't use this constructor; use
+     * SimpleTimeZone(rawOffset, ID) instead. Normally, you should use
+     * TimeZone.createInstance() to create a TimeZone instead of creating a
+     * SimpleTimeZone directly with this constructor.
+     * <P>
+     * Various types of daylight-savings time rules can be specfied by using different
+     * values for startDay and startDayOfWeek and endDay and endDayOfWeek.  For a
+     * complete explanation of how these parameters work, see the documentation for
+     * setStartRule().
+     *
+     * @param rawOffsetGMT      The new SimpleTimeZone's raw GMT offset
+     * @param ID                The new SimpleTimeZone's time zone ID.
+     * @param savingsStartMonth The daylight savings starting month. Month is
+     *                          0-based. eg, 0 for January.
+     * @param savingsStartDayOfWeekInMonth   The daylight savings starting
+     *                          day-of-week-in-month. See setStartRule() for a
+     *                          complete explanation.
+     * @param savingsStartDayOfWeek The daylight savings starting day-of-week.
+     *                          See setStartRule() for a complete explanation.
+     * @param savingsStartTime  The daylight savings starting time, expressed as the
+     *                          number of milliseconds after midnight.
+     * @param savingsEndMonth   The daylight savings ending month. Month is
+     *                          0-based. eg, 0 for January.
+     * @param savingsEndDayOfWeekInMonth     The daylight savings ending day-of-week-in-month.
+     *                          See setStartRule() for a complete explanation.
+     * @param savingsEndDayOfWeek The daylight savings ending day-of-week.
+     *                          See setStartRule() for a complete explanation.
+     * @param savingsEndTime    The daylight savings ending time, expressed as the
+     *                          number of milliseconds after midnight.
+     * @param savingsDST        The number of milliseconds added to standard time
+     *                          to get DST time. Default is one hour.
+     * @param status            An UErrorCode to receive the status.
+     * @stable ICU 2.0
+     */
+    SimpleTimeZone(int32_t rawOffsetGMT, const UnicodeString& ID,
+        int8_t savingsStartMonth, int8_t savingsStartDayOfWeekInMonth,
+        int8_t savingsStartDayOfWeek, int32_t savingsStartTime,
+        int8_t savingsEndMonth, int8_t savingsEndDayOfWeekInMonth,
+        int8_t savingsEndDayOfWeek, int32_t savingsEndTime,
+        int32_t savingsDST, UErrorCode& status);
+
+    /**
+     * Construct a SimpleTimeZone with the given raw GMT offset, time zone ID,
+     * and times to start and end daylight savings time. To create a TimeZone that
+     * doesn't observe daylight savings time, don't use this constructor; use
+     * SimpleTimeZone(rawOffset, ID) instead. Normally, you should use
+     * TimeZone.createInstance() to create a TimeZone instead of creating a
+     * SimpleTimeZone directly with this constructor.
+     * <P>
+     * Various types of daylight-savings time rules can be specfied by using different
+     * values for startDay and startDayOfWeek and endDay and endDayOfWeek.  For a
+     * complete explanation of how these parameters work, see the documentation for
+     * setStartRule().
+     *
+     * @param rawOffsetGMT      The new SimpleTimeZone's raw GMT offset
+     * @param ID                The new SimpleTimeZone's time zone ID.
+     * @param savingsStartMonth The daylight savings starting month. Month is
+     *                          0-based. eg, 0 for January.
+     * @param savingsStartDayOfWeekInMonth   The daylight savings starting
+     *                          day-of-week-in-month. See setStartRule() for a
+     *                          complete explanation.
+     * @param savingsStartDayOfWeek The daylight savings starting day-of-week.
+     *                          See setStartRule() for a complete explanation.
+     * @param savingsStartTime  The daylight savings starting time, expressed as the
+     *                          number of milliseconds after midnight.
+     * @param savingsStartTimeMode Whether the start time is local wall time, local
+     *                          standard time, or UTC time. Default is local wall time.
+     * @param savingsEndMonth   The daylight savings ending month. Month is
+     *                          0-based. eg, 0 for January.
+     * @param savingsEndDayOfWeekInMonth     The daylight savings ending day-of-week-in-month.
+     *                          See setStartRule() for a complete explanation.
+     * @param savingsEndDayOfWeek The daylight savings ending day-of-week.
+     *                          See setStartRule() for a complete explanation.
+     * @param savingsEndTime    The daylight savings ending time, expressed as the
+     *                          number of milliseconds after midnight.
+     * @param savingsEndTimeMode Whether the end time is local wall time, local
+     *                          standard time, or UTC time. Default is local wall time.
+     * @param savingsDST        The number of milliseconds added to standard time
+     *                          to get DST time. Default is one hour.
+     * @param status            An UErrorCode to receive the status.
+     * @stable ICU 2.0
+     */
+    SimpleTimeZone(int32_t rawOffsetGMT, const UnicodeString& ID,
+        int8_t savingsStartMonth, int8_t savingsStartDayOfWeekInMonth,
+        int8_t savingsStartDayOfWeek, int32_t savingsStartTime,
+        TimeMode savingsStartTimeMode,
+        int8_t savingsEndMonth, int8_t savingsEndDayOfWeekInMonth,
+        int8_t savingsEndDayOfWeek, int32_t savingsEndTime, TimeMode savingsEndTimeMode,
+        int32_t savingsDST, UErrorCode& status);
+
+    /**
+     * Sets the daylight savings starting year, that is, the year this time zone began
+     * observing its specified daylight savings time rules.  The time zone is considered
+     * not to observe daylight savings time prior to that year; SimpleTimeZone doesn't
+     * support historical daylight-savings-time rules.
+     * @param year the daylight savings starting year.
+     * @stable ICU 2.0
+     */
+    void setStartYear(int32_t year);
+
+    /**
+     * Sets the daylight savings starting rule. For example, in the U.S., Daylight Savings
+     * Time starts at the first Sunday in April, at 2 AM in standard time.
+     * Therefore, you can set the start rule by calling:
+     * setStartRule(TimeFields.APRIL, 1, TimeFields.SUNDAY, 2*60*60*1000);
+     * The dayOfWeekInMonth and dayOfWeek parameters together specify how to calculate
+     * the exact starting date.  Their exact meaning depend on their respective signs,
+     * allowing various types of rules to be constructed, as follows:
+     * <ul>
+     *   <li>If both dayOfWeekInMonth and dayOfWeek are positive, they specify the
+     *       day of week in the month (e.g., (2, WEDNESDAY) is the second Wednesday
+     *       of the month).</li>
+     *   <li>If dayOfWeek is positive and dayOfWeekInMonth is negative, they specify
+     *       the day of week in the month counting backward from the end of the month.
+     *       (e.g., (-1, MONDAY) is the last Monday in the month)</li>
+     *   <li>If dayOfWeek is zero and dayOfWeekInMonth is positive, dayOfWeekInMonth
+     *       specifies the day of the month, regardless of what day of the week it is.
+     *       (e.g., (10, 0) is the tenth day of the month)</li>
+     *   <li>If dayOfWeek is zero and dayOfWeekInMonth is negative, dayOfWeekInMonth
+     *       specifies the day of the month counting backward from the end of the
+     *       month, regardless of what day of the week it is (e.g., (-2, 0) is the
+     *       next-to-last day of the month).</li>
+     *   <li>If dayOfWeek is negative and dayOfWeekInMonth is positive, they specify the
+     *       first specified day of the week on or after the specfied day of the month.
+     *       (e.g., (15, -SUNDAY) is the first Sunday after the 15th of the month
+     *       [or the 15th itself if the 15th is a Sunday].)</li>
+     *   <li>If dayOfWeek and DayOfWeekInMonth are both negative, they specify the
+     *       last specified day of the week on or before the specified day of the month.
+     *       (e.g., (-20, -TUESDAY) is the last Tuesday before the 20th of the month
+     *       [or the 20th itself if the 20th is a Tuesday].)</li>
+     * </ul>
+     * @param month the daylight savings starting month. Month is 0-based.
+     * eg, 0 for January.
+     * @param dayOfWeekInMonth the daylight savings starting
+     * day-of-week-in-month. Please see the member description for an example.
+     * @param dayOfWeek the daylight savings starting day-of-week. Please see
+     * the member description for an example.
+     * @param time the daylight savings starting time. Please see the member
+     * description for an example.
+     * @param status An UErrorCode
+     * @stable ICU 2.0
+     */
+    void setStartRule(int32_t month, int32_t dayOfWeekInMonth, int32_t dayOfWeek,
+                      int32_t time, UErrorCode& status);
+    /**
+     * Sets the daylight savings starting rule. For example, in the U.S., Daylight Savings
+     * Time starts at the first Sunday in April, at 2 AM in standard time.
+     * Therefore, you can set the start rule by calling:
+     * setStartRule(TimeFields.APRIL, 1, TimeFields.SUNDAY, 2*60*60*1000);
+     * The dayOfWeekInMonth and dayOfWeek parameters together specify how to calculate
+     * the exact starting date.  Their exact meaning depend on their respective signs,
+     * allowing various types of rules to be constructed, as follows:
+     * <ul>
+     *   <li>If both dayOfWeekInMonth and dayOfWeek are positive, they specify the
+     *       day of week in the month (e.g., (2, WEDNESDAY) is the second Wednesday
+     *       of the month).</li>
+     *   <li>If dayOfWeek is positive and dayOfWeekInMonth is negative, they specify
+     *       the day of week in the month counting backward from the end of the month.
+     *       (e.g., (-1, MONDAY) is the last Monday in the month)</li>
+     *   <li>If dayOfWeek is zero and dayOfWeekInMonth is positive, dayOfWeekInMonth
+     *       specifies the day of the month, regardless of what day of the week it is.
+     *       (e.g., (10, 0) is the tenth day of the month)</li>
+     *   <li>If dayOfWeek is zero and dayOfWeekInMonth is negative, dayOfWeekInMonth
+     *       specifies the day of the month counting backward from the end of the
+     *       month, regardless of what day of the week it is (e.g., (-2, 0) is the
+     *       next-to-last day of the month).</li>
+     *   <li>If dayOfWeek is negative and dayOfWeekInMonth is positive, they specify the
+     *       first specified day of the week on or after the specfied day of the month.
+     *       (e.g., (15, -SUNDAY) is the first Sunday after the 15th of the month
+     *       [or the 15th itself if the 15th is a Sunday].)</li>
+     *   <li>If dayOfWeek and DayOfWeekInMonth are both negative, they specify the
+     *       last specified day of the week on or before the specified day of the month.
+     *       (e.g., (-20, -TUESDAY) is the last Tuesday before the 20th of the month
+     *       [or the 20th itself if the 20th is a Tuesday].)</li>
+     * </ul>
+     * @param month the daylight savings starting month. Month is 0-based.
+     * eg, 0 for January.
+     * @param dayOfWeekInMonth the daylight savings starting
+     * day-of-week-in-month. Please see the member description for an example.
+     * @param dayOfWeek the daylight savings starting day-of-week. Please see
+     * the member description for an example.
+     * @param time the daylight savings starting time. Please see the member
+     * description for an example.
+     * @param mode whether the time is local wall time, local standard time,
+     * or UTC time. Default is local wall time.
+     * @param status An UErrorCode
+     * @stable ICU 2.0
+     */
+    void setStartRule(int32_t month, int32_t dayOfWeekInMonth, int32_t dayOfWeek,
+                      int32_t time, TimeMode mode, UErrorCode& status);
+
+    /**
+     * Sets the DST start rule to a fixed date within a month.
+     *
+     * @param month         The month in which this rule occurs (0-based).
+     * @param dayOfMonth    The date in that month (1-based).
+     * @param time          The time of that day (number of millis after midnight)
+     *                      when DST takes effect in local wall time, which is
+     *                      standard time in this case.
+     * @param status An UErrorCode
+     * @stable ICU 2.0
+     */
+    void setStartRule(int32_t month, int32_t dayOfMonth, int32_t time,
+                      UErrorCode& status);
+    /**
+     * Sets the DST start rule to a fixed date within a month.
+     *
+     * @param month         The month in which this rule occurs (0-based).
+     * @param dayOfMonth    The date in that month (1-based).
+     * @param time          The time of that day (number of millis after midnight)
+     *                      when DST takes effect in local wall time, which is
+     *                      standard time in this case.
+     * @param mode whether the time is local wall time, local standard time,
+     * or UTC time. Default is local wall time.
+     * @param status An UErrorCode
+     * @stable ICU 2.0
+     */
+    void setStartRule(int32_t month, int32_t dayOfMonth, int32_t time,
+                      TimeMode mode, UErrorCode& status);
+
+    /**
+     * Sets the DST start rule to a weekday before or after a give date within
+     * a month, e.g., the first Monday on or after the 8th.
+     *
+     * @param month         The month in which this rule occurs (0-based).
+     * @param dayOfMonth    A date within that month (1-based).
+     * @param dayOfWeek     The day of the week on which this rule occurs.
+     * @param time          The time of that day (number of millis after midnight)
+     *                      when DST takes effect in local wall time, which is
+     *                      standard time in this case.
+     * @param after         If true, this rule selects the first dayOfWeek on
+     *                      or after dayOfMonth.  If false, this rule selects
+     *                      the last dayOfWeek on or before dayOfMonth.
+     * @param status An UErrorCode
+     * @stable ICU 2.0
+     */
+    void setStartRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
+                      int32_t time, UBool after, UErrorCode& status);
+    /**
+     * Sets the DST start rule to a weekday before or after a give date within
+     * a month, e.g., the first Monday on or after the 8th.
+     *
+     * @param month         The month in which this rule occurs (0-based).
+     * @param dayOfMonth    A date within that month (1-based).
+     * @param dayOfWeek     The day of the week on which this rule occurs.
+     * @param time          The time of that day (number of millis after midnight)
+     *                      when DST takes effect in local wall time, which is
+     *                      standard time in this case.
+     * @param mode whether the time is local wall time, local standard time,
+     * or UTC time. Default is local wall time.
+     * @param after         If true, this rule selects the first dayOfWeek on
+     *                      or after dayOfMonth.  If false, this rule selects
+     *                      the last dayOfWeek on or before dayOfMonth.
+     * @param status An UErrorCode
+     * @stable ICU 2.0
+     */
+    void setStartRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
+                      int32_t time, TimeMode mode, UBool after, UErrorCode& status);
+
+    /**
+     * Sets the daylight savings ending rule. For example, in the U.S., Daylight
+     * Savings Time ends at the last (-1) Sunday in October, at 2 AM in standard time.
+     * Therefore, you can set the end rule by calling:
+     * <pre>
+     * .   setEndRule(TimeFields.OCTOBER, -1, TimeFields.SUNDAY, 2*60*60*1000);
+     * </pre>
+     * Various other types of rules can be specified by manipulating the dayOfWeek
+     * and dayOfWeekInMonth parameters.  For complete details, see the documentation
+     * for setStartRule().
+     *
+     * @param month the daylight savings ending month. Month is 0-based.
+     * eg, 0 for January.
+     * @param dayOfWeekInMonth the daylight savings ending
+     * day-of-week-in-month. See setStartRule() for a complete explanation.
+     * @param dayOfWeek the daylight savings ending day-of-week. See setStartRule()
+     * for a complete explanation.
+     * @param time the daylight savings ending time. Please see the member
+     * description for an example.
+     * @param status An UErrorCode
+     * @stable ICU 2.0
+     */
+    void setEndRule(int32_t month, int32_t dayOfWeekInMonth, int32_t dayOfWeek,
+                    int32_t time, UErrorCode& status);
+
+    /**
+     * Sets the daylight savings ending rule. For example, in the U.S., Daylight
+     * Savings Time ends at the last (-1) Sunday in October, at 2 AM in standard time.
+     * Therefore, you can set the end rule by calling:
+     * <pre>
+     * .   setEndRule(TimeFields.OCTOBER, -1, TimeFields.SUNDAY, 2*60*60*1000);
+     * </pre>
+     * Various other types of rules can be specified by manipulating the dayOfWeek
+     * and dayOfWeekInMonth parameters.  For complete details, see the documentation
+     * for setStartRule().
+     *
+     * @param month the daylight savings ending month. Month is 0-based.
+     * eg, 0 for January.
+     * @param dayOfWeekInMonth the daylight savings ending
+     * day-of-week-in-month. See setStartRule() for a complete explanation.
+     * @param dayOfWeek the daylight savings ending day-of-week. See setStartRule()
+     * for a complete explanation.
+     * @param time the daylight savings ending time. Please see the member
+     * description for an example.
+     * @param mode whether the time is local wall time, local standard time,
+     * or UTC time. Default is local wall time.
+     * @param status An UErrorCode
+     * @stable ICU 2.0
+     */
+    void setEndRule(int32_t month, int32_t dayOfWeekInMonth, int32_t dayOfWeek,
+                    int32_t time, TimeMode mode, UErrorCode& status);
+
+    /**
+     * Sets the DST end rule to a fixed date within a month.
+     *
+     * @param month         The month in which this rule occurs (0-based).
+     * @param dayOfMonth    The date in that month (1-based).
+     * @param time          The time of that day (number of millis after midnight)
+     *                      when DST ends in local wall time, which is daylight
+     *                      time in this case.
+     * @param status An UErrorCode
+     * @stable ICU 2.0
+     */
+    void setEndRule(int32_t month, int32_t dayOfMonth, int32_t time, UErrorCode& status);
+
+    /**
+     * Sets the DST end rule to a fixed date within a month.
+     *
+     * @param month         The month in which this rule occurs (0-based).
+     * @param dayOfMonth    The date in that month (1-based).
+     * @param time          The time of that day (number of millis after midnight)
+     *                      when DST ends in local wall time, which is daylight
+     *                      time in this case.
+     * @param mode whether the time is local wall time, local standard time,
+     * or UTC time. Default is local wall time.
+     * @param status An UErrorCode
+     * @stable ICU 2.0
+     */
+    void setEndRule(int32_t month, int32_t dayOfMonth, int32_t time,
+                    TimeMode mode, UErrorCode& status);
+
+    /**
+     * Sets the DST end rule to a weekday before or after a give date within
+     * a month, e.g., the first Monday on or after the 8th.
+     *
+     * @param month         The month in which this rule occurs (0-based).
+     * @param dayOfMonth    A date within that month (1-based).
+     * @param dayOfWeek     The day of the week on which this rule occurs.
+     * @param time          The time of that day (number of millis after midnight)
+     *                      when DST ends in local wall time, which is daylight
+     *                      time in this case.
+     * @param after         If true, this rule selects the first dayOfWeek on
+     *                      or after dayOfMonth.  If false, this rule selects
+     *                      the last dayOfWeek on or before dayOfMonth.
+     * @param status An UErrorCode
+     * @stable ICU 2.0
+     */
+    void setEndRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
+                    int32_t time, UBool after, UErrorCode& status);
+
+    /**
+     * Sets the DST end rule to a weekday before or after a give date within
+     * a month, e.g., the first Monday on or after the 8th.
+     *
+     * @param month         The month in which this rule occurs (0-based).
+     * @param dayOfMonth    A date within that month (1-based).
+     * @param dayOfWeek     The day of the week on which this rule occurs.
+     * @param time          The time of that day (number of millis after midnight)
+     *                      when DST ends in local wall time, which is daylight
+     *                      time in this case.
+     * @param mode whether the time is local wall time, local standard time,
+     * or UTC time. Default is local wall time.
+     * @param after         If true, this rule selects the first dayOfWeek on
+     *                      or after dayOfMonth.  If false, this rule selects
+     *                      the last dayOfWeek on or before dayOfMonth.
+     * @param status An UErrorCode
+     * @stable ICU 2.0
+     */
+    void setEndRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
+                    int32_t time, TimeMode mode, UBool after, UErrorCode& status);
+
+    /**
+     * Returns the TimeZone's adjusted GMT offset (i.e., the number of milliseconds to add
+     * to GMT to get local time in this time zone, taking daylight savings time into
+     * account) as of a particular reference date.  The reference date is used to determine
+     * whether daylight savings time is in effect and needs to be figured into the offset
+     * that is returned (in other words, what is the adjusted GMT offset in this time zone
+     * at this particular date and time?).  For the time zones produced by createTimeZone(),
+     * the reference data is specified according to the Gregorian calendar, and the date
+     * and time fields are in GMT, NOT local time.
+     *
+     * @param era        The reference date's era
+     * @param year       The reference date's year
+     * @param month      The reference date's month (0-based; 0 is January)
+     * @param day        The reference date's day-in-month (1-based)
+     * @param dayOfWeek  The reference date's day-of-week (1-based; 1 is Sunday)
+     * @param millis     The reference date's milliseconds in day, UTT (NOT local time).
+     * @param status     An UErrorCode to receive the status.
+     * @return           The offset in milliseconds to add to GMT to get local time.
+     * @stable ICU 2.0
+     */
+    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
+                              uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const;
+
+    /**
+     * Gets the time zone offset, for current date, modified in case of
+     * daylight savings. This is the offset to add *to* UTC to get local time.
+     * @param era the era of the given date.
+     * @param year the year in the given date.
+     * @param month the month in the given date.
+     * Month is 0-based. e.g., 0 for January.
+     * @param day the day-in-month of the given date.
+     * @param dayOfWeek the day-of-week of the given date.
+     * @param milliseconds the millis in day in <em>standard</em> local time.
+     * @param monthLength the length of the given month in days.
+     * @param status     An UErrorCode to receive the status.
+     * @return the offset to add *to* GMT to get local time.
+     * @stable ICU 2.0
+     */
+    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
+                           uint8_t dayOfWeek, int32_t milliseconds,
+                           int32_t monthLength, UErrorCode& status) const;
+    /**
+     * Gets the time zone offset, for current date, modified in case of
+     * daylight savings. This is the offset to add *to* UTC to get local time.
+     * @param era the era of the given date.
+     * @param year the year in the given date.
+     * @param month the month in the given date.
+     * Month is 0-based. e.g., 0 for January.
+     * @param day the day-in-month of the given date.
+     * @param dayOfWeek the day-of-week of the given date.
+     * @param milliseconds the millis in day in <em>standard</em> local time.
+     * @param monthLength the length of the given month in days.
+     * @param prevMonthLength length of the previous month in days.
+     * @param status     An UErrorCode to receive the status.
+     * @return the offset to add *to* GMT to get local time.
+     * @stable ICU 2.0
+     */
+    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
+                              uint8_t dayOfWeek, int32_t milliseconds,
+                              int32_t monthLength, int32_t prevMonthLength,
+                              UErrorCode& status) const;
+
+    /**
+     * Redeclared TimeZone method.  This implementation simply calls
+     * the base class method, which otherwise would be hidden.
+     * @stable ICU 2.8
+     */
+    virtual void getOffset(UDate date, UBool local, int32_t& rawOffset,
+                           int32_t& dstOffset, UErrorCode& ec) const;
+
+    /**
+     * Get time zone offsets from local wall time.
+     * @internal
+     */
+    virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt,
+        int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) /*const*/;
+
+    /**
+     * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
+     * to GMT to get local time, before taking daylight savings time into account).
+     *
+     * @return   The TimeZone's raw GMT offset.
+     * @stable ICU 2.0
+     */
+    virtual int32_t getRawOffset(void) const;
+
+    /**
+     * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
+     * to GMT to get local time, before taking daylight savings time into account).
+     *
+     * @param offsetMillis  The new raw GMT offset for this time zone.
+     * @stable ICU 2.0
+     */
+    virtual void setRawOffset(int32_t offsetMillis);
+
+    /**
+     * Sets the amount of time in ms that the clock is advanced during DST.
+     * @param millisSavedDuringDST the number of milliseconds the time is
+     * advanced with respect to standard time when the daylight savings rules
+     * are in effect. A positive number, typically one hour (3600000).
+     * @param status  An UErrorCode to receive the status.
+     * @stable ICU 2.0
+     */
+    void setDSTSavings(int32_t millisSavedDuringDST, UErrorCode& status);
+
+    /**
+     * Returns the amount of time in ms that the clock is advanced during DST.
+     * @return the number of milliseconds the time is
+     * advanced with respect to standard time when the daylight savings rules
+     * are in effect. A positive number, typically one hour (3600000).
+     * @stable ICU 2.0
+     */
+    virtual int32_t getDSTSavings(void) const;
+
+    /**
+     * Queries if this TimeZone uses Daylight Savings Time.
+     *
+     * @return   True if this TimeZone uses Daylight Savings Time; false otherwise.
+     * @stable ICU 2.0
+     */
+    virtual UBool useDaylightTime(void) const;
+
+    /**
+     * Returns true if the given date is within the period when daylight savings time
+     * is in effect; false otherwise.  If the TimeZone doesn't observe daylight savings
+     * time, this functions always returns false.
+     * This method is wasteful since it creates a new GregorianCalendar and
+     * deletes it each time it is called. This is a deprecated method
+     * and provided only for Java compatibility.
+     *
+     * @param date The date to test.
+     * @param status  An UErrorCode to receive the status.
+     * @return true if the given date is in Daylight Savings Time;
+     * false otherwise.
+     * @deprecated ICU 2.4. Use Calendar::inDaylightTime() instead.
+     */
+    virtual UBool inDaylightTime(UDate date, UErrorCode& status) const;
+
+    /**
+     * Return true if this zone has the same rules and offset as another zone.
+     * @param other the TimeZone object to be compared with
+     * @return true if the given zone has the same rules and offset as this one
+     * @stable ICU 2.0
+     */
+    UBool hasSameRules(const TimeZone& other) const;
+
+    /**
+     * Clones TimeZone objects polymorphically. Clients are responsible for deleting
+     * the TimeZone object cloned.
+     *
+     * @return   A new copy of this TimeZone object.
+     * @stable ICU 2.0
+     */
+    virtual TimeZone* clone(void) const;
+
+    /**
+     * Gets the first time zone transition after the base time.
+     * @param base      The base time.
+     * @param inclusive Whether the base time is inclusive or not.
+     * @param result    Receives the first transition after the base time.
+     * @return  TRUE if the transition is found.
+     * @stable ICU 4.0
+     */
+    virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/;
+
+    /**
+     * Gets the most recent time zone transition before the base time.
+     * @param base      The base time.
+     * @param inclusive Whether the base time is inclusive or not.
+     * @param result    Receives the most recent transition before the base time.
+     * @return  TRUE if the transition is found.
+     * @stable ICU 4.0
+     */
+    virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/;
+
+    /**
+     * Returns the number of <code>TimeZoneRule</code>s which represents time transitions,
+     * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except
+     * <code>InitialTimeZoneRule</code>.  The return value range is 0 or any positive value.
+     * @param status    Receives error status code.
+     * @return The number of <code>TimeZoneRule</code>s representing time transitions.
+     * @stable ICU 4.0
+     */
+    virtual int32_t countTransitionRules(UErrorCode& status) /*const*/;
+
+    /**
+     * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code>
+     * which represent time transitions for this time zone.  On successful return,
+     * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and
+     * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code>
+     * instances up to the size specified by trscount.  The results are referencing the
+     * rule instance held by this time zone instance.  Therefore, after this time zone
+     * is destructed, they are no longer available.
+     * @param initial       Receives the initial timezone rule
+     * @param trsrules      Receives the timezone transition rules
+     * @param trscount      On input, specify the size of the array 'transitions' receiving
+     *                      the timezone transition rules.  On output, actual number of
+     *                      rules filled in the array will be set.
+     * @param status        Receives error status code.
+     * @stable ICU 4.0
+     */
+    virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial,
+        const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) /*const*/;
+
+
+public:
+
+    /**
+     * Override TimeZone Returns a unique class ID POLYMORPHICALLY. Pure virtual
+     * override. This method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone() methods call
+     * this method.
+     *
+     * @return   The class ID for this object. All objects of a given class have the
+     *           same class ID. Objects of other classes have different class IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+    /**
+     * Return the class ID for this class. This is useful only for comparing to a return
+     * value from getDynamicClassID(). For example:
+     * <pre>
+     * .   Base* polymorphic_pointer = createPolymorphicObject();
+     * .   if (polymorphic_pointer->getDynamicClassID() ==
+     * .       Derived::getStaticClassID()) ...
+     * </pre>
+     * @return   The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+private:
+    /**
+     * Constants specifying values of startMode and endMode.
+     */
+    enum EMode
+    {
+        DOM_MODE = 1,
+        DOW_IN_MONTH_MODE,
+        DOW_GE_DOM_MODE,
+        DOW_LE_DOM_MODE
+    };
+
+    SimpleTimeZone(); // default constructor not implemented
+
+    /**
+     * Internal construction method.
+     * @param rawOffsetGMT    The new SimpleTimeZone's raw GMT offset
+     * @param startMonth      the month DST starts
+     * @param startDay        the day DST starts
+     * @param startDayOfWeek  the DOW DST starts
+     * @param startTime       the time DST starts
+     * @param startTimeMode   Whether the start time is local wall time, local
+     *                        standard time, or UTC time. Default is local wall time.
+     * @param endMonth        the month DST ends
+     * @param endDay          the day DST ends
+     * @param endDayOfWeek    the DOW DST ends
+     * @param endTime         the time DST ends
+     * @param endTimeMode     Whether the end time is local wall time, local
+     *                        standard time, or UTC time. Default is local wall time.
+     * @param savingsDST      The number of milliseconds added to standard time
+     *                        to get DST time. Default is one hour.
+     * @param status          An UErrorCode to receive the status.
+     */
+    void construct(int32_t rawOffsetGMT,
+                   int8_t startMonth, int8_t startDay, int8_t startDayOfWeek,
+                   int32_t startTime, TimeMode startTimeMode,
+                   int8_t endMonth, int8_t endDay, int8_t endDayOfWeek,
+                   int32_t endTime, TimeMode endTimeMode,
+                   int32_t dstSavings, UErrorCode& status);
+
+    /**
+     * Compare a given date in the year to a rule. Return 1, 0, or -1, depending
+     * on whether the date is after, equal to, or before the rule date. The
+     * millis are compared directly against the ruleMillis, so any
+     * standard-daylight adjustments must be handled by the caller.
+     *
+     * @return  1 if the date is after the rule date, -1 if the date is before
+     *          the rule date, or 0 if the date is equal to the rule date.
+     */
+    static int32_t compareToRule(int8_t month, int8_t monthLen, int8_t prevMonthLen,
+                                 int8_t dayOfMonth,
+                                 int8_t dayOfWeek, int32_t millis, int32_t millisDelta,
+                                 EMode ruleMode, int8_t ruleMonth, int8_t ruleDayOfWeek,
+                                 int8_t ruleDay, int32_t ruleMillis);
+
+    /**
+     * Given a set of encoded rules in startDay and startDayOfMonth, decode
+     * them and set the startMode appropriately.  Do the same for endDay and
+     * endDayOfMonth.
+     * <P>
+     * Upon entry, the day of week variables may be zero or
+     * negative, in order to indicate special modes.  The day of month
+     * variables may also be negative.
+     * <P>
+     * Upon exit, the mode variables will be
+     * set, and the day of week and day of month variables will be positive.
+     * <P>
+     * This method also recognizes a startDay or endDay of zero as indicating
+     * no DST.
+     */
+    void decodeRules(UErrorCode& status);
+    void decodeStartRule(UErrorCode& status);
+    void decodeEndRule(UErrorCode& status);
+
+    int8_t startMonth, startDay, startDayOfWeek;   // the month, day, DOW, and time DST starts
+    int32_t startTime;
+    TimeMode startTimeMode, endTimeMode; // Mode for startTime, endTime; see TimeMode
+    int8_t endMonth, endDay, endDayOfWeek; // the month, day, DOW, and time DST ends
+    int32_t endTime;
+    int32_t startYear;  // the year these DST rules took effect
+    int32_t rawOffset;  // the TimeZone's raw GMT offset
+    UBool useDaylight; // flag indicating whether this TimeZone uses DST
+    static const int8_t STATICMONTHLENGTH[12]; // lengths of the months
+    EMode startMode, endMode;   // flags indicating what kind of rules the DST rules are
+
+    /**
+     * A positive value indicating the amount of time saved during DST in ms.
+     * Typically one hour; sometimes 30 minutes.
+     */
+    int32_t dstSavings;
+
+    /* Private for BasicTimeZone implementation */
+    void initTransitionRules(UErrorCode& status);
+    void clearTransitionRules(void);
+    void deleteTransitionRules(void);
+    UBool   transitionRulesInitialized;
+    InitialTimeZoneRule*    initialRule;
+    TimeZoneTransition*     firstTransition;
+    AnnualTimeZoneRule*     stdRule;
+    AnnualTimeZoneRule*     dstRule;
+};
+
+inline void SimpleTimeZone::setStartRule(int32_t month, int32_t dayOfWeekInMonth,
+                                         int32_t dayOfWeek,
+                                         int32_t time, UErrorCode& status) {
+    setStartRule(month, dayOfWeekInMonth, dayOfWeek, time, WALL_TIME, status);
+}
+
+inline void SimpleTimeZone::setStartRule(int32_t month, int32_t dayOfMonth,
+                                         int32_t time,
+                                         UErrorCode& status) {
+    setStartRule(month, dayOfMonth, time, WALL_TIME, status);
+}
+
+inline void SimpleTimeZone::setStartRule(int32_t month, int32_t dayOfMonth,
+                                         int32_t dayOfWeek,
+                                         int32_t time, UBool after, UErrorCode& status) {
+    setStartRule(month, dayOfMonth, dayOfWeek, time, WALL_TIME, after, status);
+}
+
+inline void SimpleTimeZone::setEndRule(int32_t month, int32_t dayOfWeekInMonth,
+                                       int32_t dayOfWeek,
+                                       int32_t time, UErrorCode& status) {
+    setEndRule(month, dayOfWeekInMonth, dayOfWeek, time, WALL_TIME, status);
+}
+
+inline void SimpleTimeZone::setEndRule(int32_t month, int32_t dayOfMonth,
+                                       int32_t time, UErrorCode& status) {
+    setEndRule(month, dayOfMonth, time, WALL_TIME, status);
+}
+
+inline void SimpleTimeZone::setEndRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
+                                       int32_t time, UBool after, UErrorCode& status) {
+    setEndRule(month, dayOfMonth, dayOfWeek, time, WALL_TIME, after, status);
+}
+
+inline void
+SimpleTimeZone::getOffset(UDate date, UBool local, int32_t& rawOffsetRef,
+                          int32_t& dstOffsetRef, UErrorCode& ec) const {
+    TimeZone::getOffset(date, local, rawOffsetRef, dstOffsetRef, ec);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _SIMPLETZ

Deleted: MacRuby/trunk/icu-1060/unicode/smpdtfmt.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/smpdtfmt.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/smpdtfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,975 +0,0 @@
-/*
-* Copyright (C) 1997-2009, International Business Machines Corporation and others. All Rights Reserved.
-*******************************************************************************
-*
-* File SMPDTFMT.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   02/19/97    aliu        Converted from java.
-*   07/09/97    helena      Make ParsePosition into a class.
-*   07/21/98    stephen     Added GMT_PLUS, GMT_MINUS
-*                            Changed setTwoDigitStartDate to set2DigitYearStart
-*                            Changed getTwoDigitStartDate to get2DigitYearStart
-*                            Removed subParseLong
-*                            Removed getZoneIndex (added in DateFormatSymbols)
-*   06/14/99    stephen     Removed fgTimeZoneDataSuffix
-*   10/14/99    aliu        Updated class doc to describe 2-digit year parsing
-*                           {j28 4182066}.
-*******************************************************************************
-*/
-
-#ifndef SMPDTFMT_H
-#define SMPDTFMT_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Format and parse dates in a language-independent manner.
- */
- 
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/datefmt.h"
-
-U_NAMESPACE_BEGIN
-
-class DateFormatSymbols;
-class DateFormat;
-class MessageFormat;
-
-/**
- *
- * SimpleDateFormat is a concrete class for formatting and parsing dates in a
- * language-independent manner. It allows for formatting (millis -> text),
- * parsing (text -> millis), and normalization. Formats/Parses a date or time,
- * which is the standard milliseconds since 24:00 GMT, Jan 1, 1970.
- * <P>
- * Clients are encouraged to create a date-time formatter using DateFormat::getInstance(),
- * getDateInstance(), getDateInstance(), or getDateTimeInstance() rather than
- * explicitly constructing an instance of SimpleDateFormat.  This way, the client
- * is guaranteed to get an appropriate formatting pattern for whatever locale the
- * program is running in.  However, if the client needs something more unusual than
- * the default patterns in the locales, he can construct a SimpleDateFormat directly
- * and give it an appropriate pattern (or use one of the factory methods on DateFormat
- * and modify the pattern after the fact with toPattern() and applyPattern().
- * <P>
- * Date/Time format syntax:
- * <P>
- * The date/time format is specified by means of a string time pattern. In this
- * pattern, all ASCII letters are reserved as pattern letters, which are defined
- * as the following:
- * <pre>
- * \code
- * Symbol   Meaning                 Presentation        Example
- * ------   -------                 ------------        -------
- * G        era designator          (Text)              AD
- * y        year                    (Number)            1996
- * Y        year (week of year)     (Number)            1997
- * u        extended year           (Number)            4601
- * Q        Quarter                 (Text & Number)     Q2 & 02
- * M        month in year           (Text & Number)     July & 07
- * d        day in month            (Number)            10
- * h        hour in am/pm (1~12)    (Number)            12
- * H        hour in day (0~23)      (Number)            0
- * m        minute in hour          (Number)            30
- * s        second in minute        (Number)            55
- * S        fractional second       (Number)            978
- * E        day of week             (Text)              Tuesday
- * e        day of week (local 1~7) (Text & Number)     Tues & 2
- * D        day in year             (Number)            189
- * F        day of week in month    (Number)            2 (2nd Wed in July)
- * w        week in year            (Number)            27
- * W        week in month           (Number)            2
- * a        am/pm marker            (Text)              PM
- * k        hour in day (1~24)      (Number)            24
- * K        hour in am/pm (0~11)    (Number)            0
- * z        time zone               (Time)              Pacific Standard Time
- * Z        time zone (RFC 822)     (Number)            -0800
- * v        time zone (generic)     (Text)              Pacific Time
- * V        time zone (abreviation) (Text)              PT
- * VVVV     time zone (location)    (Text)              United States (Los Angeles)
- * g        Julian day              (Number)            2451334
- * A        milliseconds in day     (Number)            69540000
- * q        stand alone quarter     (Text & Number)     Q2 & 02
- * L        stand alone month       (Text & Number)     July & 07
- * c        stand alone day of week (Text & Number)     Tuesday & 2
- * '        escape for text         (Delimiter)         'Date='
- * ''       single quote            (Literal)           'o''clock'
- * \endcode
- * </pre>
- * The count of pattern letters determine the format.
- * <P>
- * (Text): 4 or more, use full form, &lt;4, use short or abbreviated form if it
- * exists. (e.g., "EEEE" produces "Monday", "EEE" produces "Mon")
- * <P>
- * (Number): the minimum number of digits. Shorter numbers are zero-padded to
- * this amount (e.g. if "m" produces "6", "mm" produces "06"). Year is handled
- * specially; that is, if the count of 'y' is 2, the Year will be truncated to 2 digits.
- * (e.g., if "yyyy" produces "1997", "yy" produces "97".)
- * Unlike other fields, fractional seconds are padded on the right with zero.
- * <P>
- * (Text & Number): 3 or over, use text, otherwise use number.  (e.g., "M" produces "1",
- * "MM" produces "01", "MMM" produces "Jan", and "MMMM" produces "January".)
- * <P>
- * Any characters in the pattern that are not in the ranges of ['a'..'z'] and
- * ['A'..'Z'] will be treated as quoted text. For instance, characters
- * like ':', '.', ' ', '#' and '@' will appear in the resulting time text
- * even they are not embraced within single quotes.
- * <P>
- * A pattern containing any invalid pattern letter will result in a failing
- * UErrorCode result during formatting or parsing.
- * <P>
- * Examples using the US locale:
- * <pre>
- * \code
- *    Format Pattern                         Result
- *    --------------                         -------
- *    "yyyy.MM.dd G 'at' HH:mm:ss vvvv" ->>  1996.07.10 AD at 15:08:56 Pacific Time
- *    "EEE, MMM d, ''yy"                ->>  Wed, July 10, '96
- *    "h:mm a"                          ->>  12:08 PM
- *    "hh 'o''clock' a, zzzz"           ->>  12 o'clock PM, Pacific Daylight Time
- *    "K:mm a, vvv"                     ->>  0:00 PM, PT
- *    "yyyyy.MMMMM.dd GGG hh:mm aaa"    ->>  1996.July.10 AD 12:08 PM
- * \endcode
- * </pre>
- * Code Sample:
- * <pre>
- * \code
- *     UErrorCode success = U_ZERO_ERROR;
- *     SimpleTimeZone* pdt = new SimpleTimeZone(-8 * 60 * 60 * 1000, "PST");
- *     pdt->setStartRule( Calendar::APRIL, 1, Calendar::SUNDAY, 2*60*60*1000);
- *     pdt->setEndRule( Calendar::OCTOBER, -1, Calendar::SUNDAY, 2*60*60*1000);
- *
- *     // Format the current time.
- *     SimpleDateFormat* formatter
- *         = new SimpleDateFormat ("yyyy.MM.dd G 'at' hh:mm:ss a zzz", success );
- *     GregorianCalendar cal(success);
- *     UDate currentTime_1 = cal.getTime(success);
- *     FieldPosition fp(0);
- *     UnicodeString dateString;
- *     formatter->format( currentTime_1, dateString, fp );
- *     cout << "result: " << dateString << endl;
- *
- *     // Parse the previous string back into a Date.
- *     ParsePosition pp(0);
- *     UDate currentTime_2 = formatter->parse(dateString, pp );
- * \endcode
- * </pre>
- * In the above example, the time value "currentTime_2" obtained from parsing
- * will be equal to currentTime_1. However, they may not be equal if the am/pm
- * marker 'a' is left out from the format pattern while the "hour in am/pm"
- * pattern symbol is used. This information loss can happen when formatting the
- * time in PM.
- *
- * <p>
- * When parsing a date string using the abbreviated year pattern ("y" or "yy"),
- * SimpleDateFormat must interpret the abbreviated year
- * relative to some century.  It does this by adjusting dates to be
- * within 80 years before and 20 years after the time the SimpleDateFormat
- * instance is created. For example, using a pattern of "MM/dd/yy" and a
- * SimpleDateFormat instance created on Jan 1, 1997,  the string
- * "01/11/12" would be interpreted as Jan 11, 2012 while the string "05/04/64"
- * would be interpreted as May 4, 1964.
- * During parsing, only strings consisting of exactly two digits, as defined by
- * <code>Unicode::isDigit()</code>, will be parsed into the default century.
- * Any other numeric string, such as a one digit string, a three or more digit
- * string, or a two digit string that isn't all digits (for example, "-1"), is
- * interpreted literally.  So "01/02/3" or "01/02/003" are parsed, using the
- * same pattern, as Jan 2, 3 AD.  Likewise, "01/02/-3" is parsed as Jan 2, 4 BC.
- *
- * <p>
- * If the year pattern has more than two 'y' characters, the year is
- * interpreted literally, regardless of the number of digits.  So using the
- * pattern "MM/dd/yyyy", "01/11/12" parses to Jan 11, 12 A.D.
- *
- * <p>
- * When numeric fields abut one another directly, with no intervening delimiter
- * characters, they constitute a run of abutting numeric fields.  Such runs are
- * parsed specially.  For example, the format "HHmmss" parses the input text
- * "123456" to 12:34:56, parses the input text "12345" to 1:23:45, and fails to
- * parse "1234".  In other words, the leftmost field of the run is flexible,
- * while the others keep a fixed width.  If the parse fails anywhere in the run,
- * then the leftmost field is shortened by one character, and the entire run is
- * parsed again. This is repeated until either the parse succeeds or the
- * leftmost field is one character in length.  If the parse still fails at that
- * point, the parse of the run fails.
- *
- * <P>
- * For time zones that have no names, SimpleDateFormat uses strings GMT+hours:minutes or
- * GMT-hours:minutes.
- * <P>
- * The calendar defines what is the first day of the week, the first week of the
- * year, whether hours are zero based or not (0 vs 12 or 24), and the timezone.
- * There is one common number format to handle all the numbers; the digit count
- * is handled programmatically according to the pattern.
- *
- * <p><em>User subclasses are not supported.</em> While clients may write
- * subclasses, such code will not necessarily work and will not be
- * guaranteed to work stably from release to release.
- */
-class U_I18N_API SimpleDateFormat: public DateFormat {
-public:
-    /**
-     * Construct a SimpleDateFormat using the default pattern for the default
-     * locale.
-     * <P>
-     * [Note:] Not all locales support SimpleDateFormat; for full generality,
-     * use the factory methods in the DateFormat class.
-     * @param status    Output param set to success/failure code.
-     * @stable ICU 2.0
-     */
-    SimpleDateFormat(UErrorCode& status);
-
-    /**
-     * Construct a SimpleDateFormat using the given pattern and the default locale.
-     * The locale is used to obtain the symbols used in formatting (e.g., the
-     * names of the months), but not to provide the pattern.
-     * <P>
-     * [Note:] Not all locales support SimpleDateFormat; for full generality,
-     * use the factory methods in the DateFormat class.
-     * @param pattern    the pattern for the format.
-     * @param status     Output param set to success/failure code.
-     * @stable ICU 2.0
-     */
-    SimpleDateFormat(const UnicodeString& pattern,
-                     UErrorCode& status);
-
-    /**
-     * Construct a SimpleDateFormat using the given pattern and locale.
-     * The locale is used to obtain the symbols used in formatting (e.g., the
-     * names of the months), but not to provide the pattern.
-     * <P>
-     * [Note:] Not all locales support SimpleDateFormat; for full generality,
-     * use the factory methods in the DateFormat class.
-     * @param pattern    the pattern for the format.
-     * @param locale     the given locale.
-     * @param status     Output param set to success/failure code.
-     * @stable ICU 2.0
-     */
-    SimpleDateFormat(const UnicodeString& pattern,
-                     const Locale& locale,
-                     UErrorCode& status);
-
-    /**
-     * Construct a SimpleDateFormat using the given pattern and locale-specific
-     * symbol data.  The formatter takes ownership of the DateFormatSymbols object;
-     * the caller is no longer responsible for deleting it.
-     * @param pattern           the given pattern for the format.
-     * @param formatDataToAdopt the symbols to be adopted.
-     * @param status            Output param set to success/faulure code.
-     * @stable ICU 2.0
-     */
-    SimpleDateFormat(const UnicodeString& pattern,
-                     DateFormatSymbols* formatDataToAdopt,
-                     UErrorCode& status);
-
-    /**
-     * Construct a SimpleDateFormat using the given pattern and locale-specific
-     * symbol data.  The DateFormatSymbols object is NOT adopted; the caller
-     * remains responsible for deleting it.
-     * @param pattern           the given pattern for the format.
-     * @param formatData        the formatting symbols to be use.
-     * @param status            Output param set to success/faulure code.
-     * @stable ICU 2.0
-     */
-    SimpleDateFormat(const UnicodeString& pattern,
-                     const DateFormatSymbols& formatData,
-                     UErrorCode& status);
-
-    /**
-     * Copy constructor.
-     * @stable ICU 2.0
-     */
-    SimpleDateFormat(const SimpleDateFormat&);
-
-    /**
-     * Assignment operator.
-     * @stable ICU 2.0
-     */
-    SimpleDateFormat& operator=(const SimpleDateFormat&);
-
-    /**
-     * Destructor.
-     * @stable ICU 2.0
-     */
-    virtual ~SimpleDateFormat();
-
-    /**
-     * Clone this Format object polymorphically. The caller owns the result and
-     * should delete it when done.
-     * @return    A copy of the object.
-     * @stable ICU 2.0
-     */
-    virtual Format* clone(void) const;
-
-    /**
-     * Return true if the given Format objects are semantically equal. Objects
-     * of different subclasses are considered unequal.
-     * @param other    the object to be compared with.
-     * @return         true if the given Format objects are semantically equal.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const Format& other) const;
-
-    /**
-     * Format a date or time, which is the standard millis since 24:00 GMT, Jan
-     * 1, 1970. Overrides DateFormat pure virtual method.
-     * <P>
-     * Example: using the US locale: "yyyy.MM.dd e 'at' HH:mm:ss zzz" ->>
-     * 1996.07.10 AD at 15:08:56 PDT
-     *
-     * @param cal       Calendar set to the date and time to be formatted
-     *                  into a date/time string.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       The formatting position. On input: an alignment field,
-     *                  if desired. On output: the offsets of the alignment field.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.1
-     */
-    virtual UnicodeString& format(  Calendar& cal,
-                                    UnicodeString& appendTo,
-                                    FieldPosition& pos) const;
-
-    /**
-     * Format a date or time, which is the standard millis since 24:00 GMT, Jan
-     * 1, 1970. Overrides DateFormat pure virtual method.
-     * <P>
-     * Example: using the US locale: "yyyy.MM.dd e 'at' HH:mm:ss zzz" ->>
-     * 1996.07.10 AD at 15:08:56 PDT
-     *
-     * @param obj       A Formattable containing the date-time value to be formatted
-     *                  into a date-time string.  If the type of the Formattable
-     *                  is a numeric type, it is treated as if it were an
-     *                  instance of Date.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param pos       The formatting position. On input: an alignment field,
-     *                  if desired. On output: the offsets of the alignment field.
-     * @param status    Output param set to success/faulure code.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& format(  const Formattable& obj,
-                                    UnicodeString& appendTo,
-                                    FieldPosition& pos,
-                                    UErrorCode& status) const;
-
-    /**
-     * Redeclared DateFormat method.
-     * @param date          the Date value to be formatted.
-     * @param appendTo      Output parameter to receive result.
-     *                      Result is appended to existing contents.
-     * @param fieldPosition The formatting position. On input: an alignment field,
-     *                      if desired. On output: the offsets of the alignment field.
-     * @return              Reference to 'appendTo' parameter.
-     * @stable ICU 2.1
-     */
-    UnicodeString& format(UDate date,
-                          UnicodeString& appendTo,
-                          FieldPosition& fieldPosition) const;
-
-    /**
-     * Redeclared DateFormat method.
-     * @param obj       Object to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param status    Input/output success/failure code.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(const Formattable& obj,
-                          UnicodeString& appendTo,
-                          UErrorCode& status) const;
-
-    /**
-     * Redeclared DateFormat method.
-     * @param date      Date value to be formatted.
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @return          Reference to 'appendTo' parameter.
-     * @stable ICU 2.0
-     */
-    UnicodeString& format(UDate date, UnicodeString& appendTo) const;
-
-    /**
-     * Parse a date/time string beginning at the given parse position. For
-     * example, a time text "07/10/96 4:5 PM, PDT" will be parsed into a Date
-     * that is equivalent to Date(837039928046).
-     * <P>
-     * By default, parsing is lenient: If the input is not in the form used by
-     * this object's format method but can still be parsed as a date, then the
-     * parse succeeds. Clients may insist on strict adherence to the format by
-     * calling setLenient(false).
-     *
-     * @param text  The date/time string to be parsed
-     * @param cal   a Calendar set to the date and time to be formatted
-     *              into a date/time string.
-     * @param pos   On input, the position at which to start parsing; on
-     *              output, the position at which parsing terminated, or the
-     *              start position if the parse failed.
-     * @return      A valid UDate if the input could be parsed.
-     * @stable ICU 2.1
-     */
-    virtual void parse( const UnicodeString& text,
-                        Calendar& cal,
-                        ParsePosition& pos) const;
-
-    /**
-     * Parse a date/time string starting at the given parse position. For
-     * example, a time text "07/10/96 4:5 PM, PDT" will be parsed into a Date
-     * that is equivalent to Date(837039928046).
-     * <P>
-     * By default, parsing is lenient: If the input is not in the form used by
-     * this object's format method but can still be parsed as a date, then the
-     * parse succeeds. Clients may insist on strict adherence to the format by
-     * calling setLenient(false).
-     *
-     * @see DateFormat::setLenient(boolean)
-     *
-     * @param text  The date/time string to be parsed
-     * @param pos   On input, the position at which to start parsing; on
-     *              output, the position at which parsing terminated, or the
-     *              start position if the parse failed.
-     * @return      A valid UDate if the input could be parsed.
-     * @stable ICU 2.0
-     */
-    UDate parse( const UnicodeString& text,
-                 ParsePosition& pos) const;
-
-
-    /**
-     * Parse a date/time string. For example, a time text "07/10/96 4:5 PM, PDT"
-     * will be parsed into a UDate that is equivalent to Date(837039928046).
-     * Parsing begins at the beginning of the string and proceeds as far as
-     * possible.  Assuming no parse errors were encountered, this function
-     * doesn't return any information about how much of the string was consumed
-     * by the parsing.  If you need that information, use the version of
-     * parse() that takes a ParsePosition.
-     *
-     * @param text  The date/time string to be parsed
-     * @param status Filled in with U_ZERO_ERROR if the parse was successful, and with
-     *              an error value if there was a parse error.
-     * @return      A valid UDate if the input could be parsed.
-     * @stable ICU 2.0
-     */
-    virtual UDate parse( const UnicodeString& text,
-                        UErrorCode& status) const;
-
-    /**
-     * Set the start UDate used to interpret two-digit year strings.
-     * When dates are parsed having 2-digit year strings, they are placed within
-     * a assumed range of 100 years starting on the two digit start date.  For
-     * example, the string "24-Jan-17" may be in the year 1817, 1917, 2017, or
-     * some other year.  SimpleDateFormat chooses a year so that the resultant
-     * date is on or after the two digit start date and within 100 years of the
-     * two digit start date.
-     * <P>
-     * By default, the two digit start date is set to 80 years before the current
-     * time at which a SimpleDateFormat object is created.
-     * @param d      start UDate used to interpret two-digit year strings.
-     * @param status Filled in with U_ZERO_ERROR if the parse was successful, and with
-     *               an error value if there was a parse error.
-     * @stable ICU 2.0
-     */
-    virtual void set2DigitYearStart(UDate d, UErrorCode& status);
-
-    /**
-     * Get the start UDate used to interpret two-digit year strings.
-     * When dates are parsed having 2-digit year strings, they are placed within
-     * a assumed range of 100 years starting on the two digit start date.  For
-     * example, the string "24-Jan-17" may be in the year 1817, 1917, 2017, or
-     * some other year.  SimpleDateFormat chooses a year so that the resultant
-     * date is on or after the two digit start date and within 100 years of the
-     * two digit start date.
-     * <P>
-     * By default, the two digit start date is set to 80 years before the current
-     * time at which a SimpleDateFormat object is created.
-     * @param status Filled in with U_ZERO_ERROR if the parse was successful, and with
-     *               an error value if there was a parse error.
-     * @stable ICU 2.0
-     */
-    UDate get2DigitYearStart(UErrorCode& status) const;
-
-    /**
-     * Return a pattern string describing this date format.
-     * @param result Output param to receive the pattern.
-     * @return       A reference to 'result'.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& toPattern(UnicodeString& result) const;
-
-    /**
-     * Return a localized pattern string describing this date format.
-     * In most cases, this will return the same thing as toPattern(),
-     * but a locale can specify characters to use in pattern descriptions
-     * in place of the ones described in this class's class documentation.
-     * (Presumably, letters that would be more mnemonic in that locale's
-     * language.)  This function would produce a pattern using those
-     * letters.
-     *
-     * @param result    Receives the localized pattern.
-     * @param status    Output param set to success/failure code on
-     *                  exit. If the pattern is invalid, this will be
-     *                  set to a failure result.
-     * @return          A reference to 'result'.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& toLocalizedPattern(UnicodeString& result,
-                                              UErrorCode& status) const;
-
-    /**
-     * Apply the given unlocalized pattern string to this date format.
-     * (i.e., after this call, this formatter will format dates according to
-     * the new pattern)
-     *
-     * @param pattern   The pattern to be applied.
-     * @stable ICU 2.0
-     */
-    virtual void applyPattern(const UnicodeString& pattern);
-
-    /**
-     * Apply the given localized pattern string to this date format.
-     * (see toLocalizedPattern() for more information on localized patterns.)
-     *
-     * @param pattern   The localized pattern to be applied.
-     * @param status    Output param set to success/failure code on
-     *                  exit. If the pattern is invalid, this will be
-     *                  set to a failure result.
-     * @stable ICU 2.0
-     */
-    virtual void applyLocalizedPattern(const UnicodeString& pattern,
-                                       UErrorCode& status);
-
-    /**
-     * Gets the date/time formatting symbols (this is an object carrying
-     * the various strings and other symbols used in formatting: e.g., month
-     * names and abbreviations, time zone names, AM/PM strings, etc.)
-     * @return a copy of the date-time formatting data associated
-     * with this date-time formatter.
-     * @stable ICU 2.0
-     */
-    virtual const DateFormatSymbols* getDateFormatSymbols(void) const;
-
-    /**
-     * Set the date/time formatting symbols.  The caller no longer owns the
-     * DateFormatSymbols object and should not delete it after making this call.
-     * @param newFormatSymbols the given date-time formatting symbols to copy.
-     * @stable ICU 2.0
-     */
-    virtual void adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols);
-
-    /**
-     * Set the date/time formatting data.
-     * @param newFormatSymbols the given date-time formatting symbols to copy.
-     * @stable ICU 2.0
-     */
-    virtual void setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols);
-
-    /**
-     * Return the class ID for this class. This is useful only for comparing to
-     * a return value from getDynamicClassID(). For example:
-     * <pre>
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       erived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
-     * method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone()
-     * methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-
-    /**
-     * Set the calendar to be used by this date format. Initially, the default
-     * calendar for the specified or default locale is used.  The caller should
-     * not delete the Calendar object after it is adopted by this call.
-     * Adopting a new calendar will change to the default symbols.
-     *
-     * @param calendarToAdopt    Calendar object to be adopted.
-     * @stable ICU 2.0
-     */
-    virtual void adoptCalendar(Calendar* calendarToAdopt);
-
-    /**
-     * This is for ICU internal use only. Please do not use.
-     * Check whether the 'field' is smaller than all the fields covered in
-     * pattern, return TRUE if it is. The sequence of calendar field, 
-     * from large to small is: ERA, YEAR, MONTH, DATE, AM_PM, HOUR, MINUTE,...
-     * @param field    the calendar field need to check against
-     * @return         TRUE if the 'field' is smaller than all the fields 
-     *                 covered in pattern. FALSE otherwise.
-     * @internal ICU 4.0
-     */
-    UBool isFieldUnitIgnored(UCalendarDateFields field) const;
-
-
-    /**
-     * This is for ICU internal use only. Please do not use.
-     * Check whether the 'field' is smaller than all the fields covered in
-     * pattern, return TRUE if it is. The sequence of calendar field, 
-     * from large to small is: ERA, YEAR, MONTH, DATE, AM_PM, HOUR, MINUTE,...
-     * @param pattern  the pattern to check against
-     * @param field    the calendar field need to check against
-     * @return         TRUE if the 'field' is smaller than all the fields 
-     *                 covered in pattern. FALSE otherwise.
-     * @internal ICU 4.0
-     */
-    static UBool isFieldUnitIgnored(const UnicodeString& pattern, 
-                                    UCalendarDateFields field);
-
-
-
-    /**
-     * This is for ICU internal use only. Please do not use.
-     * Get the locale of this simple date formatter.
-     * It is used in DateIntervalFormat.
-     *
-     * @return   locale in this simple date formatter
-     * @internal ICU 4.0
-     */
-    const Locale& getSmpFmtLocale(void) const;
-
-
-private:
-    friend class DateFormat;
-
-    void initializeDefaultCentury(void);
-
-    SimpleDateFormat(); // default constructor not implemented
-
-    /**
-     * Used by the DateFormat factory methods to construct a SimpleDateFormat.
-     * @param timeStyle the time style.
-     * @param dateStyle the date style.
-     * @param locale    the given locale.
-     * @param status    Output param set to success/failure code on
-     *                  exit.
-     */
-    SimpleDateFormat(EStyle timeStyle, EStyle dateStyle, const Locale& locale, UErrorCode& status);
-
-    /**
-     * Construct a SimpleDateFormat for the given locale.  If no resource data
-     * is available, create an object of last resort, using hard-coded strings.
-     * This is an internal method, called by DateFormat.  It should never fail.
-     * @param locale    the given locale.
-     * @param status    Output param set to success/failure code on
-     *                  exit.
-     */
-    SimpleDateFormat(const Locale& locale, UErrorCode& status); // Use default pattern
-
-    /**
-     * Called by format() to format a single field.
-     *
-     * @param appendTo  Output parameter to receive result.
-     *                  Result is appended to existing contents.
-     * @param ch        The format character we encountered in the pattern.
-     * @param count     Number of characters in the current pattern symbol (e.g.,
-     *                  "yyyy" in the pattern would result in a call to this function
-     *                  with ch equal to 'y' and count equal to 4)
-     * @param pos       The FieldPosition being filled in by the format() call.  If
-     *                  this function is formatting the field specfied by pos, it
-     *                  will fill in pos with the beginning and ending offsets of the
-     *                  field.
-     * @param status    Receives a status code, which will be U_ZERO_ERROR if the operation
-     *                  succeeds.
-     */
-    void subFormat(             UnicodeString &appendTo,
-                                UChar ch,
-                                int32_t count,
-                                FieldPosition& pos,
-                                Calendar& cal,
-                                UErrorCode& status) const; // in case of illegal argument
-
-    /**
-     * Used by subFormat() to format a numeric value.
-     * Appends to toAppendTo a string representation of "value"
-     * having a number of digits between "minDigits" and
-     * "maxDigits".  Uses the DateFormat's NumberFormat.
-     *
-     * @param appendTo  Output parameter to receive result.
-     *                  Formatted number is appended to existing contents.
-     * @param value     Value to format.
-     * @param minDigits Minimum number of digits the result should have
-     * @param maxDigits Maximum number of digits the result should have
-     */
-    void zeroPaddingNumber(          UnicodeString &appendTo,
-                                     int32_t value,
-                                     int32_t minDigits,
-                                     int32_t maxDigits) const;
-
-    /**
-     * Return true if the given format character, occuring count
-     * times, represents a numeric field.
-     */
-    static UBool isNumeric(UChar formatChar, int32_t count);
-
-    /**
-     * initializes fCalendar from parameters.  Returns fCalendar as a convenience.
-     * @param adoptZone  Zone to be adopted, or NULL for TimeZone::createDefault().
-     * @param locale Locale of the calendar
-     * @param status Error code
-     * @return the newly constructed fCalendar
-     */
-    Calendar *initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status);
-
-    /**
-     * initializes fSymbols from parameters.
-     * @param locale Locale of the symbols
-     * @param calendar Alias to Calendar that will be used.
-     * @param status Error code
-     */
-    void initializeSymbols(const Locale& locale, Calendar* calendar, UErrorCode& status);
-
-    /**
-     * Called by several of the constructors to load pattern data and formatting symbols
-     * out of a resource bundle and initialize the locale based on it.
-     * @param timeStyle     The time style, as passed to DateFormat::createDateInstance().
-     * @param dateStyle     The date style, as passed to DateFormat::createTimeInstance().
-     * @param locale        The locale to load the patterns from.
-     * @param status        Filled in with an error code if loading the data from the
-     *                      resources fails.
-     */
-    void construct(EStyle timeStyle, EStyle dateStyle, const Locale& locale, UErrorCode& status);
-
-    /**
-     * Called by construct() and the various constructors to set up the SimpleDateFormat's
-     * Calendar and NumberFormat objects.
-     * @param locale    The locale for which we want a Calendar and a NumberFormat.
-     * @param statuc    Filled in with an error code if creating either subobject fails.
-     */
-    void initialize(const Locale& locale, UErrorCode& status);
-
-    /**
-     * Private code-size reduction function used by subParse.
-     * @param text the time text being parsed.
-     * @param start where to start parsing.
-     * @param field the date field being parsed.
-     * @param stringArray the string array to parsed.
-     * @param stringArrayCount the size of the array.
-     * @param cal a Calendar set to the date and time to be formatted
-     *            into a date/time string.
-     * @return the new start position if matching succeeded; a negative number
-     * indicating matching failure, otherwise.
-     */
-    int32_t matchString(const UnicodeString& text, int32_t start, UCalendarDateFields field,
-                        const UnicodeString* stringArray, int32_t stringArrayCount, Calendar& cal) const;
-
-    /**
-     * Private code-size reduction function used by subParse.
-     * @param text the time text being parsed.
-     * @param start where to start parsing.
-     * @param field the date field being parsed.
-     * @param stringArray the string array to parsed.
-     * @param stringArrayCount the size of the array.
-     * @param cal a Calendar set to the date and time to be formatted
-     *            into a date/time string.
-     * @return the new start position if matching succeeded; a negative number
-     * indicating matching failure, otherwise.
-     */
-    int32_t matchQuarterString(const UnicodeString& text, int32_t start, UCalendarDateFields field,
-                               const UnicodeString* stringArray, int32_t stringArrayCount, Calendar& cal) const;
-    
-    /**
-     * Private function used by subParse to match literal pattern text.
-     *
-     * @param pattern the pattern string
-     * @param patternOffset the starting offset into the pattern text. On
-     *        outupt will be set the offset of the first non-literal character in the pattern
-     * @param text the text being parsed
-     * @param textOffset the starting offset into the text. On output
-     *                   will be set to the offset of the character after the match
-     * @param lenient <code>TRUE</code> if the parse is lenient, <code>FALSE</code> otherwise.
-     *
-     * @return <code>TRUE</code> if the literal text could be matched, <code>FALSE</code> otherwise.
-     */
-    static UBool matchLiterals(const UnicodeString &pattern, int32_t &patternOffset,
-                               const UnicodeString &text, int32_t &textOffset, UBool lenient);
-    
-    /**
-     * Private member function that converts the parsed date strings into
-     * timeFields. Returns -start (for ParsePosition) if failed.
-     * @param text the time text to be parsed.
-     * @param start where to start parsing.
-     * @param ch the pattern character for the date field text to be parsed.
-     * @param count the count of a pattern character.
-     * @param obeyCount if true then the count is strictly obeyed.
-     * @param ambiguousYear If true then the two-digit year == the default start year.
-     * @param cal a Calendar set to the date and time to be formatted
-     *            into a date/time string.
-     * @return the new start position if matching succeeded; a negative number
-     * indicating matching failure, otherwise.
-     */
-    int32_t subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count,
-                     UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], Calendar& cal) const;
-
-    void parseInt(const UnicodeString& text,
-                  Formattable& number,
-                  ParsePosition& pos,
-                  UBool allowNegative) const;
-
-    void parseInt(const UnicodeString& text,
-                  Formattable& number,
-                  int32_t maxDigits,
-                  ParsePosition& pos,
-                  UBool allowNegative) const;
-
-    /**
-     * Translate a pattern, mapping each character in the from string to the
-     * corresponding character in the to string. Return an error if the original
-     * pattern contains an unmapped character, or if a quote is unmatched.
-     * Quoted (single quotes only) material is not translated.
-     * @param originalPattern   the original pattern.
-     * @param translatedPattern Output param to receive the translited pattern.
-     * @param from              the characters to be translited from.
-     * @param to                the characters to be translited to.
-     * @param status            Receives a status code, which will be U_ZERO_ERROR
-     *                          if the operation succeeds.
-     */
-    static void translatePattern(const UnicodeString& originalPattern,
-                                UnicodeString& translatedPattern,
-                                const UnicodeString& from,
-                                const UnicodeString& to,
-                                UErrorCode& status);
-
-    /**
-     * Sets the starting date of the 100-year window that dates with 2-digit years
-     * are considered to fall within.
-     * @param startDate the start date
-     * @param status    Receives a status code, which will be U_ZERO_ERROR
-     *                  if the operation succeeds.
-     */
-    void         parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status);
-    
-    /**
-     * Private methods for formatting/parsing GMT string
-     */
-    void appendGMT(UnicodeString &appendTo, Calendar& cal, UErrorCode& status) const;
-    void formatGMTDefault(UnicodeString &appendTo, int32_t offset) const;
-    int32_t parseGMT(const UnicodeString &text, ParsePosition &pos) const;
-    int32_t parseGMTDefault(const UnicodeString &text, ParsePosition &pos) const;
-    UBool isDefaultGMTFormat() const;
-
-    void formatRFC822TZ(UnicodeString &appendTo, int32_t offset) const;
-
-    /**
-     * Initialize MessageFormat instances used for GMT formatting/parsing
-     */
-    void initGMTFormatters(UErrorCode &status);
-
-    /**
-     * Used to map pattern characters to Calendar field identifiers.
-     */
-    static const UCalendarDateFields fgPatternIndexToCalendarField[];
-
-    /**
-     * Map index into pattern character string to DateFormat field number
-     */
-    static const UDateFormatField fgPatternIndexToDateFormatField[];
-
-    /**
-     * Used to map Calendar field to field level.
-     * The larger the level, the smaller the field unit.
-     * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
-     * UCAL_MONTH level is 20.
-     */
-    static const int32_t fgCalendarFieldToLevel[];
-    static const int32_t fgPatternCharToLevel[];
-
-    /**
-     * The formatting pattern for this formatter.
-     */
-    UnicodeString       fPattern;
-
-    /**
-     * The original locale used (for reloading symbols)
-     */
-    Locale              fLocale;
-
-    /**
-     * A pointer to an object containing the strings to use in formatting (e.g.,
-     * month and day names, AM and PM strings, time zone names, etc.)
-     */
-    DateFormatSymbols*  fSymbols;   // Owned
-
-    /**
-     * If dates have ambiguous years, we map them into the century starting
-     * at defaultCenturyStart, which may be any date.  If defaultCenturyStart is
-     * set to SYSTEM_DEFAULT_CENTURY, which it is by default, then the system
-     * values are used.  The instance values defaultCenturyStart and
-     * defaultCenturyStartYear are only used if explicitly set by the user
-     * through the API method parseAmbiguousDatesAsAfter().
-     */
-    UDate                fDefaultCenturyStart;
-
-    /**
-     * See documentation for defaultCenturyStart.
-     */
-    /*transient*/ int32_t   fDefaultCenturyStartYear;
-
-    enum ParsedTZType {
-        TZTYPE_UNK,
-        TZTYPE_STD,
-        TZTYPE_DST
-    };
-
-    ParsedTZType tztype; // here to avoid api change
-
-    /*
-     * MessageFormat instances used for localized GMT format
-     */
-    MessageFormat   **fGMTFormatters;
-
-    UBool fHaveDefaultCentury;
-};
-
-inline UDate
-SimpleDateFormat::get2DigitYearStart(UErrorCode& /*status*/) const
-{
-    return fDefaultCenturyStart;
-}
-
-inline UnicodeString&
-SimpleDateFormat::format(const Formattable& obj,
-                         UnicodeString& appendTo,
-                         UErrorCode& status) const {
-    // Don't use Format:: - use immediate base class only,
-    // in case immediate base modifies behavior later.
-    return DateFormat::format(obj, appendTo, status);
-}
-
-inline UnicodeString&
-SimpleDateFormat::format(UDate date,
-                         UnicodeString& appendTo,
-                         FieldPosition& fieldPosition) const {
-    // Don't use Format:: - use immediate base class only,
-    // in case immediate base modifies behavior later.
-    return DateFormat::format(date, appendTo, fieldPosition);
-}
-
-inline UnicodeString&
-SimpleDateFormat::format(UDate date, UnicodeString& appendTo) const {
-    return DateFormat::format(date, appendTo);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _SMPDTFMT
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/smpdtfmt.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/smpdtfmt.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/smpdtfmt.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/smpdtfmt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,975 @@
+/*
+* Copyright (C) 1997-2009, International Business Machines Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* File SMPDTFMT.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   02/19/97    aliu        Converted from java.
+*   07/09/97    helena      Make ParsePosition into a class.
+*   07/21/98    stephen     Added GMT_PLUS, GMT_MINUS
+*                            Changed setTwoDigitStartDate to set2DigitYearStart
+*                            Changed getTwoDigitStartDate to get2DigitYearStart
+*                            Removed subParseLong
+*                            Removed getZoneIndex (added in DateFormatSymbols)
+*   06/14/99    stephen     Removed fgTimeZoneDataSuffix
+*   10/14/99    aliu        Updated class doc to describe 2-digit year parsing
+*                           {j28 4182066}.
+*******************************************************************************
+*/
+
+#ifndef SMPDTFMT_H
+#define SMPDTFMT_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Format and parse dates in a language-independent manner.
+ */
+ 
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/datefmt.h"
+
+U_NAMESPACE_BEGIN
+
+class DateFormatSymbols;
+class DateFormat;
+class MessageFormat;
+
+/**
+ *
+ * SimpleDateFormat is a concrete class for formatting and parsing dates in a
+ * language-independent manner. It allows for formatting (millis -> text),
+ * parsing (text -> millis), and normalization. Formats/Parses a date or time,
+ * which is the standard milliseconds since 24:00 GMT, Jan 1, 1970.
+ * <P>
+ * Clients are encouraged to create a date-time formatter using DateFormat::getInstance(),
+ * getDateInstance(), getDateInstance(), or getDateTimeInstance() rather than
+ * explicitly constructing an instance of SimpleDateFormat.  This way, the client
+ * is guaranteed to get an appropriate formatting pattern for whatever locale the
+ * program is running in.  However, if the client needs something more unusual than
+ * the default patterns in the locales, he can construct a SimpleDateFormat directly
+ * and give it an appropriate pattern (or use one of the factory methods on DateFormat
+ * and modify the pattern after the fact with toPattern() and applyPattern().
+ * <P>
+ * Date/Time format syntax:
+ * <P>
+ * The date/time format is specified by means of a string time pattern. In this
+ * pattern, all ASCII letters are reserved as pattern letters, which are defined
+ * as the following:
+ * <pre>
+ * \code
+ * Symbol   Meaning                 Presentation        Example
+ * ------   -------                 ------------        -------
+ * G        era designator          (Text)              AD
+ * y        year                    (Number)            1996
+ * Y        year (week of year)     (Number)            1997
+ * u        extended year           (Number)            4601
+ * Q        Quarter                 (Text & Number)     Q2 & 02
+ * M        month in year           (Text & Number)     July & 07
+ * d        day in month            (Number)            10
+ * h        hour in am/pm (1~12)    (Number)            12
+ * H        hour in day (0~23)      (Number)            0
+ * m        minute in hour          (Number)            30
+ * s        second in minute        (Number)            55
+ * S        fractional second       (Number)            978
+ * E        day of week             (Text)              Tuesday
+ * e        day of week (local 1~7) (Text & Number)     Tues & 2
+ * D        day in year             (Number)            189
+ * F        day of week in month    (Number)            2 (2nd Wed in July)
+ * w        week in year            (Number)            27
+ * W        week in month           (Number)            2
+ * a        am/pm marker            (Text)              PM
+ * k        hour in day (1~24)      (Number)            24
+ * K        hour in am/pm (0~11)    (Number)            0
+ * z        time zone               (Time)              Pacific Standard Time
+ * Z        time zone (RFC 822)     (Number)            -0800
+ * v        time zone (generic)     (Text)              Pacific Time
+ * V        time zone (abreviation) (Text)              PT
+ * VVVV     time zone (location)    (Text)              United States (Los Angeles)
+ * g        Julian day              (Number)            2451334
+ * A        milliseconds in day     (Number)            69540000
+ * q        stand alone quarter     (Text & Number)     Q2 & 02
+ * L        stand alone month       (Text & Number)     July & 07
+ * c        stand alone day of week (Text & Number)     Tuesday & 2
+ * '        escape for text         (Delimiter)         'Date='
+ * ''       single quote            (Literal)           'o''clock'
+ * \endcode
+ * </pre>
+ * The count of pattern letters determine the format.
+ * <P>
+ * (Text): 4 or more, use full form, &lt;4, use short or abbreviated form if it
+ * exists. (e.g., "EEEE" produces "Monday", "EEE" produces "Mon")
+ * <P>
+ * (Number): the minimum number of digits. Shorter numbers are zero-padded to
+ * this amount (e.g. if "m" produces "6", "mm" produces "06"). Year is handled
+ * specially; that is, if the count of 'y' is 2, the Year will be truncated to 2 digits.
+ * (e.g., if "yyyy" produces "1997", "yy" produces "97".)
+ * Unlike other fields, fractional seconds are padded on the right with zero.
+ * <P>
+ * (Text & Number): 3 or over, use text, otherwise use number.  (e.g., "M" produces "1",
+ * "MM" produces "01", "MMM" produces "Jan", and "MMMM" produces "January".)
+ * <P>
+ * Any characters in the pattern that are not in the ranges of ['a'..'z'] and
+ * ['A'..'Z'] will be treated as quoted text. For instance, characters
+ * like ':', '.', ' ', '#' and '@' will appear in the resulting time text
+ * even they are not embraced within single quotes.
+ * <P>
+ * A pattern containing any invalid pattern letter will result in a failing
+ * UErrorCode result during formatting or parsing.
+ * <P>
+ * Examples using the US locale:
+ * <pre>
+ * \code
+ *    Format Pattern                         Result
+ *    --------------                         -------
+ *    "yyyy.MM.dd G 'at' HH:mm:ss vvvv" ->>  1996.07.10 AD at 15:08:56 Pacific Time
+ *    "EEE, MMM d, ''yy"                ->>  Wed, July 10, '96
+ *    "h:mm a"                          ->>  12:08 PM
+ *    "hh 'o''clock' a, zzzz"           ->>  12 o'clock PM, Pacific Daylight Time
+ *    "K:mm a, vvv"                     ->>  0:00 PM, PT
+ *    "yyyyy.MMMMM.dd GGG hh:mm aaa"    ->>  1996.July.10 AD 12:08 PM
+ * \endcode
+ * </pre>
+ * Code Sample:
+ * <pre>
+ * \code
+ *     UErrorCode success = U_ZERO_ERROR;
+ *     SimpleTimeZone* pdt = new SimpleTimeZone(-8 * 60 * 60 * 1000, "PST");
+ *     pdt->setStartRule( Calendar::APRIL, 1, Calendar::SUNDAY, 2*60*60*1000);
+ *     pdt->setEndRule( Calendar::OCTOBER, -1, Calendar::SUNDAY, 2*60*60*1000);
+ *
+ *     // Format the current time.
+ *     SimpleDateFormat* formatter
+ *         = new SimpleDateFormat ("yyyy.MM.dd G 'at' hh:mm:ss a zzz", success );
+ *     GregorianCalendar cal(success);
+ *     UDate currentTime_1 = cal.getTime(success);
+ *     FieldPosition fp(0);
+ *     UnicodeString dateString;
+ *     formatter->format( currentTime_1, dateString, fp );
+ *     cout << "result: " << dateString << endl;
+ *
+ *     // Parse the previous string back into a Date.
+ *     ParsePosition pp(0);
+ *     UDate currentTime_2 = formatter->parse(dateString, pp );
+ * \endcode
+ * </pre>
+ * In the above example, the time value "currentTime_2" obtained from parsing
+ * will be equal to currentTime_1. However, they may not be equal if the am/pm
+ * marker 'a' is left out from the format pattern while the "hour in am/pm"
+ * pattern symbol is used. This information loss can happen when formatting the
+ * time in PM.
+ *
+ * <p>
+ * When parsing a date string using the abbreviated year pattern ("y" or "yy"),
+ * SimpleDateFormat must interpret the abbreviated year
+ * relative to some century.  It does this by adjusting dates to be
+ * within 80 years before and 20 years after the time the SimpleDateFormat
+ * instance is created. For example, using a pattern of "MM/dd/yy" and a
+ * SimpleDateFormat instance created on Jan 1, 1997,  the string
+ * "01/11/12" would be interpreted as Jan 11, 2012 while the string "05/04/64"
+ * would be interpreted as May 4, 1964.
+ * During parsing, only strings consisting of exactly two digits, as defined by
+ * <code>Unicode::isDigit()</code>, will be parsed into the default century.
+ * Any other numeric string, such as a one digit string, a three or more digit
+ * string, or a two digit string that isn't all digits (for example, "-1"), is
+ * interpreted literally.  So "01/02/3" or "01/02/003" are parsed, using the
+ * same pattern, as Jan 2, 3 AD.  Likewise, "01/02/-3" is parsed as Jan 2, 4 BC.
+ *
+ * <p>
+ * If the year pattern has more than two 'y' characters, the year is
+ * interpreted literally, regardless of the number of digits.  So using the
+ * pattern "MM/dd/yyyy", "01/11/12" parses to Jan 11, 12 A.D.
+ *
+ * <p>
+ * When numeric fields abut one another directly, with no intervening delimiter
+ * characters, they constitute a run of abutting numeric fields.  Such runs are
+ * parsed specially.  For example, the format "HHmmss" parses the input text
+ * "123456" to 12:34:56, parses the input text "12345" to 1:23:45, and fails to
+ * parse "1234".  In other words, the leftmost field of the run is flexible,
+ * while the others keep a fixed width.  If the parse fails anywhere in the run,
+ * then the leftmost field is shortened by one character, and the entire run is
+ * parsed again. This is repeated until either the parse succeeds or the
+ * leftmost field is one character in length.  If the parse still fails at that
+ * point, the parse of the run fails.
+ *
+ * <P>
+ * For time zones that have no names, SimpleDateFormat uses strings GMT+hours:minutes or
+ * GMT-hours:minutes.
+ * <P>
+ * The calendar defines what is the first day of the week, the first week of the
+ * year, whether hours are zero based or not (0 vs 12 or 24), and the timezone.
+ * There is one common number format to handle all the numbers; the digit count
+ * is handled programmatically according to the pattern.
+ *
+ * <p><em>User subclasses are not supported.</em> While clients may write
+ * subclasses, such code will not necessarily work and will not be
+ * guaranteed to work stably from release to release.
+ */
+class U_I18N_API SimpleDateFormat: public DateFormat {
+public:
+    /**
+     * Construct a SimpleDateFormat using the default pattern for the default
+     * locale.
+     * <P>
+     * [Note:] Not all locales support SimpleDateFormat; for full generality,
+     * use the factory methods in the DateFormat class.
+     * @param status    Output param set to success/failure code.
+     * @stable ICU 2.0
+     */
+    SimpleDateFormat(UErrorCode& status);
+
+    /**
+     * Construct a SimpleDateFormat using the given pattern and the default locale.
+     * The locale is used to obtain the symbols used in formatting (e.g., the
+     * names of the months), but not to provide the pattern.
+     * <P>
+     * [Note:] Not all locales support SimpleDateFormat; for full generality,
+     * use the factory methods in the DateFormat class.
+     * @param pattern    the pattern for the format.
+     * @param status     Output param set to success/failure code.
+     * @stable ICU 2.0
+     */
+    SimpleDateFormat(const UnicodeString& pattern,
+                     UErrorCode& status);
+
+    /**
+     * Construct a SimpleDateFormat using the given pattern and locale.
+     * The locale is used to obtain the symbols used in formatting (e.g., the
+     * names of the months), but not to provide the pattern.
+     * <P>
+     * [Note:] Not all locales support SimpleDateFormat; for full generality,
+     * use the factory methods in the DateFormat class.
+     * @param pattern    the pattern for the format.
+     * @param locale     the given locale.
+     * @param status     Output param set to success/failure code.
+     * @stable ICU 2.0
+     */
+    SimpleDateFormat(const UnicodeString& pattern,
+                     const Locale& locale,
+                     UErrorCode& status);
+
+    /**
+     * Construct a SimpleDateFormat using the given pattern and locale-specific
+     * symbol data.  The formatter takes ownership of the DateFormatSymbols object;
+     * the caller is no longer responsible for deleting it.
+     * @param pattern           the given pattern for the format.
+     * @param formatDataToAdopt the symbols to be adopted.
+     * @param status            Output param set to success/faulure code.
+     * @stable ICU 2.0
+     */
+    SimpleDateFormat(const UnicodeString& pattern,
+                     DateFormatSymbols* formatDataToAdopt,
+                     UErrorCode& status);
+
+    /**
+     * Construct a SimpleDateFormat using the given pattern and locale-specific
+     * symbol data.  The DateFormatSymbols object is NOT adopted; the caller
+     * remains responsible for deleting it.
+     * @param pattern           the given pattern for the format.
+     * @param formatData        the formatting symbols to be use.
+     * @param status            Output param set to success/faulure code.
+     * @stable ICU 2.0
+     */
+    SimpleDateFormat(const UnicodeString& pattern,
+                     const DateFormatSymbols& formatData,
+                     UErrorCode& status);
+
+    /**
+     * Copy constructor.
+     * @stable ICU 2.0
+     */
+    SimpleDateFormat(const SimpleDateFormat&);
+
+    /**
+     * Assignment operator.
+     * @stable ICU 2.0
+     */
+    SimpleDateFormat& operator=(const SimpleDateFormat&);
+
+    /**
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~SimpleDateFormat();
+
+    /**
+     * Clone this Format object polymorphically. The caller owns the result and
+     * should delete it when done.
+     * @return    A copy of the object.
+     * @stable ICU 2.0
+     */
+    virtual Format* clone(void) const;
+
+    /**
+     * Return true if the given Format objects are semantically equal. Objects
+     * of different subclasses are considered unequal.
+     * @param other    the object to be compared with.
+     * @return         true if the given Format objects are semantically equal.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const Format& other) const;
+
+    /**
+     * Format a date or time, which is the standard millis since 24:00 GMT, Jan
+     * 1, 1970. Overrides DateFormat pure virtual method.
+     * <P>
+     * Example: using the US locale: "yyyy.MM.dd e 'at' HH:mm:ss zzz" ->>
+     * 1996.07.10 AD at 15:08:56 PDT
+     *
+     * @param cal       Calendar set to the date and time to be formatted
+     *                  into a date/time string.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       The formatting position. On input: an alignment field,
+     *                  if desired. On output: the offsets of the alignment field.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.1
+     */
+    virtual UnicodeString& format(  Calendar& cal,
+                                    UnicodeString& appendTo,
+                                    FieldPosition& pos) const;
+
+    /**
+     * Format a date or time, which is the standard millis since 24:00 GMT, Jan
+     * 1, 1970. Overrides DateFormat pure virtual method.
+     * <P>
+     * Example: using the US locale: "yyyy.MM.dd e 'at' HH:mm:ss zzz" ->>
+     * 1996.07.10 AD at 15:08:56 PDT
+     *
+     * @param obj       A Formattable containing the date-time value to be formatted
+     *                  into a date-time string.  If the type of the Formattable
+     *                  is a numeric type, it is treated as if it were an
+     *                  instance of Date.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param pos       The formatting position. On input: an alignment field,
+     *                  if desired. On output: the offsets of the alignment field.
+     * @param status    Output param set to success/faulure code.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& format(  const Formattable& obj,
+                                    UnicodeString& appendTo,
+                                    FieldPosition& pos,
+                                    UErrorCode& status) const;
+
+    /**
+     * Redeclared DateFormat method.
+     * @param date          the Date value to be formatted.
+     * @param appendTo      Output parameter to receive result.
+     *                      Result is appended to existing contents.
+     * @param fieldPosition The formatting position. On input: an alignment field,
+     *                      if desired. On output: the offsets of the alignment field.
+     * @return              Reference to 'appendTo' parameter.
+     * @stable ICU 2.1
+     */
+    UnicodeString& format(UDate date,
+                          UnicodeString& appendTo,
+                          FieldPosition& fieldPosition) const;
+
+    /**
+     * Redeclared DateFormat method.
+     * @param obj       Object to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param status    Input/output success/failure code.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(const Formattable& obj,
+                          UnicodeString& appendTo,
+                          UErrorCode& status) const;
+
+    /**
+     * Redeclared DateFormat method.
+     * @param date      Date value to be formatted.
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @return          Reference to 'appendTo' parameter.
+     * @stable ICU 2.0
+     */
+    UnicodeString& format(UDate date, UnicodeString& appendTo) const;
+
+    /**
+     * Parse a date/time string beginning at the given parse position. For
+     * example, a time text "07/10/96 4:5 PM, PDT" will be parsed into a Date
+     * that is equivalent to Date(837039928046).
+     * <P>
+     * By default, parsing is lenient: If the input is not in the form used by
+     * this object's format method but can still be parsed as a date, then the
+     * parse succeeds. Clients may insist on strict adherence to the format by
+     * calling setLenient(false).
+     *
+     * @param text  The date/time string to be parsed
+     * @param cal   a Calendar set to the date and time to be formatted
+     *              into a date/time string.
+     * @param pos   On input, the position at which to start parsing; on
+     *              output, the position at which parsing terminated, or the
+     *              start position if the parse failed.
+     * @return      A valid UDate if the input could be parsed.
+     * @stable ICU 2.1
+     */
+    virtual void parse( const UnicodeString& text,
+                        Calendar& cal,
+                        ParsePosition& pos) const;
+
+    /**
+     * Parse a date/time string starting at the given parse position. For
+     * example, a time text "07/10/96 4:5 PM, PDT" will be parsed into a Date
+     * that is equivalent to Date(837039928046).
+     * <P>
+     * By default, parsing is lenient: If the input is not in the form used by
+     * this object's format method but can still be parsed as a date, then the
+     * parse succeeds. Clients may insist on strict adherence to the format by
+     * calling setLenient(false).
+     *
+     * @see DateFormat::setLenient(boolean)
+     *
+     * @param text  The date/time string to be parsed
+     * @param pos   On input, the position at which to start parsing; on
+     *              output, the position at which parsing terminated, or the
+     *              start position if the parse failed.
+     * @return      A valid UDate if the input could be parsed.
+     * @stable ICU 2.0
+     */
+    UDate parse( const UnicodeString& text,
+                 ParsePosition& pos) const;
+
+
+    /**
+     * Parse a date/time string. For example, a time text "07/10/96 4:5 PM, PDT"
+     * will be parsed into a UDate that is equivalent to Date(837039928046).
+     * Parsing begins at the beginning of the string and proceeds as far as
+     * possible.  Assuming no parse errors were encountered, this function
+     * doesn't return any information about how much of the string was consumed
+     * by the parsing.  If you need that information, use the version of
+     * parse() that takes a ParsePosition.
+     *
+     * @param text  The date/time string to be parsed
+     * @param status Filled in with U_ZERO_ERROR if the parse was successful, and with
+     *              an error value if there was a parse error.
+     * @return      A valid UDate if the input could be parsed.
+     * @stable ICU 2.0
+     */
+    virtual UDate parse( const UnicodeString& text,
+                        UErrorCode& status) const;
+
+    /**
+     * Set the start UDate used to interpret two-digit year strings.
+     * When dates are parsed having 2-digit year strings, they are placed within
+     * a assumed range of 100 years starting on the two digit start date.  For
+     * example, the string "24-Jan-17" may be in the year 1817, 1917, 2017, or
+     * some other year.  SimpleDateFormat chooses a year so that the resultant
+     * date is on or after the two digit start date and within 100 years of the
+     * two digit start date.
+     * <P>
+     * By default, the two digit start date is set to 80 years before the current
+     * time at which a SimpleDateFormat object is created.
+     * @param d      start UDate used to interpret two-digit year strings.
+     * @param status Filled in with U_ZERO_ERROR if the parse was successful, and with
+     *               an error value if there was a parse error.
+     * @stable ICU 2.0
+     */
+    virtual void set2DigitYearStart(UDate d, UErrorCode& status);
+
+    /**
+     * Get the start UDate used to interpret two-digit year strings.
+     * When dates are parsed having 2-digit year strings, they are placed within
+     * a assumed range of 100 years starting on the two digit start date.  For
+     * example, the string "24-Jan-17" may be in the year 1817, 1917, 2017, or
+     * some other year.  SimpleDateFormat chooses a year so that the resultant
+     * date is on or after the two digit start date and within 100 years of the
+     * two digit start date.
+     * <P>
+     * By default, the two digit start date is set to 80 years before the current
+     * time at which a SimpleDateFormat object is created.
+     * @param status Filled in with U_ZERO_ERROR if the parse was successful, and with
+     *               an error value if there was a parse error.
+     * @stable ICU 2.0
+     */
+    UDate get2DigitYearStart(UErrorCode& status) const;
+
+    /**
+     * Return a pattern string describing this date format.
+     * @param result Output param to receive the pattern.
+     * @return       A reference to 'result'.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& toPattern(UnicodeString& result) const;
+
+    /**
+     * Return a localized pattern string describing this date format.
+     * In most cases, this will return the same thing as toPattern(),
+     * but a locale can specify characters to use in pattern descriptions
+     * in place of the ones described in this class's class documentation.
+     * (Presumably, letters that would be more mnemonic in that locale's
+     * language.)  This function would produce a pattern using those
+     * letters.
+     *
+     * @param result    Receives the localized pattern.
+     * @param status    Output param set to success/failure code on
+     *                  exit. If the pattern is invalid, this will be
+     *                  set to a failure result.
+     * @return          A reference to 'result'.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& toLocalizedPattern(UnicodeString& result,
+                                              UErrorCode& status) const;
+
+    /**
+     * Apply the given unlocalized pattern string to this date format.
+     * (i.e., after this call, this formatter will format dates according to
+     * the new pattern)
+     *
+     * @param pattern   The pattern to be applied.
+     * @stable ICU 2.0
+     */
+    virtual void applyPattern(const UnicodeString& pattern);
+
+    /**
+     * Apply the given localized pattern string to this date format.
+     * (see toLocalizedPattern() for more information on localized patterns.)
+     *
+     * @param pattern   The localized pattern to be applied.
+     * @param status    Output param set to success/failure code on
+     *                  exit. If the pattern is invalid, this will be
+     *                  set to a failure result.
+     * @stable ICU 2.0
+     */
+    virtual void applyLocalizedPattern(const UnicodeString& pattern,
+                                       UErrorCode& status);
+
+    /**
+     * Gets the date/time formatting symbols (this is an object carrying
+     * the various strings and other symbols used in formatting: e.g., month
+     * names and abbreviations, time zone names, AM/PM strings, etc.)
+     * @return a copy of the date-time formatting data associated
+     * with this date-time formatter.
+     * @stable ICU 2.0
+     */
+    virtual const DateFormatSymbols* getDateFormatSymbols(void) const;
+
+    /**
+     * Set the date/time formatting symbols.  The caller no longer owns the
+     * DateFormatSymbols object and should not delete it after making this call.
+     * @param newFormatSymbols the given date-time formatting symbols to copy.
+     * @stable ICU 2.0
+     */
+    virtual void adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols);
+
+    /**
+     * Set the date/time formatting data.
+     * @param newFormatSymbols the given date-time formatting symbols to copy.
+     * @stable ICU 2.0
+     */
+    virtual void setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols);
+
+    /**
+     * Return the class ID for this class. This is useful only for comparing to
+     * a return value from getDynamicClassID(). For example:
+     * <pre>
+     * .   Base* polymorphic_pointer = createPolymorphicObject();
+     * .   if (polymorphic_pointer->getDynamicClassID() ==
+     * .       erived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+     * method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+    /**
+     * Set the calendar to be used by this date format. Initially, the default
+     * calendar for the specified or default locale is used.  The caller should
+     * not delete the Calendar object after it is adopted by this call.
+     * Adopting a new calendar will change to the default symbols.
+     *
+     * @param calendarToAdopt    Calendar object to be adopted.
+     * @stable ICU 2.0
+     */
+    virtual void adoptCalendar(Calendar* calendarToAdopt);
+
+    /**
+     * This is for ICU internal use only. Please do not use.
+     * Check whether the 'field' is smaller than all the fields covered in
+     * pattern, return TRUE if it is. The sequence of calendar field, 
+     * from large to small is: ERA, YEAR, MONTH, DATE, AM_PM, HOUR, MINUTE,...
+     * @param field    the calendar field need to check against
+     * @return         TRUE if the 'field' is smaller than all the fields 
+     *                 covered in pattern. FALSE otherwise.
+     * @internal ICU 4.0
+     */
+    UBool isFieldUnitIgnored(UCalendarDateFields field) const;
+
+
+    /**
+     * This is for ICU internal use only. Please do not use.
+     * Check whether the 'field' is smaller than all the fields covered in
+     * pattern, return TRUE if it is. The sequence of calendar field, 
+     * from large to small is: ERA, YEAR, MONTH, DATE, AM_PM, HOUR, MINUTE,...
+     * @param pattern  the pattern to check against
+     * @param field    the calendar field need to check against
+     * @return         TRUE if the 'field' is smaller than all the fields 
+     *                 covered in pattern. FALSE otherwise.
+     * @internal ICU 4.0
+     */
+    static UBool isFieldUnitIgnored(const UnicodeString& pattern, 
+                                    UCalendarDateFields field);
+
+
+
+    /**
+     * This is for ICU internal use only. Please do not use.
+     * Get the locale of this simple date formatter.
+     * It is used in DateIntervalFormat.
+     *
+     * @return   locale in this simple date formatter
+     * @internal ICU 4.0
+     */
+    const Locale& getSmpFmtLocale(void) const;
+
+
+private:
+    friend class DateFormat;
+
+    void initializeDefaultCentury(void);
+
+    SimpleDateFormat(); // default constructor not implemented
+
+    /**
+     * Used by the DateFormat factory methods to construct a SimpleDateFormat.
+     * @param timeStyle the time style.
+     * @param dateStyle the date style.
+     * @param locale    the given locale.
+     * @param status    Output param set to success/failure code on
+     *                  exit.
+     */
+    SimpleDateFormat(EStyle timeStyle, EStyle dateStyle, const Locale& locale, UErrorCode& status);
+
+    /**
+     * Construct a SimpleDateFormat for the given locale.  If no resource data
+     * is available, create an object of last resort, using hard-coded strings.
+     * This is an internal method, called by DateFormat.  It should never fail.
+     * @param locale    the given locale.
+     * @param status    Output param set to success/failure code on
+     *                  exit.
+     */
+    SimpleDateFormat(const Locale& locale, UErrorCode& status); // Use default pattern
+
+    /**
+     * Called by format() to format a single field.
+     *
+     * @param appendTo  Output parameter to receive result.
+     *                  Result is appended to existing contents.
+     * @param ch        The format character we encountered in the pattern.
+     * @param count     Number of characters in the current pattern symbol (e.g.,
+     *                  "yyyy" in the pattern would result in a call to this function
+     *                  with ch equal to 'y' and count equal to 4)
+     * @param pos       The FieldPosition being filled in by the format() call.  If
+     *                  this function is formatting the field specfied by pos, it
+     *                  will fill in pos with the beginning and ending offsets of the
+     *                  field.
+     * @param status    Receives a status code, which will be U_ZERO_ERROR if the operation
+     *                  succeeds.
+     */
+    void subFormat(             UnicodeString &appendTo,
+                                UChar ch,
+                                int32_t count,
+                                FieldPosition& pos,
+                                Calendar& cal,
+                                UErrorCode& status) const; // in case of illegal argument
+
+    /**
+     * Used by subFormat() to format a numeric value.
+     * Appends to toAppendTo a string representation of "value"
+     * having a number of digits between "minDigits" and
+     * "maxDigits".  Uses the DateFormat's NumberFormat.
+     *
+     * @param appendTo  Output parameter to receive result.
+     *                  Formatted number is appended to existing contents.
+     * @param value     Value to format.
+     * @param minDigits Minimum number of digits the result should have
+     * @param maxDigits Maximum number of digits the result should have
+     */
+    void zeroPaddingNumber(          UnicodeString &appendTo,
+                                     int32_t value,
+                                     int32_t minDigits,
+                                     int32_t maxDigits) const;
+
+    /**
+     * Return true if the given format character, occuring count
+     * times, represents a numeric field.
+     */
+    static UBool isNumeric(UChar formatChar, int32_t count);
+
+    /**
+     * initializes fCalendar from parameters.  Returns fCalendar as a convenience.
+     * @param adoptZone  Zone to be adopted, or NULL for TimeZone::createDefault().
+     * @param locale Locale of the calendar
+     * @param status Error code
+     * @return the newly constructed fCalendar
+     */
+    Calendar *initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status);
+
+    /**
+     * initializes fSymbols from parameters.
+     * @param locale Locale of the symbols
+     * @param calendar Alias to Calendar that will be used.
+     * @param status Error code
+     */
+    void initializeSymbols(const Locale& locale, Calendar* calendar, UErrorCode& status);
+
+    /**
+     * Called by several of the constructors to load pattern data and formatting symbols
+     * out of a resource bundle and initialize the locale based on it.
+     * @param timeStyle     The time style, as passed to DateFormat::createDateInstance().
+     * @param dateStyle     The date style, as passed to DateFormat::createTimeInstance().
+     * @param locale        The locale to load the patterns from.
+     * @param status        Filled in with an error code if loading the data from the
+     *                      resources fails.
+     */
+    void construct(EStyle timeStyle, EStyle dateStyle, const Locale& locale, UErrorCode& status);
+
+    /**
+     * Called by construct() and the various constructors to set up the SimpleDateFormat's
+     * Calendar and NumberFormat objects.
+     * @param locale    The locale for which we want a Calendar and a NumberFormat.
+     * @param statuc    Filled in with an error code if creating either subobject fails.
+     */
+    void initialize(const Locale& locale, UErrorCode& status);
+
+    /**
+     * Private code-size reduction function used by subParse.
+     * @param text the time text being parsed.
+     * @param start where to start parsing.
+     * @param field the date field being parsed.
+     * @param stringArray the string array to parsed.
+     * @param stringArrayCount the size of the array.
+     * @param cal a Calendar set to the date and time to be formatted
+     *            into a date/time string.
+     * @return the new start position if matching succeeded; a negative number
+     * indicating matching failure, otherwise.
+     */
+    int32_t matchString(const UnicodeString& text, int32_t start, UCalendarDateFields field,
+                        const UnicodeString* stringArray, int32_t stringArrayCount, Calendar& cal) const;
+
+    /**
+     * Private code-size reduction function used by subParse.
+     * @param text the time text being parsed.
+     * @param start where to start parsing.
+     * @param field the date field being parsed.
+     * @param stringArray the string array to parsed.
+     * @param stringArrayCount the size of the array.
+     * @param cal a Calendar set to the date and time to be formatted
+     *            into a date/time string.
+     * @return the new start position if matching succeeded; a negative number
+     * indicating matching failure, otherwise.
+     */
+    int32_t matchQuarterString(const UnicodeString& text, int32_t start, UCalendarDateFields field,
+                               const UnicodeString* stringArray, int32_t stringArrayCount, Calendar& cal) const;
+    
+    /**
+     * Private function used by subParse to match literal pattern text.
+     *
+     * @param pattern the pattern string
+     * @param patternOffset the starting offset into the pattern text. On
+     *        outupt will be set the offset of the first non-literal character in the pattern
+     * @param text the text being parsed
+     * @param textOffset the starting offset into the text. On output
+     *                   will be set to the offset of the character after the match
+     * @param lenient <code>TRUE</code> if the parse is lenient, <code>FALSE</code> otherwise.
+     *
+     * @return <code>TRUE</code> if the literal text could be matched, <code>FALSE</code> otherwise.
+     */
+    static UBool matchLiterals(const UnicodeString &pattern, int32_t &patternOffset,
+                               const UnicodeString &text, int32_t &textOffset, UBool lenient);
+    
+    /**
+     * Private member function that converts the parsed date strings into
+     * timeFields. Returns -start (for ParsePosition) if failed.
+     * @param text the time text to be parsed.
+     * @param start where to start parsing.
+     * @param ch the pattern character for the date field text to be parsed.
+     * @param count the count of a pattern character.
+     * @param obeyCount if true then the count is strictly obeyed.
+     * @param ambiguousYear If true then the two-digit year == the default start year.
+     * @param cal a Calendar set to the date and time to be formatted
+     *            into a date/time string.
+     * @return the new start position if matching succeeded; a negative number
+     * indicating matching failure, otherwise.
+     */
+    int32_t subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count,
+                     UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], Calendar& cal) const;
+
+    void parseInt(const UnicodeString& text,
+                  Formattable& number,
+                  ParsePosition& pos,
+                  UBool allowNegative) const;
+
+    void parseInt(const UnicodeString& text,
+                  Formattable& number,
+                  int32_t maxDigits,
+                  ParsePosition& pos,
+                  UBool allowNegative) const;
+
+    /**
+     * Translate a pattern, mapping each character in the from string to the
+     * corresponding character in the to string. Return an error if the original
+     * pattern contains an unmapped character, or if a quote is unmatched.
+     * Quoted (single quotes only) material is not translated.
+     * @param originalPattern   the original pattern.
+     * @param translatedPattern Output param to receive the translited pattern.
+     * @param from              the characters to be translited from.
+     * @param to                the characters to be translited to.
+     * @param status            Receives a status code, which will be U_ZERO_ERROR
+     *                          if the operation succeeds.
+     */
+    static void translatePattern(const UnicodeString& originalPattern,
+                                UnicodeString& translatedPattern,
+                                const UnicodeString& from,
+                                const UnicodeString& to,
+                                UErrorCode& status);
+
+    /**
+     * Sets the starting date of the 100-year window that dates with 2-digit years
+     * are considered to fall within.
+     * @param startDate the start date
+     * @param status    Receives a status code, which will be U_ZERO_ERROR
+     *                  if the operation succeeds.
+     */
+    void         parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status);
+    
+    /**
+     * Private methods for formatting/parsing GMT string
+     */
+    void appendGMT(UnicodeString &appendTo, Calendar& cal, UErrorCode& status) const;
+    void formatGMTDefault(UnicodeString &appendTo, int32_t offset) const;
+    int32_t parseGMT(const UnicodeString &text, ParsePosition &pos) const;
+    int32_t parseGMTDefault(const UnicodeString &text, ParsePosition &pos) const;
+    UBool isDefaultGMTFormat() const;
+
+    void formatRFC822TZ(UnicodeString &appendTo, int32_t offset) const;
+
+    /**
+     * Initialize MessageFormat instances used for GMT formatting/parsing
+     */
+    void initGMTFormatters(UErrorCode &status);
+
+    /**
+     * Used to map pattern characters to Calendar field identifiers.
+     */
+    static const UCalendarDateFields fgPatternIndexToCalendarField[];
+
+    /**
+     * Map index into pattern character string to DateFormat field number
+     */
+    static const UDateFormatField fgPatternIndexToDateFormatField[];
+
+    /**
+     * Used to map Calendar field to field level.
+     * The larger the level, the smaller the field unit.
+     * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
+     * UCAL_MONTH level is 20.
+     */
+    static const int32_t fgCalendarFieldToLevel[];
+    static const int32_t fgPatternCharToLevel[];
+
+    /**
+     * The formatting pattern for this formatter.
+     */
+    UnicodeString       fPattern;
+
+    /**
+     * The original locale used (for reloading symbols)
+     */
+    Locale              fLocale;
+
+    /**
+     * A pointer to an object containing the strings to use in formatting (e.g.,
+     * month and day names, AM and PM strings, time zone names, etc.)
+     */
+    DateFormatSymbols*  fSymbols;   // Owned
+
+    /**
+     * If dates have ambiguous years, we map them into the century starting
+     * at defaultCenturyStart, which may be any date.  If defaultCenturyStart is
+     * set to SYSTEM_DEFAULT_CENTURY, which it is by default, then the system
+     * values are used.  The instance values defaultCenturyStart and
+     * defaultCenturyStartYear are only used if explicitly set by the user
+     * through the API method parseAmbiguousDatesAsAfter().
+     */
+    UDate                fDefaultCenturyStart;
+
+    /**
+     * See documentation for defaultCenturyStart.
+     */
+    /*transient*/ int32_t   fDefaultCenturyStartYear;
+
+    enum ParsedTZType {
+        TZTYPE_UNK,
+        TZTYPE_STD,
+        TZTYPE_DST
+    };
+
+    ParsedTZType tztype; // here to avoid api change
+
+    /*
+     * MessageFormat instances used for localized GMT format
+     */
+    MessageFormat   **fGMTFormatters;
+
+    UBool fHaveDefaultCentury;
+};
+
+inline UDate
+SimpleDateFormat::get2DigitYearStart(UErrorCode& /*status*/) const
+{
+    return fDefaultCenturyStart;
+}
+
+inline UnicodeString&
+SimpleDateFormat::format(const Formattable& obj,
+                         UnicodeString& appendTo,
+                         UErrorCode& status) const {
+    // Don't use Format:: - use immediate base class only,
+    // in case immediate base modifies behavior later.
+    return DateFormat::format(obj, appendTo, status);
+}
+
+inline UnicodeString&
+SimpleDateFormat::format(UDate date,
+                         UnicodeString& appendTo,
+                         FieldPosition& fieldPosition) const {
+    // Don't use Format:: - use immediate base class only,
+    // in case immediate base modifies behavior later.
+    return DateFormat::format(date, appendTo, fieldPosition);
+}
+
+inline UnicodeString&
+SimpleDateFormat::format(UDate date, UnicodeString& appendTo) const {
+    return DateFormat::format(date, appendTo);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _SMPDTFMT
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/sortkey.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/sortkey.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/sortkey.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,324 +0,0 @@
-/*
- *****************************************************************************
- * Copyright (C) 1996-2006, International Business Machines Corporation and others.
- * All Rights Reserved.
- *****************************************************************************
- *
- * File sortkey.h
- *
- * Created by: Helena Shih
- *
- * Modification History:
- *
- *  Date         Name          Description
- *
- *  6/20/97     helena      Java class name change.
- *  8/18/97     helena      Added internal API documentation.
- *  6/26/98     erm         Changed to use byte arrays and memcmp.
- *****************************************************************************
- */
-
-#ifndef SORTKEY_H
-#define SORTKEY_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Keys for comparing strings multiple times. 
- */
- 
-#if !UCONFIG_NO_COLLATION
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-#include "unicode/coll.h"
-
-U_NAMESPACE_BEGIN
-
-/* forward declaration */
-class RuleBasedCollator;
-
-/**
- *
- * Collation keys are generated by the Collator class.  Use the CollationKey objects
- * instead of Collator to compare strings multiple times.  A CollationKey
- * preprocesses the comparison information from the Collator object to
- * make the comparison faster.  If you are not going to comparing strings
- * multiple times, then using the Collator object is generally faster,
- * since it only processes as much of the string as needed to make a
- * comparison.
- * <p> For example (with strength == tertiary)
- * <p>When comparing "Abernathy" to "Baggins-Smythworthy", Collator
- * only needs to process a couple of characters, while a comparison
- * with CollationKeys will process all of the characters.  On the other hand,
- * if you are doing a sort of a number of fields, it is much faster to use
- * CollationKeys, since you will be comparing strings multiple times.
- * <p>Typical use of CollationKeys are in databases, where you store a CollationKey
- * in a hidden field, and use it for sorting or indexing.
- *
- * <p>Example of use:
- * <pre>
- * \code
- *     UErrorCode success = U_ZERO_ERROR;
- *     Collator* myCollator = Collator::createInstance(success);
- *     CollationKey* keys = new CollationKey [3];
- *     myCollator->getCollationKey("Tom", keys[0], success );
- *     myCollator->getCollationKey("Dick", keys[1], success );
- *     myCollator->getCollationKey("Harry", keys[2], success );
- *
- *     // Inside body of sort routine, compare keys this way:
- *     CollationKey tmp;
- *     if(keys[0].compareTo( keys[1] ) > 0 ) {
- *         tmp = keys[0]; keys[0] = keys[1]; keys[1] = tmp;
- *     }
- *     //...
- * \endcode
- * </pre>
- * <p>Because Collator::compare()'s algorithm is complex, it is faster to sort
- * long lists of words by retrieving collation keys with Collator::getCollationKey().
- * You can then cache the collation keys and compare them using CollationKey::compareTo().
- * <p>
- * <strong>Note:</strong> <code>Collator</code>s with different Locale,
- * CollationStrength and DecompositionMode settings will return different
- * CollationKeys for the same set of strings. Locales have specific
- * collation rules, and the way in which secondary and tertiary differences
- * are taken into account, for example, will result in different CollationKeys
- * for same strings.
- * <p>
-
- * @see          Collator
- * @see          RuleBasedCollator
- * @version      1.3 12/18/96
- * @author       Helena Shih
- * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
- */
-class U_I18N_API CollationKey : public UObject {
-public:
-    /**
-    * This creates an empty collation key based on the null string.  An empty
-    * collation key contains no sorting information.  When comparing two empty
-    * collation keys, the result is Collator::EQUAL.  Comparing empty collation key
-    * with non-empty collation key is always Collator::LESS.
-    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-    */
-    CollationKey();
-
-
-    /**
-    * Creates a collation key based on the collation key values.
-    * @param values the collation key values
-    * @param count number of collation key values, including trailing nulls.
-    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-    */
-    CollationKey(const  uint8_t*    values,
-                int32_t     count);
-
-    /**
-    * Copy constructor.
-    * @param other    the object to be copied.
-    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-    */
-    CollationKey(const CollationKey& other);
-
-    /**
-    * Sort key destructor.
-    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-    */
-    virtual ~CollationKey();
-
-    /**
-    * Assignment operator
-    * @param other    the object to be copied.
-    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-    */
-    const   CollationKey&   operator=(const CollationKey& other);
-
-    /**
-    * Compare if two collation keys are the same.
-    * @param source the collation key to compare to.
-    * @return Returns true if two collation keys are equal, false otherwise.
-    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-    */
-    UBool                   operator==(const CollationKey& source) const;
-
-    /**
-    * Compare if two collation keys are not the same.
-    * @param source the collation key to compare to.
-    * @return Returns TRUE if two collation keys are different, FALSE otherwise.
-    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-    */
-    UBool                   operator!=(const CollationKey& source) const;
-
-
-    /**
-    * Test to see if the key is in an invalid state. The key will be in an
-    * invalid state if it couldn't allocate memory for some operation.
-    * @return Returns TRUE if the key is in an invalid, FALSE otherwise.
-    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-    */
-    UBool                   isBogus(void) const;
-
-    /**
-    * Returns a pointer to the collation key values. The storage is owned
-    * by the collation key and the pointer will become invalid if the key
-    * is deleted.
-    * @param count the output parameter of number of collation key values,
-    * including any trailing nulls.
-    * @return a pointer to the collation key values.
-    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-    */
-    const    uint8_t*       getByteArray(int32_t& count) const;
-
-#ifdef U_USE_COLLATION_KEY_DEPRECATES
-    /**
-    * Extracts the collation key values into a new array. The caller owns
-    * this storage and should free it.
-    * @param count the output parameter of number of collation key values,
-    * including any trailing nulls.
-    * @obsolete ICU 2.6. Use getByteArray instead since this API will be removed in that release.
-    */
-    uint8_t*                toByteArray(int32_t& count) const;
-#endif
-
-    /**
-    * Convenience method which does a string(bit-wise) comparison of the
-    * two collation keys.
-    * @param target target collation key to be compared with
-    * @return Returns Collator::LESS if sourceKey &lt; targetKey,
-    * Collator::GREATER if sourceKey > targetKey and Collator::EQUAL
-    * otherwise.
-    * @deprecated ICU 2.6 use the overload with error code
-    */
-    Collator::EComparisonResult compareTo(const CollationKey& target) const;
-
-    /**
-    * Convenience method which does a string(bit-wise) comparison of the
-    * two collation keys.
-    * @param target target collation key to be compared with
-    * @param status error code
-    * @return Returns UCOL_LESS if sourceKey &lt; targetKey,
-    * UCOL_GREATER if sourceKey > targetKey and UCOL_EQUAL
-    * otherwise.
-    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-    */
-    UCollationResult compareTo(const CollationKey& target, UErrorCode &status) const;
-
-    /**
-    * Creates an integer that is unique to the collation key.  NOTE: this
-    * is not the same as String.hashCode.
-    * <p>Example of use:
-    * <pre>
-    * .    UErrorCode status = U_ZERO_ERROR;
-    * .    Collator *myCollation = Collator::createInstance(Locale::US, status);
-    * .    if (U_FAILURE(status)) return;
-    * .    CollationKey key1, key2;
-    * .    UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
-    * .    myCollation->getCollationKey("abc", key1, status1);
-    * .    if (U_FAILURE(status1)) { delete myCollation; return; }
-    * .    myCollation->getCollationKey("ABC", key2, status2);
-    * .    if (U_FAILURE(status2)) { delete myCollation; return; }
-    * .    // key1.hashCode() != key2.hashCode()
-    * </pre>
-    * @return the hash value based on the string's collation order.
-    * @see UnicodeString#hashCode
-    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-    */
-    int32_t                 hashCode(void) const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-private:
-    /**
-    * Returns an array of the collation key values as 16-bit integers.
-    * The caller owns the storage and must delete it.
-    * @param values Output param of the collation key values.
-    * @param count output parameter of the number of collation key values
-    * @return a pointer to an array of 16-bit collation key values.
-    */
-    void adopt(uint8_t *values, int32_t count);
-
-    /*
-    * Creates a collation key with a string.
-    */
-
-    /**
-    * If this CollationKey has capacity less than newSize,
-    * its internal capacity will be increased to newSize.
-    * @param newSize minimum size this CollationKey has to have
-    * @return this CollationKey
-    */
-    CollationKey&           ensureCapacity(int32_t newSize);
-    /**
-    * Set the CollationKey to a "bogus" or invalid state
-    * @return this CollationKey
-    */
-    CollationKey&           setToBogus(void);
-    /**
-    * Resets this CollationKey to an empty state
-    * @return this CollationKey
-    */
-    CollationKey&           reset(void);
-    
-    /**
-    * Allow private access to RuleBasedCollator
-    */
-    friend  class           RuleBasedCollator;
-    /**
-    * Bogus status
-    */
-    UBool                   fBogus;
-    /**
-    * Size of fBytes used to store the sortkey. i.e. up till the 
-    * null-termination.
-    */
-    int32_t                 fCount;
-    /**
-    * Full size of the fBytes
-    */
-    int32_t                 fCapacity;
-    /**
-    * Unique hash value of this CollationKey
-    */
-    int32_t                 fHashCode;
-    /**
-    * Array to store the sortkey
-    */
-    uint8_t*                fBytes;
-
-};
-
-inline UBool
-CollationKey::operator!=(const CollationKey& other) const
-{
-    return !(*this == other);
-}
-
-inline UBool
-CollationKey::isBogus() const
-{
-    return fBogus;
-}
-
-inline const uint8_t*
-CollationKey::getByteArray(int32_t &count) const
-{
-    count = fCount;
-    return fBytes;
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_COLLATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/sortkey.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/sortkey.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/sortkey.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/sortkey.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,324 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 1996-2006, International Business Machines Corporation and others.
+ * All Rights Reserved.
+ *****************************************************************************
+ *
+ * File sortkey.h
+ *
+ * Created by: Helena Shih
+ *
+ * Modification History:
+ *
+ *  Date         Name          Description
+ *
+ *  6/20/97     helena      Java class name change.
+ *  8/18/97     helena      Added internal API documentation.
+ *  6/26/98     erm         Changed to use byte arrays and memcmp.
+ *****************************************************************************
+ */
+
+#ifndef SORTKEY_H
+#define SORTKEY_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Keys for comparing strings multiple times. 
+ */
+ 
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/coll.h"
+
+U_NAMESPACE_BEGIN
+
+/* forward declaration */
+class RuleBasedCollator;
+
+/**
+ *
+ * Collation keys are generated by the Collator class.  Use the CollationKey objects
+ * instead of Collator to compare strings multiple times.  A CollationKey
+ * preprocesses the comparison information from the Collator object to
+ * make the comparison faster.  If you are not going to comparing strings
+ * multiple times, then using the Collator object is generally faster,
+ * since it only processes as much of the string as needed to make a
+ * comparison.
+ * <p> For example (with strength == tertiary)
+ * <p>When comparing "Abernathy" to "Baggins-Smythworthy", Collator
+ * only needs to process a couple of characters, while a comparison
+ * with CollationKeys will process all of the characters.  On the other hand,
+ * if you are doing a sort of a number of fields, it is much faster to use
+ * CollationKeys, since you will be comparing strings multiple times.
+ * <p>Typical use of CollationKeys are in databases, where you store a CollationKey
+ * in a hidden field, and use it for sorting or indexing.
+ *
+ * <p>Example of use:
+ * <pre>
+ * \code
+ *     UErrorCode success = U_ZERO_ERROR;
+ *     Collator* myCollator = Collator::createInstance(success);
+ *     CollationKey* keys = new CollationKey [3];
+ *     myCollator->getCollationKey("Tom", keys[0], success );
+ *     myCollator->getCollationKey("Dick", keys[1], success );
+ *     myCollator->getCollationKey("Harry", keys[2], success );
+ *
+ *     // Inside body of sort routine, compare keys this way:
+ *     CollationKey tmp;
+ *     if(keys[0].compareTo( keys[1] ) > 0 ) {
+ *         tmp = keys[0]; keys[0] = keys[1]; keys[1] = tmp;
+ *     }
+ *     //...
+ * \endcode
+ * </pre>
+ * <p>Because Collator::compare()'s algorithm is complex, it is faster to sort
+ * long lists of words by retrieving collation keys with Collator::getCollationKey().
+ * You can then cache the collation keys and compare them using CollationKey::compareTo().
+ * <p>
+ * <strong>Note:</strong> <code>Collator</code>s with different Locale,
+ * CollationStrength and DecompositionMode settings will return different
+ * CollationKeys for the same set of strings. Locales have specific
+ * collation rules, and the way in which secondary and tertiary differences
+ * are taken into account, for example, will result in different CollationKeys
+ * for same strings.
+ * <p>
+
+ * @see          Collator
+ * @see          RuleBasedCollator
+ * @version      1.3 12/18/96
+ * @author       Helena Shih
+ * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+ */
+class U_I18N_API CollationKey : public UObject {
+public:
+    /**
+    * This creates an empty collation key based on the null string.  An empty
+    * collation key contains no sorting information.  When comparing two empty
+    * collation keys, the result is Collator::EQUAL.  Comparing empty collation key
+    * with non-empty collation key is always Collator::LESS.
+    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+    */
+    CollationKey();
+
+
+    /**
+    * Creates a collation key based on the collation key values.
+    * @param values the collation key values
+    * @param count number of collation key values, including trailing nulls.
+    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+    */
+    CollationKey(const  uint8_t*    values,
+                int32_t     count);
+
+    /**
+    * Copy constructor.
+    * @param other    the object to be copied.
+    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+    */
+    CollationKey(const CollationKey& other);
+
+    /**
+    * Sort key destructor.
+    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+    */
+    virtual ~CollationKey();
+
+    /**
+    * Assignment operator
+    * @param other    the object to be copied.
+    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+    */
+    const   CollationKey&   operator=(const CollationKey& other);
+
+    /**
+    * Compare if two collation keys are the same.
+    * @param source the collation key to compare to.
+    * @return Returns true if two collation keys are equal, false otherwise.
+    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+    */
+    UBool                   operator==(const CollationKey& source) const;
+
+    /**
+    * Compare if two collation keys are not the same.
+    * @param source the collation key to compare to.
+    * @return Returns TRUE if two collation keys are different, FALSE otherwise.
+    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+    */
+    UBool                   operator!=(const CollationKey& source) const;
+
+
+    /**
+    * Test to see if the key is in an invalid state. The key will be in an
+    * invalid state if it couldn't allocate memory for some operation.
+    * @return Returns TRUE if the key is in an invalid, FALSE otherwise.
+    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+    */
+    UBool                   isBogus(void) const;
+
+    /**
+    * Returns a pointer to the collation key values. The storage is owned
+    * by the collation key and the pointer will become invalid if the key
+    * is deleted.
+    * @param count the output parameter of number of collation key values,
+    * including any trailing nulls.
+    * @return a pointer to the collation key values.
+    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+    */
+    const    uint8_t*       getByteArray(int32_t& count) const;
+
+#ifdef U_USE_COLLATION_KEY_DEPRECATES
+    /**
+    * Extracts the collation key values into a new array. The caller owns
+    * this storage and should free it.
+    * @param count the output parameter of number of collation key values,
+    * including any trailing nulls.
+    * @obsolete ICU 2.6. Use getByteArray instead since this API will be removed in that release.
+    */
+    uint8_t*                toByteArray(int32_t& count) const;
+#endif
+
+    /**
+    * Convenience method which does a string(bit-wise) comparison of the
+    * two collation keys.
+    * @param target target collation key to be compared with
+    * @return Returns Collator::LESS if sourceKey &lt; targetKey,
+    * Collator::GREATER if sourceKey > targetKey and Collator::EQUAL
+    * otherwise.
+    * @deprecated ICU 2.6 use the overload with error code
+    */
+    Collator::EComparisonResult compareTo(const CollationKey& target) const;
+
+    /**
+    * Convenience method which does a string(bit-wise) comparison of the
+    * two collation keys.
+    * @param target target collation key to be compared with
+    * @param status error code
+    * @return Returns UCOL_LESS if sourceKey &lt; targetKey,
+    * UCOL_GREATER if sourceKey > targetKey and UCOL_EQUAL
+    * otherwise.
+    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+    */
+    UCollationResult compareTo(const CollationKey& target, UErrorCode &status) const;
+
+    /**
+    * Creates an integer that is unique to the collation key.  NOTE: this
+    * is not the same as String.hashCode.
+    * <p>Example of use:
+    * <pre>
+    * .    UErrorCode status = U_ZERO_ERROR;
+    * .    Collator *myCollation = Collator::createInstance(Locale::US, status);
+    * .    if (U_FAILURE(status)) return;
+    * .    CollationKey key1, key2;
+    * .    UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
+    * .    myCollation->getCollationKey("abc", key1, status1);
+    * .    if (U_FAILURE(status1)) { delete myCollation; return; }
+    * .    myCollation->getCollationKey("ABC", key2, status2);
+    * .    if (U_FAILURE(status2)) { delete myCollation; return; }
+    * .    // key1.hashCode() != key2.hashCode()
+    * </pre>
+    * @return the hash value based on the string's collation order.
+    * @see UnicodeString#hashCode
+    * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+    */
+    int32_t                 hashCode(void) const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     * @deprecated ICU 2.8 Use Collator::getSortKey(...) instead
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+    /**
+    * Returns an array of the collation key values as 16-bit integers.
+    * The caller owns the storage and must delete it.
+    * @param values Output param of the collation key values.
+    * @param count output parameter of the number of collation key values
+    * @return a pointer to an array of 16-bit collation key values.
+    */
+    void adopt(uint8_t *values, int32_t count);
+
+    /*
+    * Creates a collation key with a string.
+    */
+
+    /**
+    * If this CollationKey has capacity less than newSize,
+    * its internal capacity will be increased to newSize.
+    * @param newSize minimum size this CollationKey has to have
+    * @return this CollationKey
+    */
+    CollationKey&           ensureCapacity(int32_t newSize);
+    /**
+    * Set the CollationKey to a "bogus" or invalid state
+    * @return this CollationKey
+    */
+    CollationKey&           setToBogus(void);
+    /**
+    * Resets this CollationKey to an empty state
+    * @return this CollationKey
+    */
+    CollationKey&           reset(void);
+    
+    /**
+    * Allow private access to RuleBasedCollator
+    */
+    friend  class           RuleBasedCollator;
+    /**
+    * Bogus status
+    */
+    UBool                   fBogus;
+    /**
+    * Size of fBytes used to store the sortkey. i.e. up till the 
+    * null-termination.
+    */
+    int32_t                 fCount;
+    /**
+    * Full size of the fBytes
+    */
+    int32_t                 fCapacity;
+    /**
+    * Unique hash value of this CollationKey
+    */
+    int32_t                 fHashCode;
+    /**
+    * Array to store the sortkey
+    */
+    uint8_t*                fBytes;
+
+};
+
+inline UBool
+CollationKey::operator!=(const CollationKey& other) const
+{
+    return !(*this == other);
+}
+
+inline UBool
+CollationKey::isBogus() const
+{
+    return fBogus;
+}
+
+inline const uint8_t*
+CollationKey::getByteArray(int32_t &count) const
+{
+    count = fCount;
+    return fBytes;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/strenum.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/strenum.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/strenum.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,271 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 2002-2007, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*/
-
-#ifndef STRENUM_H
-#define STRENUM_H
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-
-/**
- * \file 
- * \brief C++ API: String Enumeration
- */
- 
-U_NAMESPACE_BEGIN
-
-/**
- * Base class for 'pure' C++ implementations of uenum api.  Adds a
- * method that returns the next UnicodeString since in C++ this can
- * be a common storage format for strings.
- *
- * <p>The model is that the enumeration is over strings maintained by
- * a 'service.'  At any point, the service might change, invalidating
- * the enumerator (though this is expected to be rare).  The iterator
- * returns an error if this has occurred.  Lack of the error is no
- * guarantee that the service didn't change immediately after the
- * call, so the returned string still might not be 'valid' on
- * subsequent use.</p>
- *
- * <p>Strings may take the form of const char*, const UChar*, or const
- * UnicodeString*.  The type you get is determine by the variant of
- * 'next' that you call.  In general the StringEnumeration is
- * optimized for one of these types, but all StringEnumerations can
- * return all types.  Returned strings are each terminated with a NUL.
- * Depending on the service data, they might also include embedded NUL
- * characters, so API is provided to optionally return the true
- * length, counting the embedded NULs but not counting the terminating
- * NUL.</p>
- *
- * <p>The pointers returned by next, unext, and snext become invalid
- * upon any subsequent call to the enumeration's destructor, next,
- * unext, snext, or reset.</p>
- *
- * ICU 2.8 adds some default implementations and helper functions
- * for subclasses.
- *
- * @stable ICU 2.4 
- */
-class U_COMMON_API StringEnumeration : public UObject { 
-public:
-    /**
-     * Destructor.
-     * @stable ICU 2.4
-     */
-    virtual ~StringEnumeration();
-
-    /**
-     * Clone this object, an instance of a subclass of StringEnumeration.
-     * Clones can be used concurrently in multiple threads.
-     * If a subclass does not implement clone(), or if an error occurs,
-     * then NULL is returned.
-     * The clone functions in all subclasses return a base class pointer
-     * because some compilers do not support covariant (same-as-this)
-     * return types; cast to the appropriate subclass if necessary.
-     * The caller must delete the clone.
-     *
-     * @return a clone of this object
-     *
-     * @see getDynamicClassID
-     * @stable ICU 2.8
-     */
-    virtual StringEnumeration *clone() const;
-
-    /**
-     * <p>Return the number of elements that the iterator traverses.  If
-     * the iterator is out of sync with its service, status is set to
-     * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
-     *
-     * <p>The return value will not change except possibly as a result of
-     * a subsequent call to reset, or if the iterator becomes out of sync.</p>
-     *
-     * <p>This is a convenience function. It can end up being very
-     * expensive as all the items might have to be pre-fetched
-     * (depending on the storage format of the data being
-     * traversed).</p>
-     *
-     * @param status the error code.
-     * @return number of elements in the iterator.
-     *
-     * @stable ICU 2.4 */
-    virtual int32_t count(UErrorCode& status) const = 0;
-
-    /**
-     * <p>Returns the next element as a NUL-terminated char*.  If there
-     * are no more elements, returns NULL.  If the resultLength pointer
-     * is not NULL, the length of the string (not counting the
-     * terminating NUL) is returned at that address.  If an error
-     * status is returned, the value at resultLength is undefined.</p>
-     *
-     * <p>The returned pointer is owned by this iterator and must not be
-     * deleted by the caller.  The pointer is valid until the next call
-     * to next, unext, snext, reset, or the enumerator's destructor.</p>
-     *
-     * <p>If the iterator is out of sync with its service, status is set
-     * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
-     *
-     * <p>If the native service string is a UChar* string, it is
-     * converted to char* with the invariant converter.  If the
-     * conversion fails (because a character cannot be converted) then
-     * status is set to U_INVARIANT_CONVERSION_ERROR and the return
-     * value is undefined (though not NULL).</p>
-     *
-     * Starting with ICU 2.8, the default implementation calls snext()
-     * and handles the conversion.
-     *
-     * @param status the error code.
-     * @param resultLength a pointer to receive the length, can be NULL.
-     * @return a pointer to the string, or NULL.
-     *
-     * @stable ICU 2.4 
-     */
-    virtual const char* next(int32_t *resultLength, UErrorCode& status);
-
-    /**
-     * <p>Returns the next element as a NUL-terminated UChar*.  If there
-     * are no more elements, returns NULL.  If the resultLength pointer
-     * is not NULL, the length of the string (not counting the
-     * terminating NUL) is returned at that address.  If an error
-     * status is returned, the value at resultLength is undefined.</p>
-     *
-     * <p>The returned pointer is owned by this iterator and must not be
-     * deleted by the caller.  The pointer is valid until the next call
-     * to next, unext, snext, reset, or the enumerator's destructor.</p>
-     *
-     * <p>If the iterator is out of sync with its service, status is set
-     * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
-     *
-     * Starting with ICU 2.8, the default implementation calls snext()
-     * and handles the conversion.
-     *
-     * @param status the error code.
-     * @param resultLength a ponter to receive the length, can be NULL.
-     * @return a pointer to the string, or NULL.
-     *
-     * @stable ICU 2.4 
-     */
-    virtual const UChar* unext(int32_t *resultLength, UErrorCode& status);
-
-    /**
-     * <p>Returns the next element a UnicodeString*.  If there are no
-     * more elements, returns NULL.</p>
-     *
-     * <p>The returned pointer is owned by this iterator and must not be
-     * deleted by the caller.  The pointer is valid until the next call
-     * to next, unext, snext, reset, or the enumerator's destructor.</p>
-     *
-     * <p>If the iterator is out of sync with its service, status is set
-     * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
-     *
-     * @param status the error code.
-     * @return a pointer to the string, or NULL.
-     *
-     * @stable ICU 2.4 
-     */
-    virtual const UnicodeString* snext(UErrorCode& status) = 0;
-
-    /**
-     * <p>Resets the iterator.  This re-establishes sync with the
-     * service and rewinds the iterator to start at the first
-     * element.</p>
-     *
-     * <p>Previous pointers returned by next, unext, or snext become
-     * invalid, and the value returned by count might change.</p>
-     *
-     * @param status the error code.
-     *
-     * @stable ICU 2.4 
-     */
-    virtual void reset(UErrorCode& status) = 0;
-
-    /**
-     * Compares this enumeration to other to check if both are equal
-     *
-     * @param that The other string enumeration to compare this object to
-     * @return TRUE if the enumerations are equal. FALSE if not.
-     * @stable ICU 3.6 
-     */
-    virtual UBool operator==(const StringEnumeration& that)const;
-    /**
-     * Compares this enumeration to other to check if both are not equal
-     *
-     * @param that The other string enumeration to compare this object to
-     * @return TRUE if the enumerations are equal. FALSE if not.
-     * @stable ICU 3.6 
-     */
-    virtual UBool operator!=(const StringEnumeration& that)const;
-
-protected:
-    /**
-     * UnicodeString field for use with default implementations and subclasses.
-     * @stable ICU 2.8
-     */
-    UnicodeString unistr;
-    /**
-     * char * default buffer for use with default implementations and subclasses.
-     * @stable ICU 2.8
-     */
-    char charsBuffer[32];
-    /**
-     * char * buffer for use with default implementations and subclasses.
-     * Allocated in constructor and in ensureCharsCapacity().
-     * @stable ICU 2.8
-     */
-    char *chars;
-    /**
-     * Capacity of chars, for use with default implementations and subclasses.
-     * @stable ICU 2.8
-     */
-    int32_t charsCapacity;
-
-    /**
-     * Default constructor for use with default implementations and subclasses.
-     * @stable ICU 2.8
-     */
-    StringEnumeration();
-
-    /**
-     * Ensures that chars is at least as large as the requested capacity.
-     * For use with default implementations and subclasses.
-     *
-     * @param capacity Requested capacity.
-     * @param status ICU in/out error code.
-     * @stable ICU 2.8
-     */
-    void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
-
-    /**
-     * Converts s to Unicode and sets unistr to the result.
-     * For use with default implementations and subclasses,
-     * especially for implementations of snext() in terms of next().
-     * This is provided with a helper function instead of a default implementation
-     * of snext() to avoid potential infinite loops between next() and snext().
-     *
-     * For example:
-     * \code
-     * const UnicodeString* snext(UErrorCode& status) {
-     *   int32_t resultLength=0;
-     *   const char *s=next(&resultLength, status);
-     *   return setChars(s, resultLength, status);
-     * }
-     * \endcode
-     *
-     * @param s String to be converted to Unicode.
-     * @param length Length of the string.
-     * @param status ICU in/out error code.
-     * @return A pointer to unistr.
-     * @stable ICU 2.8
-     */
-    UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
-};
-
-U_NAMESPACE_END
-
-/* STRENUM_H */
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/strenum.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/strenum.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/strenum.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/strenum.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,271 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2002-2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*/
+
+#ifndef STRENUM_H
+#define STRENUM_H
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file 
+ * \brief C++ API: String Enumeration
+ */
+ 
+U_NAMESPACE_BEGIN
+
+/**
+ * Base class for 'pure' C++ implementations of uenum api.  Adds a
+ * method that returns the next UnicodeString since in C++ this can
+ * be a common storage format for strings.
+ *
+ * <p>The model is that the enumeration is over strings maintained by
+ * a 'service.'  At any point, the service might change, invalidating
+ * the enumerator (though this is expected to be rare).  The iterator
+ * returns an error if this has occurred.  Lack of the error is no
+ * guarantee that the service didn't change immediately after the
+ * call, so the returned string still might not be 'valid' on
+ * subsequent use.</p>
+ *
+ * <p>Strings may take the form of const char*, const UChar*, or const
+ * UnicodeString*.  The type you get is determine by the variant of
+ * 'next' that you call.  In general the StringEnumeration is
+ * optimized for one of these types, but all StringEnumerations can
+ * return all types.  Returned strings are each terminated with a NUL.
+ * Depending on the service data, they might also include embedded NUL
+ * characters, so API is provided to optionally return the true
+ * length, counting the embedded NULs but not counting the terminating
+ * NUL.</p>
+ *
+ * <p>The pointers returned by next, unext, and snext become invalid
+ * upon any subsequent call to the enumeration's destructor, next,
+ * unext, snext, or reset.</p>
+ *
+ * ICU 2.8 adds some default implementations and helper functions
+ * for subclasses.
+ *
+ * @stable ICU 2.4 
+ */
+class U_COMMON_API StringEnumeration : public UObject { 
+public:
+    /**
+     * Destructor.
+     * @stable ICU 2.4
+     */
+    virtual ~StringEnumeration();
+
+    /**
+     * Clone this object, an instance of a subclass of StringEnumeration.
+     * Clones can be used concurrently in multiple threads.
+     * If a subclass does not implement clone(), or if an error occurs,
+     * then NULL is returned.
+     * The clone functions in all subclasses return a base class pointer
+     * because some compilers do not support covariant (same-as-this)
+     * return types; cast to the appropriate subclass if necessary.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @stable ICU 2.8
+     */
+    virtual StringEnumeration *clone() const;
+
+    /**
+     * <p>Return the number of elements that the iterator traverses.  If
+     * the iterator is out of sync with its service, status is set to
+     * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
+     *
+     * <p>The return value will not change except possibly as a result of
+     * a subsequent call to reset, or if the iterator becomes out of sync.</p>
+     *
+     * <p>This is a convenience function. It can end up being very
+     * expensive as all the items might have to be pre-fetched
+     * (depending on the storage format of the data being
+     * traversed).</p>
+     *
+     * @param status the error code.
+     * @return number of elements in the iterator.
+     *
+     * @stable ICU 2.4 */
+    virtual int32_t count(UErrorCode& status) const = 0;
+
+    /**
+     * <p>Returns the next element as a NUL-terminated char*.  If there
+     * are no more elements, returns NULL.  If the resultLength pointer
+     * is not NULL, the length of the string (not counting the
+     * terminating NUL) is returned at that address.  If an error
+     * status is returned, the value at resultLength is undefined.</p>
+     *
+     * <p>The returned pointer is owned by this iterator and must not be
+     * deleted by the caller.  The pointer is valid until the next call
+     * to next, unext, snext, reset, or the enumerator's destructor.</p>
+     *
+     * <p>If the iterator is out of sync with its service, status is set
+     * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+     *
+     * <p>If the native service string is a UChar* string, it is
+     * converted to char* with the invariant converter.  If the
+     * conversion fails (because a character cannot be converted) then
+     * status is set to U_INVARIANT_CONVERSION_ERROR and the return
+     * value is undefined (though not NULL).</p>
+     *
+     * Starting with ICU 2.8, the default implementation calls snext()
+     * and handles the conversion.
+     *
+     * @param status the error code.
+     * @param resultLength a pointer to receive the length, can be NULL.
+     * @return a pointer to the string, or NULL.
+     *
+     * @stable ICU 2.4 
+     */
+    virtual const char* next(int32_t *resultLength, UErrorCode& status);
+
+    /**
+     * <p>Returns the next element as a NUL-terminated UChar*.  If there
+     * are no more elements, returns NULL.  If the resultLength pointer
+     * is not NULL, the length of the string (not counting the
+     * terminating NUL) is returned at that address.  If an error
+     * status is returned, the value at resultLength is undefined.</p>
+     *
+     * <p>The returned pointer is owned by this iterator and must not be
+     * deleted by the caller.  The pointer is valid until the next call
+     * to next, unext, snext, reset, or the enumerator's destructor.</p>
+     *
+     * <p>If the iterator is out of sync with its service, status is set
+     * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+     *
+     * Starting with ICU 2.8, the default implementation calls snext()
+     * and handles the conversion.
+     *
+     * @param status the error code.
+     * @param resultLength a ponter to receive the length, can be NULL.
+     * @return a pointer to the string, or NULL.
+     *
+     * @stable ICU 2.4 
+     */
+    virtual const UChar* unext(int32_t *resultLength, UErrorCode& status);
+
+    /**
+     * <p>Returns the next element a UnicodeString*.  If there are no
+     * more elements, returns NULL.</p>
+     *
+     * <p>The returned pointer is owned by this iterator and must not be
+     * deleted by the caller.  The pointer is valid until the next call
+     * to next, unext, snext, reset, or the enumerator's destructor.</p>
+     *
+     * <p>If the iterator is out of sync with its service, status is set
+     * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+     *
+     * @param status the error code.
+     * @return a pointer to the string, or NULL.
+     *
+     * @stable ICU 2.4 
+     */
+    virtual const UnicodeString* snext(UErrorCode& status) = 0;
+
+    /**
+     * <p>Resets the iterator.  This re-establishes sync with the
+     * service and rewinds the iterator to start at the first
+     * element.</p>
+     *
+     * <p>Previous pointers returned by next, unext, or snext become
+     * invalid, and the value returned by count might change.</p>
+     *
+     * @param status the error code.
+     *
+     * @stable ICU 2.4 
+     */
+    virtual void reset(UErrorCode& status) = 0;
+
+    /**
+     * Compares this enumeration to other to check if both are equal
+     *
+     * @param that The other string enumeration to compare this object to
+     * @return TRUE if the enumerations are equal. FALSE if not.
+     * @stable ICU 3.6 
+     */
+    virtual UBool operator==(const StringEnumeration& that)const;
+    /**
+     * Compares this enumeration to other to check if both are not equal
+     *
+     * @param that The other string enumeration to compare this object to
+     * @return TRUE if the enumerations are equal. FALSE if not.
+     * @stable ICU 3.6 
+     */
+    virtual UBool operator!=(const StringEnumeration& that)const;
+
+protected:
+    /**
+     * UnicodeString field for use with default implementations and subclasses.
+     * @stable ICU 2.8
+     */
+    UnicodeString unistr;
+    /**
+     * char * default buffer for use with default implementations and subclasses.
+     * @stable ICU 2.8
+     */
+    char charsBuffer[32];
+    /**
+     * char * buffer for use with default implementations and subclasses.
+     * Allocated in constructor and in ensureCharsCapacity().
+     * @stable ICU 2.8
+     */
+    char *chars;
+    /**
+     * Capacity of chars, for use with default implementations and subclasses.
+     * @stable ICU 2.8
+     */
+    int32_t charsCapacity;
+
+    /**
+     * Default constructor for use with default implementations and subclasses.
+     * @stable ICU 2.8
+     */
+    StringEnumeration();
+
+    /**
+     * Ensures that chars is at least as large as the requested capacity.
+     * For use with default implementations and subclasses.
+     *
+     * @param capacity Requested capacity.
+     * @param status ICU in/out error code.
+     * @stable ICU 2.8
+     */
+    void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
+
+    /**
+     * Converts s to Unicode and sets unistr to the result.
+     * For use with default implementations and subclasses,
+     * especially for implementations of snext() in terms of next().
+     * This is provided with a helper function instead of a default implementation
+     * of snext() to avoid potential infinite loops between next() and snext().
+     *
+     * For example:
+     * \code
+     * const UnicodeString* snext(UErrorCode& status) {
+     *   int32_t resultLength=0;
+     *   const char *s=next(&resultLength, status);
+     *   return setChars(s, resultLength, status);
+     * }
+     * \endcode
+     *
+     * @param s String to be converted to Unicode.
+     * @param length Length of the string.
+     * @param status ICU in/out error code.
+     * @return A pointer to unistr.
+     * @stable ICU 2.8
+     */
+    UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
+};
+
+U_NAMESPACE_END
+
+/* STRENUM_H */
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/stsearch.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/stsearch.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/stsearch.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,518 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
-**********************************************************************
-*   Date        Name        Description
-*  03/22/2000   helena      Creation.
-**********************************************************************
-*/
-
-#ifndef STSEARCH_H
-#define STSEARCH_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Service for searching text based on RuleBasedCollator.
- */
- 
-#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/tblcoll.h"
-#include "unicode/coleitr.h"
-#include "unicode/search.h"
-
-U_NAMESPACE_BEGIN
-
-/** 
- *
- * <tt>StringSearch</tt> is a <tt>SearchIterator</tt> that provides
- * language-sensitive text searching based on the comparison rules defined
- * in a {@link RuleBasedCollator} object.
- * StringSearch ensures that language eccentricity can be 
- * handled, e.g. for the German collator, characters &szlig; and SS will be matched 
- * if case is chosen to be ignored.
- * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
- * "ICU Collation Design Document"</a> for more information.
- * <p> 
- * The algorithm implemented is a modified form of the Boyer Moore's search.
- * For more information  see 
- * <a href="http://icu-project.org/docs/papers/efficient_text_searching_in_java.html">
- * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i> 
- * in February, 1999, for further information on the algorithm.
- * <p>
- * There are 2 match options for selection:<br>
- * Let S' be the sub-string of a text string S between the offsets start and 
- * end <start, end>.
- * <br>
- * A pattern string P matches a text string S at the offsets <start, end> 
- * if
- * <pre> 
- * option 1. Some canonical equivalent of P matches some canonical equivalent 
- *           of S'
- * option 2. P matches S' and if P starts or ends with a combining mark, 
- *           there exists no non-ignorable combining mark before or after S? 
- *           in S respectively. 
- * </pre>
- * Option 2. will be the default.
- * <p>
- * This search has APIs similar to that of other text iteration mechanisms 
- * such as the break iterators in <tt>BreakIterator</tt>. Using these 
- * APIs, it is easy to scan through text looking for all occurances of 
- * a given pattern. This search iterator allows changing of direction by 
- * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>. 
- * Though a direction change can occur without calling <tt>reset</tt> first,  
- * this operation comes with some speed penalty.
- * Match results in the forward direction will match the result matches in 
- * the backwards direction in the reverse order
- * <p>
- * <tt>SearchIterator</tt> provides APIs to specify the starting position 
- * within the text string to be searched, e.g. <tt>setOffset</tt>,
- * <tt>preceding</tt> and <tt>following</tt>. Since the 
- * starting position will be set as it is specified, please take note that 
- * there are some danger points which the search may render incorrect 
- * results:
- * <ul>
- * <li> The midst of a substring that requires normalization.
- * <li> If the following match is to be found, the position should not be the
- *      second character which requires to be swapped with the preceding 
- *      character. Vice versa, if the preceding match is to be found, 
- *      position to search from should not be the first character which 
- *      requires to be swapped with the next character. E.g certain Thai and
- *      Lao characters require swapping.
- * <li> If a following pattern match is to be found, any position within a 
- *      contracting sequence except the first will fail. Vice versa if a 
- *      preceding pattern match is to be found, a invalid starting point 
- *      would be any character within a contracting sequence except the last.
- * </ul>
- * <p>
- * A breakiterator can be used if only matches at logical breaks are desired.
- * Using a breakiterator will only give you results that exactly matches the
- * boundaries given by the breakiterator. For instance the pattern "e" will
- * not be found in the string "\u00e9" if a character break iterator is used.
- * <p>
- * Options are provided to handle overlapping matches. 
- * E.g. In English, overlapping matches produces the result 0 and 2 
- * for the pattern "abab" in the text "ababab", where else mutually 
- * exclusive matches only produce the result of 0.
- * <p>
- * Though collator attributes will be taken into consideration while 
- * performing matches, there are no APIs here for setting and getting the 
- * attributes. These attributes can be set by getting the collator
- * from <tt>getCollator</tt> and using the APIs in <tt>coll.h</tt>.
- * Lastly to update StringSearch to the new collator attributes, 
- * reset() has to be called.
- * <p> 
- * Restriction: <br>
- * Currently there are no composite characters that consists of a
- * character with combining class > 0 before a character with combining 
- * class == 0. However, if such a character exists in the future,  
- * StringSearch does not guarantee the results for option 1.
- * <p>
- * Consult the <tt>SearchIterator</tt> documentation for information on
- * and examples of how to use instances of this class to implement text
- * searching.
- * <pre><code>
- * UnicodeString target("The quick brown fox jumps over the lazy dog.");
- * UnicodeString pattern("fox");
- *
- * UErrorCode      error = U_ZERO_ERROR;
- * StringSearch iter(pattern, target, Locale::getUS(), NULL, status);
- * for (int pos = iter.first(error);
- *      pos != USEARCH_DONE; 
- *      pos = iter.next(error))
- * {
- *     printf("Found match at %d pos, length is %d\n", pos, 
- *                                             iter.getMatchLength());
- * }
- * </code></pre>
- * <p>
- * Note, StringSearch is not to be subclassed.
- * </p>
- * @see SearchIterator
- * @see RuleBasedCollator
- * @since ICU 2.0
- */
-
-class U_I18N_API StringSearch : public SearchIterator
-{
-public:
-
-    // public constructors and destructors --------------------------------
-
-    /**
-     * Creating a <tt>StringSearch</tt> instance using the argument locale 
-     * language rule set. A collator will be created in the process, which 
-     * will be owned by this instance and will be deleted during 
-     * destruction
-     * @param pattern The text for which this object will search.
-     * @param text    The text in which to search for the pattern.
-     * @param locale  A locale which defines the language-sensitive 
-     *                comparison rules used to determine whether text in the 
-     *                pattern and target matches. 
-     * @param breakiter A <tt>BreakIterator</tt> object used to constrain 
-     *                the matches that are found. Matches whose start and end 
-     *                indices in the target text are not boundaries as 
-     *                determined by the <tt>BreakIterator</tt> are 
-     *                ignored. If this behavior is not desired, 
-     *                <tt>NULL</tt> can be passed in instead.
-     * @param status  for errors if any. If pattern or text is NULL, or if
-     *               either the length of pattern or text is 0 then an 
-     *               U_ILLEGAL_ARGUMENT_ERROR is returned.
-     * @stable ICU 2.0
-     */
-    StringSearch(const UnicodeString &pattern, const UnicodeString &text,
-                 const Locale        &locale,       
-                       BreakIterator *breakiter,
-                       UErrorCode    &status);
-
-    /**
-     * Creating a <tt>StringSearch</tt> instance using the argument collator 
-     * language rule set. Note, user retains the ownership of this collator, 
-     * it does not get destroyed during this instance's destruction.
-     * @param pattern The text for which this object will search.
-     * @param text    The text in which to search for the pattern.
-     * @param coll    A <tt>RuleBasedCollator</tt> object which defines 
-     *                the language-sensitive comparison rules used to 
-     *                determine whether text in the pattern and target 
-     *                matches. User is responsible for the clearing of this
-     *                object.
-     * @param breakiter A <tt>BreakIterator</tt> object used to constrain 
-     *                the matches that are found. Matches whose start and end 
-     *                indices in the target text are not boundaries as 
-     *                determined by the <tt>BreakIterator</tt> are 
-     *                ignored. If this behavior is not desired, 
-     *                <tt>NULL</tt> can be passed in instead.
-     * @param status for errors if any. If either the length of pattern or 
-     *               text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
-     * @stable ICU 2.0
-     */
-    StringSearch(const UnicodeString     &pattern, 
-                 const UnicodeString     &text,
-                       RuleBasedCollator *coll,       
-                       BreakIterator     *breakiter,
-                       UErrorCode        &status);
-
-    /**
-     * Creating a <tt>StringSearch</tt> instance using the argument locale 
-     * language rule set. A collator will be created in the process, which 
-     * will be owned by this instance and will be deleted during 
-     * destruction
-     * <p>
-     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
-     * will be done during searching for this version. The block of text 
-     * in <tt>CharacterIterator</tt> will be used as it is.
-     * @param pattern The text for which this object will search.
-     * @param text    The text iterator in which to search for the pattern.
-     * @param locale  A locale which defines the language-sensitive 
-     *                comparison rules used to determine whether text in the 
-     *                pattern and target matches. User is responsible for 
-     *                the clearing of this object.
-     * @param breakiter A <tt>BreakIterator</tt> object used to constrain 
-     *                the matches that are found. Matches whose start and end 
-     *                indices in the target text are not boundaries as 
-     *                determined by the <tt>BreakIterator</tt> are 
-     *                ignored. If this behavior is not desired, 
-     *                <tt>NULL</tt> can be passed in instead.
-     * @param status for errors if any. If either the length of pattern or 
-     *               text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
-     * @stable ICU 2.0
-     */
-    StringSearch(const UnicodeString &pattern, CharacterIterator &text,
-                 const Locale        &locale, 
-                       BreakIterator *breakiter,
-                       UErrorCode    &status);
-
-    /**
-     * Creating a <tt>StringSearch</tt> instance using the argument collator 
-     * language rule set. Note, user retains the ownership of this collator, 
-     * it does not get destroyed during this instance's destruction.
-     * <p>
-     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
-     * will be done during searching for this version. The block of text 
-     * in <tt>CharacterIterator</tt> will be used as it is.
-     * @param pattern The text for which this object will search.
-     * @param text    The text in which to search for the pattern.
-     * @param coll    A <tt>RuleBasedCollator</tt> object which defines 
-     *                the language-sensitive comparison rules used to 
-     *                determine whether text in the pattern and target 
-     *                matches. User is responsible for the clearing of this
-     *                object.
-     * @param breakiter A <tt>BreakIterator</tt> object used to constrain 
-     *                the matches that are found. Matches whose start and end 
-     *                indices in the target text are not boundaries as 
-     *                determined by the <tt>BreakIterator</tt> are 
-     *                ignored. If this behavior is not desired, 
-     *                <tt>NULL</tt> can be passed in instead.
-     * @param status for errors if any. If either the length of pattern or 
-     *               text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
-     * @stable ICU 2.0
-     */
-    StringSearch(const UnicodeString     &pattern, CharacterIterator &text,
-                       RuleBasedCollator *coll, 
-                       BreakIterator     *breakiter,
-                       UErrorCode        &status);
-
-    /**
-     * Copy constructor that creates a StringSearch instance with the same 
-     * behavior, and iterating over the same text.
-     * @param that StringSearch instance to be copied.
-     * @stable ICU 2.0
-     */
-    StringSearch(const StringSearch &that);
-
-    /**
-    * Destructor. Cleans up the search iterator data struct.
-    * If a collator is created in the constructor, it will be destroyed here.
-    * @stable ICU 2.0
-    */
-    virtual ~StringSearch(void);
-
-    /**
-     * Clone this object.
-     * Clones can be used concurrently in multiple threads.
-     * If an error occurs, then NULL is returned.
-     * The caller must delete the clone.
-     *
-     * @return a clone of this object
-     *
-     * @see getDynamicClassID
-     * @stable ICU 2.8
-     */
-    StringSearch *clone() const;
-
-    // operator overloading ---------------------------------------------
-
-    /**
-     * Assignment operator. Sets this iterator to have the same behavior,
-     * and iterate over the same text, as the one passed in.
-     * @param that instance to be copied.
-     * @stable ICU 2.0
-     */
-    StringSearch & operator=(const StringSearch &that);
-
-    /**
-     * Equality operator. 
-     * @param that instance to be compared.
-     * @return TRUE if both instances have the same attributes, 
-     *         breakiterators, collators and iterate over the same text 
-     *         while looking for the same pattern.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const SearchIterator &that) const;
-
-    // public get and set methods ----------------------------------------
-
-    /**
-     * Sets the index to point to the given position, and clears any state 
-     * that's affected.
-     * <p>
-     * This method takes the argument index and sets the position in the text 
-     * string accordingly without checking if the index is pointing to a 
-     * valid starting point to begin searching. 
-     * @param position within the text to be set. If position is less
-     *          than or greater than the text range for searching, 
-     *          an U_INDEX_OUTOFBOUNDS_ERROR will be returned
-     * @param status for errors if it occurs
-     * @stable ICU 2.0
-     */
-    virtual void setOffset(int32_t position, UErrorCode &status);
-
-    /**
-     * Return the current index in the text being searched.
-     * If the iteration has gone past the end of the text
-     * (or past the beginning for a backwards search), USEARCH_DONE
-     * is returned.
-     * @return current index in the text being searched.
-     * @stable ICU 2.0
-     */
-    virtual int32_t getOffset(void) const;
-
-    /**
-     * Set the target text to be searched.
-     * Text iteration will hence begin at the start of the text string. 
-     * This method is 
-     * useful if you want to re-use an iterator to search for the same 
-     * pattern within a different body of text.
-     * @param text text string to be searched
-     * @param status for errors if any. If the text length is 0 then an 
-     *        U_ILLEGAL_ARGUMENT_ERROR is returned.
-     * @stable ICU 2.0
-     */
-    virtual void setText(const UnicodeString &text, UErrorCode &status);
-    
-    /**
-     * Set the target text to be searched.
-     * Text iteration will hence begin at the start of the text string. 
-     * This method is 
-     * useful if you want to re-use an iterator to search for the same 
-     * pattern within a different body of text.
-     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
-     * will be done during searching for this version. The block of text 
-     * in <tt>CharacterIterator</tt> will be used as it is.
-     * @param text text string to be searched
-     * @param status for errors if any. If the text length is 0 then an 
-     *        U_ILLEGAL_ARGUMENT_ERROR is returned.
-     * @stable ICU 2.0
-     */
-    virtual void setText(CharacterIterator &text, UErrorCode &status);
-
-    /**
-     * Gets the collator used for the language rules.
-     * <p>
-     * Caller may modify but <b>must not</b> delete the <tt>RuleBasedCollator</tt>!
-     * Modifications to this collator will affect the original collator passed in to 
-     * the <tt>StringSearch></tt> constructor or to setCollator, if any.
-     * @return collator used for string search
-     * @stable ICU 2.0
-     */
-    RuleBasedCollator * getCollator() const;
-    
-    /**
-     * Sets the collator used for the language rules. User retains the 
-     * ownership of this collator, thus the responsibility of deletion lies 
-     * with the user. This method causes internal data such as Boyer-Moore 
-     * shift tables to be recalculated, but the iterator's position is 
-     * unchanged.
-     * @param coll    collator 
-     * @param status  for errors if any
-     * @stable ICU 2.0
-     */
-    void setCollator(RuleBasedCollator *coll, UErrorCode &status);
-    
-    /**
-     * Sets the pattern used for matching.
-     * Internal data like the Boyer Moore table will be recalculated, but 
-     * the iterator's position is unchanged.
-     * @param pattern search pattern to be found
-     * @param status for errors if any. If the pattern length is 0 then an 
-     *               U_ILLEGAL_ARGUMENT_ERROR is returned.
-     * @stable ICU 2.0
-     */
-    void setPattern(const UnicodeString &pattern, UErrorCode &status);
-    
-    /**
-     * Gets the search pattern.
-     * @return pattern used for matching
-     * @stable ICU 2.0
-     */
-    const UnicodeString & getPattern() const;
-
-    // public methods ----------------------------------------------------
-
-    /** 
-     * Reset the iteration.
-     * Search will begin at the start of the text string if a forward 
-     * iteration is initiated before a backwards iteration. Otherwise if 
-     * a backwards iteration is initiated before a forwards iteration, the 
-     * search will begin at the end of the text string.
-     * @stable ICU 2.0
-     */
-    virtual void reset();
-
-    /**
-     * Returns a copy of StringSearch with the same behavior, and 
-     * iterating over the same text, as this one. Note that all data will be
-     * replicated, except for the user-specified collator and the
-     * breakiterator.
-     * @return cloned object
-     * @stable ICU 2.0
-     */
-    virtual SearchIterator * safeClone(void) const;
-    
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.2
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.2
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-protected:
-
-    // protected method -------------------------------------------------
-
-    /**
-     * Search forward for matching text, starting at a given location.
-     * Clients should not call this method directly; instead they should 
-     * call {@link SearchIterator#next }.
-     * <p>
-     * If a match is found, this method returns the index at which the match
-     * starts and calls {@link SearchIterator#setMatchLength } with the number 
-     * of characters in the target text that make up the match. If no match 
-     * is found, the method returns <tt>USEARCH_DONE</tt>.
-     * <p>
-     * The <tt>StringSearch</tt> is adjusted so that its current index 
-     * (as returned by {@link #getOffset }) is the match position if one was 
-     * found.
-     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
-     * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE.
-     * @param position The index in the target text at which the search 
-     *                 starts
-     * @param status for errors if any occurs
-     * @return The index at which the matched text in the target starts, or 
-     *         USEARCH_DONE if no match was found.
-     * @stable ICU 2.0
-     */
-    virtual int32_t handleNext(int32_t position, UErrorCode &status);
-
-    /**
-     * Search backward for matching text, starting at a given location.
-     * Clients should not call this method directly; instead they should call
-     * <tt>SearchIterator.previous()</tt>, which this method overrides.
-     * <p>
-     * If a match is found, this method returns the index at which the match
-     * starts and calls {@link SearchIterator#setMatchLength } with the number 
-     * of characters in the target text that make up the match. If no match 
-     * is found, the method returns <tt>USEARCH_DONE</tt>.
-     * <p>
-     * The <tt>StringSearch</tt> is adjusted so that its current index 
-     * (as returned by {@link #getOffset }) is the match position if one was 
-     * found.
-     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
-     * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE.
-     * @param position The index in the target text at which the search 
-     *                 starts.
-     * @param status for errors if any occurs
-     * @return The index at which the matched text in the target starts, or 
-     *         USEARCH_DONE if no match was found.
-     * @stable ICU 2.0
-     */
-    virtual int32_t handlePrev(int32_t position, UErrorCode &status);
-    
-private :
-    StringSearch(); // default constructor not implemented
-
-    // private data members ----------------------------------------------
-
-    /**
-    * RuleBasedCollator, contains exactly the same UCollator * in m_strsrch_
-    * @stable ICU 2.0
-    */
-    RuleBasedCollator  m_collator_;
-    /**
-    * Pattern text
-    * @stable ICU 2.0
-    */
-    UnicodeString      m_pattern_;
-    /**
-    * String search struct data
-    * @stable ICU 2.0
-    */
-    UStringSearch     *m_strsrch_;
-
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_COLLATION */
-
-#endif
-

Copied: MacRuby/trunk/icu-1060/unicode/stsearch.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/stsearch.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/stsearch.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/stsearch.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,518 @@
+/*
+**********************************************************************
+*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  03/22/2000   helena      Creation.
+**********************************************************************
+*/
+
+#ifndef STSEARCH_H
+#define STSEARCH_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Service for searching text based on RuleBasedCollator.
+ */
+ 
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/tblcoll.h"
+#include "unicode/coleitr.h"
+#include "unicode/search.h"
+
+U_NAMESPACE_BEGIN
+
+/** 
+ *
+ * <tt>StringSearch</tt> is a <tt>SearchIterator</tt> that provides
+ * language-sensitive text searching based on the comparison rules defined
+ * in a {@link RuleBasedCollator} object.
+ * StringSearch ensures that language eccentricity can be 
+ * handled, e.g. for the German collator, characters &szlig; and SS will be matched 
+ * if case is chosen to be ignored.
+ * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
+ * "ICU Collation Design Document"</a> for more information.
+ * <p> 
+ * The algorithm implemented is a modified form of the Boyer Moore's search.
+ * For more information  see 
+ * <a href="http://icu-project.org/docs/papers/efficient_text_searching_in_java.html">
+ * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i> 
+ * in February, 1999, for further information on the algorithm.
+ * <p>
+ * There are 2 match options for selection:<br>
+ * Let S' be the sub-string of a text string S between the offsets start and 
+ * end <start, end>.
+ * <br>
+ * A pattern string P matches a text string S at the offsets <start, end> 
+ * if
+ * <pre> 
+ * option 1. Some canonical equivalent of P matches some canonical equivalent 
+ *           of S'
+ * option 2. P matches S' and if P starts or ends with a combining mark, 
+ *           there exists no non-ignorable combining mark before or after S? 
+ *           in S respectively. 
+ * </pre>
+ * Option 2. will be the default.
+ * <p>
+ * This search has APIs similar to that of other text iteration mechanisms 
+ * such as the break iterators in <tt>BreakIterator</tt>. Using these 
+ * APIs, it is easy to scan through text looking for all occurances of 
+ * a given pattern. This search iterator allows changing of direction by 
+ * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>. 
+ * Though a direction change can occur without calling <tt>reset</tt> first,  
+ * this operation comes with some speed penalty.
+ * Match results in the forward direction will match the result matches in 
+ * the backwards direction in the reverse order
+ * <p>
+ * <tt>SearchIterator</tt> provides APIs to specify the starting position 
+ * within the text string to be searched, e.g. <tt>setOffset</tt>,
+ * <tt>preceding</tt> and <tt>following</tt>. Since the 
+ * starting position will be set as it is specified, please take note that 
+ * there are some danger points which the search may render incorrect 
+ * results:
+ * <ul>
+ * <li> The midst of a substring that requires normalization.
+ * <li> If the following match is to be found, the position should not be the
+ *      second character which requires to be swapped with the preceding 
+ *      character. Vice versa, if the preceding match is to be found, 
+ *      position to search from should not be the first character which 
+ *      requires to be swapped with the next character. E.g certain Thai and
+ *      Lao characters require swapping.
+ * <li> If a following pattern match is to be found, any position within a 
+ *      contracting sequence except the first will fail. Vice versa if a 
+ *      preceding pattern match is to be found, a invalid starting point 
+ *      would be any character within a contracting sequence except the last.
+ * </ul>
+ * <p>
+ * A breakiterator can be used if only matches at logical breaks are desired.
+ * Using a breakiterator will only give you results that exactly matches the
+ * boundaries given by the breakiterator. For instance the pattern "e" will
+ * not be found in the string "\u00e9" if a character break iterator is used.
+ * <p>
+ * Options are provided to handle overlapping matches. 
+ * E.g. In English, overlapping matches produces the result 0 and 2 
+ * for the pattern "abab" in the text "ababab", where else mutually 
+ * exclusive matches only produce the result of 0.
+ * <p>
+ * Though collator attributes will be taken into consideration while 
+ * performing matches, there are no APIs here for setting and getting the 
+ * attributes. These attributes can be set by getting the collator
+ * from <tt>getCollator</tt> and using the APIs in <tt>coll.h</tt>.
+ * Lastly to update StringSearch to the new collator attributes, 
+ * reset() has to be called.
+ * <p> 
+ * Restriction: <br>
+ * Currently there are no composite characters that consists of a
+ * character with combining class > 0 before a character with combining 
+ * class == 0. However, if such a character exists in the future,  
+ * StringSearch does not guarantee the results for option 1.
+ * <p>
+ * Consult the <tt>SearchIterator</tt> documentation for information on
+ * and examples of how to use instances of this class to implement text
+ * searching.
+ * <pre><code>
+ * UnicodeString target("The quick brown fox jumps over the lazy dog.");
+ * UnicodeString pattern("fox");
+ *
+ * UErrorCode      error = U_ZERO_ERROR;
+ * StringSearch iter(pattern, target, Locale::getUS(), NULL, status);
+ * for (int pos = iter.first(error);
+ *      pos != USEARCH_DONE; 
+ *      pos = iter.next(error))
+ * {
+ *     printf("Found match at %d pos, length is %d\n", pos, 
+ *                                             iter.getMatchLength());
+ * }
+ * </code></pre>
+ * <p>
+ * Note, StringSearch is not to be subclassed.
+ * </p>
+ * @see SearchIterator
+ * @see RuleBasedCollator
+ * @since ICU 2.0
+ */
+
+class U_I18N_API StringSearch : public SearchIterator
+{
+public:
+
+    // public constructors and destructors --------------------------------
+
+    /**
+     * Creating a <tt>StringSearch</tt> instance using the argument locale 
+     * language rule set. A collator will be created in the process, which 
+     * will be owned by this instance and will be deleted during 
+     * destruction
+     * @param pattern The text for which this object will search.
+     * @param text    The text in which to search for the pattern.
+     * @param locale  A locale which defines the language-sensitive 
+     *                comparison rules used to determine whether text in the 
+     *                pattern and target matches. 
+     * @param breakiter A <tt>BreakIterator</tt> object used to constrain 
+     *                the matches that are found. Matches whose start and end 
+     *                indices in the target text are not boundaries as 
+     *                determined by the <tt>BreakIterator</tt> are 
+     *                ignored. If this behavior is not desired, 
+     *                <tt>NULL</tt> can be passed in instead.
+     * @param status  for errors if any. If pattern or text is NULL, or if
+     *               either the length of pattern or text is 0 then an 
+     *               U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    StringSearch(const UnicodeString &pattern, const UnicodeString &text,
+                 const Locale        &locale,       
+                       BreakIterator *breakiter,
+                       UErrorCode    &status);
+
+    /**
+     * Creating a <tt>StringSearch</tt> instance using the argument collator 
+     * language rule set. Note, user retains the ownership of this collator, 
+     * it does not get destroyed during this instance's destruction.
+     * @param pattern The text for which this object will search.
+     * @param text    The text in which to search for the pattern.
+     * @param coll    A <tt>RuleBasedCollator</tt> object which defines 
+     *                the language-sensitive comparison rules used to 
+     *                determine whether text in the pattern and target 
+     *                matches. User is responsible for the clearing of this
+     *                object.
+     * @param breakiter A <tt>BreakIterator</tt> object used to constrain 
+     *                the matches that are found. Matches whose start and end 
+     *                indices in the target text are not boundaries as 
+     *                determined by the <tt>BreakIterator</tt> are 
+     *                ignored. If this behavior is not desired, 
+     *                <tt>NULL</tt> can be passed in instead.
+     * @param status for errors if any. If either the length of pattern or 
+     *               text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    StringSearch(const UnicodeString     &pattern, 
+                 const UnicodeString     &text,
+                       RuleBasedCollator *coll,       
+                       BreakIterator     *breakiter,
+                       UErrorCode        &status);
+
+    /**
+     * Creating a <tt>StringSearch</tt> instance using the argument locale 
+     * language rule set. A collator will be created in the process, which 
+     * will be owned by this instance and will be deleted during 
+     * destruction
+     * <p>
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
+     * will be done during searching for this version. The block of text 
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * @param pattern The text for which this object will search.
+     * @param text    The text iterator in which to search for the pattern.
+     * @param locale  A locale which defines the language-sensitive 
+     *                comparison rules used to determine whether text in the 
+     *                pattern and target matches. User is responsible for 
+     *                the clearing of this object.
+     * @param breakiter A <tt>BreakIterator</tt> object used to constrain 
+     *                the matches that are found. Matches whose start and end 
+     *                indices in the target text are not boundaries as 
+     *                determined by the <tt>BreakIterator</tt> are 
+     *                ignored. If this behavior is not desired, 
+     *                <tt>NULL</tt> can be passed in instead.
+     * @param status for errors if any. If either the length of pattern or 
+     *               text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    StringSearch(const UnicodeString &pattern, CharacterIterator &text,
+                 const Locale        &locale, 
+                       BreakIterator *breakiter,
+                       UErrorCode    &status);
+
+    /**
+     * Creating a <tt>StringSearch</tt> instance using the argument collator 
+     * language rule set. Note, user retains the ownership of this collator, 
+     * it does not get destroyed during this instance's destruction.
+     * <p>
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
+     * will be done during searching for this version. The block of text 
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * @param pattern The text for which this object will search.
+     * @param text    The text in which to search for the pattern.
+     * @param coll    A <tt>RuleBasedCollator</tt> object which defines 
+     *                the language-sensitive comparison rules used to 
+     *                determine whether text in the pattern and target 
+     *                matches. User is responsible for the clearing of this
+     *                object.
+     * @param breakiter A <tt>BreakIterator</tt> object used to constrain 
+     *                the matches that are found. Matches whose start and end 
+     *                indices in the target text are not boundaries as 
+     *                determined by the <tt>BreakIterator</tt> are 
+     *                ignored. If this behavior is not desired, 
+     *                <tt>NULL</tt> can be passed in instead.
+     * @param status for errors if any. If either the length of pattern or 
+     *               text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    StringSearch(const UnicodeString     &pattern, CharacterIterator &text,
+                       RuleBasedCollator *coll, 
+                       BreakIterator     *breakiter,
+                       UErrorCode        &status);
+
+    /**
+     * Copy constructor that creates a StringSearch instance with the same 
+     * behavior, and iterating over the same text.
+     * @param that StringSearch instance to be copied.
+     * @stable ICU 2.0
+     */
+    StringSearch(const StringSearch &that);
+
+    /**
+    * Destructor. Cleans up the search iterator data struct.
+    * If a collator is created in the constructor, it will be destroyed here.
+    * @stable ICU 2.0
+    */
+    virtual ~StringSearch(void);
+
+    /**
+     * Clone this object.
+     * Clones can be used concurrently in multiple threads.
+     * If an error occurs, then NULL is returned.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @stable ICU 2.8
+     */
+    StringSearch *clone() const;
+
+    // operator overloading ---------------------------------------------
+
+    /**
+     * Assignment operator. Sets this iterator to have the same behavior,
+     * and iterate over the same text, as the one passed in.
+     * @param that instance to be copied.
+     * @stable ICU 2.0
+     */
+    StringSearch & operator=(const StringSearch &that);
+
+    /**
+     * Equality operator. 
+     * @param that instance to be compared.
+     * @return TRUE if both instances have the same attributes, 
+     *         breakiterators, collators and iterate over the same text 
+     *         while looking for the same pattern.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const SearchIterator &that) const;
+
+    // public get and set methods ----------------------------------------
+
+    /**
+     * Sets the index to point to the given position, and clears any state 
+     * that's affected.
+     * <p>
+     * This method takes the argument index and sets the position in the text 
+     * string accordingly without checking if the index is pointing to a 
+     * valid starting point to begin searching. 
+     * @param position within the text to be set. If position is less
+     *          than or greater than the text range for searching, 
+     *          an U_INDEX_OUTOFBOUNDS_ERROR will be returned
+     * @param status for errors if it occurs
+     * @stable ICU 2.0
+     */
+    virtual void setOffset(int32_t position, UErrorCode &status);
+
+    /**
+     * Return the current index in the text being searched.
+     * If the iteration has gone past the end of the text
+     * (or past the beginning for a backwards search), USEARCH_DONE
+     * is returned.
+     * @return current index in the text being searched.
+     * @stable ICU 2.0
+     */
+    virtual int32_t getOffset(void) const;
+
+    /**
+     * Set the target text to be searched.
+     * Text iteration will hence begin at the start of the text string. 
+     * This method is 
+     * useful if you want to re-use an iterator to search for the same 
+     * pattern within a different body of text.
+     * @param text text string to be searched
+     * @param status for errors if any. If the text length is 0 then an 
+     *        U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    virtual void setText(const UnicodeString &text, UErrorCode &status);
+    
+    /**
+     * Set the target text to be searched.
+     * Text iteration will hence begin at the start of the text string. 
+     * This method is 
+     * useful if you want to re-use an iterator to search for the same 
+     * pattern within a different body of text.
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
+     * will be done during searching for this version. The block of text 
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * @param text text string to be searched
+     * @param status for errors if any. If the text length is 0 then an 
+     *        U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    virtual void setText(CharacterIterator &text, UErrorCode &status);
+
+    /**
+     * Gets the collator used for the language rules.
+     * <p>
+     * Caller may modify but <b>must not</b> delete the <tt>RuleBasedCollator</tt>!
+     * Modifications to this collator will affect the original collator passed in to 
+     * the <tt>StringSearch></tt> constructor or to setCollator, if any.
+     * @return collator used for string search
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator * getCollator() const;
+    
+    /**
+     * Sets the collator used for the language rules. User retains the 
+     * ownership of this collator, thus the responsibility of deletion lies 
+     * with the user. This method causes internal data such as Boyer-Moore 
+     * shift tables to be recalculated, but the iterator's position is 
+     * unchanged.
+     * @param coll    collator 
+     * @param status  for errors if any
+     * @stable ICU 2.0
+     */
+    void setCollator(RuleBasedCollator *coll, UErrorCode &status);
+    
+    /**
+     * Sets the pattern used for matching.
+     * Internal data like the Boyer Moore table will be recalculated, but 
+     * the iterator's position is unchanged.
+     * @param pattern search pattern to be found
+     * @param status for errors if any. If the pattern length is 0 then an 
+     *               U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    void setPattern(const UnicodeString &pattern, UErrorCode &status);
+    
+    /**
+     * Gets the search pattern.
+     * @return pattern used for matching
+     * @stable ICU 2.0
+     */
+    const UnicodeString & getPattern() const;
+
+    // public methods ----------------------------------------------------
+
+    /** 
+     * Reset the iteration.
+     * Search will begin at the start of the text string if a forward 
+     * iteration is initiated before a backwards iteration. Otherwise if 
+     * a backwards iteration is initiated before a forwards iteration, the 
+     * search will begin at the end of the text string.
+     * @stable ICU 2.0
+     */
+    virtual void reset();
+
+    /**
+     * Returns a copy of StringSearch with the same behavior, and 
+     * iterating over the same text, as this one. Note that all data will be
+     * replicated, except for the user-specified collator and the
+     * breakiterator.
+     * @return cloned object
+     * @stable ICU 2.0
+     */
+    virtual SearchIterator * safeClone(void) const;
+    
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+protected:
+
+    // protected method -------------------------------------------------
+
+    /**
+     * Search forward for matching text, starting at a given location.
+     * Clients should not call this method directly; instead they should 
+     * call {@link SearchIterator#next }.
+     * <p>
+     * If a match is found, this method returns the index at which the match
+     * starts and calls {@link SearchIterator#setMatchLength } with the number 
+     * of characters in the target text that make up the match. If no match 
+     * is found, the method returns <tt>USEARCH_DONE</tt>.
+     * <p>
+     * The <tt>StringSearch</tt> is adjusted so that its current index 
+     * (as returned by {@link #getOffset }) is the match position if one was 
+     * found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE.
+     * @param position The index in the target text at which the search 
+     *                 starts
+     * @param status for errors if any occurs
+     * @return The index at which the matched text in the target starts, or 
+     *         USEARCH_DONE if no match was found.
+     * @stable ICU 2.0
+     */
+    virtual int32_t handleNext(int32_t position, UErrorCode &status);
+
+    /**
+     * Search backward for matching text, starting at a given location.
+     * Clients should not call this method directly; instead they should call
+     * <tt>SearchIterator.previous()</tt>, which this method overrides.
+     * <p>
+     * If a match is found, this method returns the index at which the match
+     * starts and calls {@link SearchIterator#setMatchLength } with the number 
+     * of characters in the target text that make up the match. If no match 
+     * is found, the method returns <tt>USEARCH_DONE</tt>.
+     * <p>
+     * The <tt>StringSearch</tt> is adjusted so that its current index 
+     * (as returned by {@link #getOffset }) is the match position if one was 
+     * found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE.
+     * @param position The index in the target text at which the search 
+     *                 starts.
+     * @param status for errors if any occurs
+     * @return The index at which the matched text in the target starts, or 
+     *         USEARCH_DONE if no match was found.
+     * @stable ICU 2.0
+     */
+    virtual int32_t handlePrev(int32_t position, UErrorCode &status);
+    
+private :
+    StringSearch(); // default constructor not implemented
+
+    // private data members ----------------------------------------------
+
+    /**
+    * RuleBasedCollator, contains exactly the same UCollator * in m_strsrch_
+    * @stable ICU 2.0
+    */
+    RuleBasedCollator  m_collator_;
+    /**
+    * Pattern text
+    * @stable ICU 2.0
+    */
+    UnicodeString      m_pattern_;
+    /**
+    * String search struct data
+    * @stable ICU 2.0
+    */
+    UStringSearch     *m_strsrch_;
+
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif
+

Deleted: MacRuby/trunk/icu-1060/unicode/symtable.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/symtable.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/symtable.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,112 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (c) 2000-2005, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*   Date        Name        Description
-*   02/04/00    aliu        Creation.
-**********************************************************************
-*/
-#ifndef SYMTABLE_H
-#define SYMTABLE_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-
-/**
- * \file 
- * \brief C++ API: An interface that defines both lookup protocol and parsing of
- * symbolic names.
- */
- 
-U_NAMESPACE_BEGIN
-
-class ParsePosition;
-class UnicodeFunctor;
-class UnicodeSet;
-class UnicodeString;
-
-/**
- * An interface that defines both lookup protocol and parsing of
- * symbolic names.
- *
- * <p>A symbol table maintains two kinds of mappings.  The first is
- * between symbolic names and their values.  For example, if the
- * variable with the name "start" is set to the value "alpha"
- * (perhaps, though not necessarily, through an expression such as
- * "$start=alpha"), then the call lookup("start") will return the
- * char[] array ['a', 'l', 'p', 'h', 'a'].
- *
- * <p>The second kind of mapping is between character values and
- * UnicodeMatcher objects.  This is used by RuleBasedTransliterator,
- * which uses characters in the private use area to represent objects
- * such as UnicodeSets.  If U+E015 is mapped to the UnicodeSet [a-z],
- * then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
- *
- * <p>Finally, a symbol table defines parsing behavior for symbolic
- * names.  All symbolic names start with the SYMBOL_REF character.
- * When a parser encounters this character, it calls parseReference()
- * with the position immediately following the SYMBOL_REF.  The symbol
- * table parses the name, if there is one, and returns it.
- *
- * @stable ICU 2.8
- */
-class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ {
-public:
-
-    /**
-     * The character preceding a symbol reference name.
-     * @stable ICU 2.8
-     */
-    enum { SYMBOL_REF = 0x0024 /*$*/ };
-
-    /**
-     * Destructor.
-     * @stable ICU 2.8
-     */
-    virtual ~SymbolTable();
-
-    /**
-     * Lookup the characters associated with this string and return it.
-     * Return <tt>NULL</tt> if no such name exists.  The resultant
-     * string may have length zero.
-     * @param s the symbolic name to lookup
-     * @return a string containing the name's value, or <tt>NULL</tt> if
-     * there is no mapping for s.
-     * @stable ICU 2.8
-     */
-    virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
-
-    /**
-     * Lookup the UnicodeMatcher associated with the given character, and
-     * return it.  Return <tt>NULL</tt> if not found.
-     * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
-     * @return the UnicodeMatcher object represented by the given
-     * character, or NULL if there is no mapping for ch.
-     * @stable ICU 2.8
-     */
-    virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0;
-
-    /**
-     * Parse a symbol reference name from the given string, starting
-     * at the given position.  If no valid symbol reference name is
-     * found, return the empty string and leave pos unchanged.  That is, if the
-     * character at pos cannot start a name, or if pos is at or after
-     * text.length(), then return an empty string.  This indicates an
-     * isolated SYMBOL_REF character.
-     * @param text the text to parse for the name
-     * @param pos on entry, the index of the first character to parse.
-     * This is the character following the SYMBOL_REF character.  On
-     * exit, the index after the last parsed character.  If the parse
-     * failed, pos is unchanged on exit.
-     * @param limit the index after the last character to be parsed.
-     * @return the parsed name, or an empty string if there is no
-     * valid symbolic name at the given position.
-     * @stable ICU 2.8
-     */
-    virtual UnicodeString parseReference(const UnicodeString& text,
-                                         ParsePosition& pos, int32_t limit) const = 0;
-};
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/symtable.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/symtable.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/symtable.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/symtable.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,112 @@
+/*
+**********************************************************************
+*   Copyright (c) 2000-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   02/04/00    aliu        Creation.
+**********************************************************************
+*/
+#ifndef SYMTABLE_H
+#define SYMTABLE_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file 
+ * \brief C++ API: An interface that defines both lookup protocol and parsing of
+ * symbolic names.
+ */
+ 
+U_NAMESPACE_BEGIN
+
+class ParsePosition;
+class UnicodeFunctor;
+class UnicodeSet;
+class UnicodeString;
+
+/**
+ * An interface that defines both lookup protocol and parsing of
+ * symbolic names.
+ *
+ * <p>A symbol table maintains two kinds of mappings.  The first is
+ * between symbolic names and their values.  For example, if the
+ * variable with the name "start" is set to the value "alpha"
+ * (perhaps, though not necessarily, through an expression such as
+ * "$start=alpha"), then the call lookup("start") will return the
+ * char[] array ['a', 'l', 'p', 'h', 'a'].
+ *
+ * <p>The second kind of mapping is between character values and
+ * UnicodeMatcher objects.  This is used by RuleBasedTransliterator,
+ * which uses characters in the private use area to represent objects
+ * such as UnicodeSets.  If U+E015 is mapped to the UnicodeSet [a-z],
+ * then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
+ *
+ * <p>Finally, a symbol table defines parsing behavior for symbolic
+ * names.  All symbolic names start with the SYMBOL_REF character.
+ * When a parser encounters this character, it calls parseReference()
+ * with the position immediately following the SYMBOL_REF.  The symbol
+ * table parses the name, if there is one, and returns it.
+ *
+ * @stable ICU 2.8
+ */
+class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ {
+public:
+
+    /**
+     * The character preceding a symbol reference name.
+     * @stable ICU 2.8
+     */
+    enum { SYMBOL_REF = 0x0024 /*$*/ };
+
+    /**
+     * Destructor.
+     * @stable ICU 2.8
+     */
+    virtual ~SymbolTable();
+
+    /**
+     * Lookup the characters associated with this string and return it.
+     * Return <tt>NULL</tt> if no such name exists.  The resultant
+     * string may have length zero.
+     * @param s the symbolic name to lookup
+     * @return a string containing the name's value, or <tt>NULL</tt> if
+     * there is no mapping for s.
+     * @stable ICU 2.8
+     */
+    virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
+
+    /**
+     * Lookup the UnicodeMatcher associated with the given character, and
+     * return it.  Return <tt>NULL</tt> if not found.
+     * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
+     * @return the UnicodeMatcher object represented by the given
+     * character, or NULL if there is no mapping for ch.
+     * @stable ICU 2.8
+     */
+    virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0;
+
+    /**
+     * Parse a symbol reference name from the given string, starting
+     * at the given position.  If no valid symbol reference name is
+     * found, return the empty string and leave pos unchanged.  That is, if the
+     * character at pos cannot start a name, or if pos is at or after
+     * text.length(), then return an empty string.  This indicates an
+     * isolated SYMBOL_REF character.
+     * @param text the text to parse for the name
+     * @param pos on entry, the index of the first character to parse.
+     * This is the character following the SYMBOL_REF character.  On
+     * exit, the index after the last parsed character.  If the parse
+     * failed, pos is unchanged on exit.
+     * @param limit the index after the last character to be parsed.
+     * @return the parsed name, or an empty string if there is no
+     * valid symbolic name at the given position.
+     * @stable ICU 2.8
+     */
+    virtual UnicodeString parseReference(const UnicodeString& text,
+                                         ParsePosition& pos, int32_t limit) const = 0;
+};
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/tblcoll.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/tblcoll.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/tblcoll.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,926 +0,0 @@
-/*
-******************************************************************************
-* Copyright (C) 1996-2008, International Business Machines Corporation and
-* others. All Rights Reserved.
-******************************************************************************
-*/
-
-/**
- * \file 
- * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
- */
-
-/**
-* File tblcoll.h
-*
-* Created by: Helena Shih
-*
-* Modification History:
-*
-*  Date        Name        Description
-*  2/5/97      aliu        Added streamIn and streamOut methods.  Added
-*                          constructor which reads RuleBasedCollator object from
-*                          a binary file.  Added writeToFile method which streams
-*                          RuleBasedCollator out to a binary file.  The streamIn
-*                          and streamOut methods use istream and ostream objects
-*                          in binary mode.
-*  2/12/97     aliu        Modified to use TableCollationData sub-object to
-*                          hold invariant data.
-*  2/13/97     aliu        Moved several methods into this class from Collation.
-*                          Added a private RuleBasedCollator(Locale&) constructor,
-*                          to be used by Collator::createDefault().  General
-*                          clean up.
-*  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
-*                          constructor and getDynamicClassID.
-*  3/5/97      aliu        Modified constructFromFile() to add parameter
-*                          specifying whether or not binary loading is to be
-*                          attempted.  This is required for dynamic rule loading.
-* 05/07/97     helena      Added memory allocation error detection.
-*  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
-*                          use MergeCollation::getPattern.
-*  6/20/97     helena      Java class name change.
-*  8/18/97     helena      Added internal API documentation.
-* 09/03/97     helena      Added createCollationKeyValues().
-* 02/10/98     damiba      Added compare with "length" parameter
-* 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
-* 04/23/99     stephen     Removed EDecompositionMode, merged with
-*                          Normalizer::EMode
-* 06/14/99     stephen     Removed kResourceBundleSuffix
-* 11/02/99     helena      Collator performance enhancements.  Eliminates the
-*                          UnicodeString construction and special case for NO_OP.
-* 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
-*                          internal state management.
-* 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
-*                          to implementation file.
-* 01/29/01     synwee      Modified into a C++ wrapper which calls C API
-*                          (ucol.h)
-*/
-
-#ifndef TBLCOLL_H
-#define TBLCOLL_H
-
-#include "unicode/utypes.h"
-
- 
-#if !UCONFIG_NO_COLLATION
-
-#include "unicode/coll.h"
-#include "unicode/ucol.h"
-#include "unicode/sortkey.h"
-#include "unicode/normlzr.h"
-
-U_NAMESPACE_BEGIN
-
-/**
-* @stable ICU 2.0
-*/
-class StringSearch;
-/**
-* @stable ICU 2.0
-*/
-class CollationElementIterator;
-
-/**
- * The RuleBasedCollator class provides the simple implementation of
- * Collator, using data-driven tables. The user can create a customized
- * table-based collation.
- * <P>
- * <em>Important: </em>The ICU collation service has been reimplemented 
- * in order to achieve better performance and UCA compliance. 
- * For details, see the 
- * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
- * collation design document</a>.
- * <p>
- * RuleBasedCollator is a thin C++ wrapper over the C implementation.
- * <p>
- * For more information about the collation service see 
- * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
- * <p>
- * Collation service provides correct sorting orders for most locales supported in ICU. 
- * If specific data for a locale is not available, the orders eventually falls back
- * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 
- * <p>
- * Sort ordering may be customized by providing your own set of rules. For more on
- * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
- * Collation customization</a> section of the users guide.
- * <p>
- * Note, RuleBasedCollator is not to be subclassed.
- * @see        Collator
- * @version    2.0 11/15/2001
- */
-class U_I18N_API RuleBasedCollator : public Collator
-{
-public:
-
-  // constructor -------------------------------------------------------------
-
-    /**
-     * RuleBasedCollator constructor. This takes the table rules and builds a
-     * collation table out of them. Please see RuleBasedCollator class
-     * description for more details on the collation rule syntax.
-     * @param rules the collation rules to build the collation table from.
-     * @param status reporting a success or an error.
-     * @see Locale
-     * @stable ICU 2.0
-     */
-    RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
-
-    /**
-     * RuleBasedCollator constructor. This takes the table rules and builds a
-     * collation table out of them. Please see RuleBasedCollator class
-     * description for more details on the collation rule syntax.
-     * @param rules the collation rules to build the collation table from.
-     * @param collationStrength default strength for comparison
-     * @param status reporting a success or an error.
-     * @see Locale
-     * @stable ICU 2.0
-     */
-    RuleBasedCollator(const UnicodeString& rules,
-                       ECollationStrength collationStrength,
-                       UErrorCode& status);
-
-    /**
-     * RuleBasedCollator constructor. This takes the table rules and builds a
-     * collation table out of them. Please see RuleBasedCollator class
-     * description for more details on the collation rule syntax.
-     * @param rules the collation rules to build the collation table from.
-     * @param decompositionMode the normalisation mode
-     * @param status reporting a success or an error.
-     * @see Locale
-     * @stable ICU 2.0
-     */
-    RuleBasedCollator(const UnicodeString& rules,
-                    UColAttributeValue decompositionMode,
-                    UErrorCode& status);
-
-    /**
-     * RuleBasedCollator constructor. This takes the table rules and builds a
-     * collation table out of them. Please see RuleBasedCollator class
-     * description for more details on the collation rule syntax.
-     * @param rules the collation rules to build the collation table from.
-     * @param collationStrength default strength for comparison
-     * @param decompositionMode the normalisation mode
-     * @param status reporting a success or an error.
-     * @see Locale
-     * @stable ICU 2.0
-     */
-    RuleBasedCollator(const UnicodeString& rules,
-                    ECollationStrength collationStrength,
-                    UColAttributeValue decompositionMode,
-                    UErrorCode& status);
-
-    /**
-     * Copy constructor.
-     * @param other the RuleBasedCollator object to be copied
-     * @see Locale
-     * @stable ICU 2.0
-     */
-    RuleBasedCollator(const RuleBasedCollator& other);
-
-
-    /** Opens a collator from a collator binary image created using
-    *  cloneBinary. Binary image used in instantiation of the 
-    *  collator remains owned by the user and should stay around for 
-    *  the lifetime of the collator. The API also takes a base collator
-    *  which usualy should be UCA.
-    *  @param bin binary image owned by the user and required through the
-    *             lifetime of the collator
-    *  @param length size of the image. If negative, the API will try to
-    *                figure out the length of the image
-    *  @param base fallback collator, usually UCA. Base is required to be
-    *              present through the lifetime of the collator. Currently 
-    *              it cannot be NULL.
-    *  @param status for catching errors
-    *  @return newly created collator
-    *  @see cloneBinary
-    *  @stable ICU 3.4
-    */
-    RuleBasedCollator(const uint8_t *bin, int32_t length, 
-                    const RuleBasedCollator *base, 
-                    UErrorCode &status);
-    // destructor --------------------------------------------------------------
-
-    /**
-     * Destructor.
-     * @stable ICU 2.0
-     */
-    virtual ~RuleBasedCollator();
-
-    // public methods ----------------------------------------------------------
-
-    /**
-     * Assignment operator.
-     * @param other other RuleBasedCollator object to compare with.
-     * @stable ICU 2.0
-     */
-    RuleBasedCollator& operator=(const RuleBasedCollator& other);
-
-    /**
-     * Returns true if argument is the same as this object.
-     * @param other Collator object to be compared.
-     * @return true if arguments is the same as this object.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const Collator& other) const;
-
-    /**
-     * Returns true if argument is not the same as this object.
-     * @param other Collator object to be compared
-     * @return returns true if argument is not the same as this object.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator!=(const Collator& other) const;
-
-    /**
-     * Makes a deep copy of the object.
-     * The caller owns the returned object.
-     * @return the cloned object.
-     * @stable ICU 2.0
-     */
-    virtual Collator* clone(void) const;
-
-    /**
-     * Creates a collation element iterator for the source string. The caller of
-     * this method is responsible for the memory management of the return
-     * pointer.
-     * @param source the string over which the CollationElementIterator will
-     *        iterate.
-     * @return the collation element iterator of the source string using this as
-     *         the based Collator.
-     * @stable ICU 2.2
-     */
-    virtual CollationElementIterator* createCollationElementIterator(
-                                           const UnicodeString& source) const;
-
-    /**
-     * Creates a collation element iterator for the source. The caller of this
-     * method is responsible for the memory management of the returned pointer.
-     * @param source the CharacterIterator which produces the characters over
-     *        which the CollationElementItgerator will iterate.
-     * @return the collation element iterator of the source using this as the
-     *         based Collator.
-     * @stable ICU 2.2
-     */
-    virtual CollationElementIterator* createCollationElementIterator(
-                                         const CharacterIterator& source) const;
-
-    /**
-     * Compares a range of character data stored in two different strings based
-     * on the collation rules. Returns information about whether a string is
-     * less than, greater than or equal to another string in a language.
-     * This can be overriden in a subclass.
-     * @param source the source string.
-     * @param target the target string to be compared with the source string.
-     * @return the comparison result. GREATER if the source string is greater
-     *         than the target string, LESS if the source is less than the
-     *         target. Otherwise, returns EQUAL.
-     * @deprecated ICU 2.6 Use overload with UErrorCode&
-     */
-    virtual EComparisonResult compare(const UnicodeString& source,
-                                      const UnicodeString& target) const;
-
-
-    /**
-    * The comparison function compares the character data stored in two
-    * different strings. Returns information about whether a string is less 
-    * than, greater than or equal to another string.
-    * @param source the source string to be compared with.
-    * @param target the string that is to be compared with the source string.
-    * @param status possible error code
-    * @return Returns an enum value. UCOL_GREATER if source is greater
-    * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
-    * than target
-    * @stable ICU 2.6
-    **/
-    virtual UCollationResult compare(const UnicodeString& source,
-                                      const UnicodeString& target,
-                                      UErrorCode &status) const;
-
-    /**
-     * Compares a range of character data stored in two different strings based
-     * on the collation rules up to the specified length. Returns information
-     * about whether a string is less than, greater than or equal to another
-     * string in a language. This can be overriden in a subclass.
-     * @param source the source string.
-     * @param target the target string to be compared with the source string.
-     * @param length compares up to the specified length
-     * @return the comparison result. GREATER if the source string is greater
-     *         than the target string, LESS if the source is less than the
-     *         target. Otherwise, returns EQUAL.
-     * @deprecated ICU 2.6 Use overload with UErrorCode&
-     */
-    virtual EComparisonResult compare(const UnicodeString& source,
-                                      const UnicodeString&  target,
-                                      int32_t length) const;
-
-    /**
-    * Does the same thing as compare but limits the comparison to a specified 
-    * length
-    * @param source the source string to be compared with.
-    * @param target the string that is to be compared with the source string.
-    * @param length the length the comparison is limited to
-    * @param status possible error code
-    * @return Returns an enum value. UCOL_GREATER if source (up to the specified 
-    *         length) is greater than target; UCOL_EQUAL if source (up to specified 
-    *         length) is equal to target; UCOL_LESS if source (up to the specified 
-    *         length) is less  than target.
-    * @stable ICU 2.6
-    */
-    virtual UCollationResult compare(const UnicodeString& source,
-                                      const UnicodeString& target,
-                                      int32_t length,
-                                      UErrorCode &status) const;
-
-    /**
-     * The comparison function compares the character data stored in two
-     * different string arrays. Returns information about whether a string array
-     * is less than, greater than or equal to another string array.
-     * <p>Example of use:
-     * <pre>
-     * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
-     * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
-     * .       UErrorCode status = U_ZERO_ERROR;
-     * .       Collator *myCollation =
-     * .                         Collator::createInstance(Locale::US, status);
-     * .       if (U_FAILURE(status)) return;
-     * .       myCollation->setStrength(Collator::PRIMARY);
-     * .       // result would be Collator::EQUAL ("abc" == "ABC")
-     * .       // (no primary difference between "abc" and "ABC")
-     * .       Collator::EComparisonResult result =
-     * .                             myCollation->compare(abc, 3, ABC, 3);
-     * .       myCollation->setStrength(Collator::TERTIARY);
-     * .       // result would be Collator::LESS ("abc" &lt;&lt;&lt; "ABC")
-     * .       // (with tertiary difference between "abc" and "ABC")
-     * .       result =  myCollation->compare(abc, 3, ABC, 3);
-     * </pre>
-     * @param source the source string array to be compared with.
-     * @param sourceLength the length of the source string array. If this value
-     *        is equal to -1, the string array is null-terminated.
-     * @param target the string that is to be compared with the source string.
-     * @param targetLength the length of the target string array. If this value
-     *        is equal to -1, the string array is null-terminated.
-     * @return Returns a byte value. GREATER if source is greater than target;
-     *         EQUAL if source is equal to target; LESS if source is less than
-     *         target
-     * @deprecated ICU 2.6 Use overload with UErrorCode&
-     */
-    virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
-                                      const UChar* target, int32_t targetLength)
-                                      const;
-
-    /**
-    * The comparison function compares the character data stored in two
-    * different string arrays. Returns information about whether a string array 
-    * is less than, greater than or equal to another string array.
-    * @param source the source string array to be compared with.
-    * @param sourceLength the length of the source string array.  If this value
-    *        is equal to -1, the string array is null-terminated.
-    * @param target the string that is to be compared with the source string.
-    * @param targetLength the length of the target string array.  If this value
-    *        is equal to -1, the string array is null-terminated.
-    * @param status possible error code
-    * @return Returns an enum value. UCOL_GREATER if source is greater
-    * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
-    * than target
-    * @stable ICU 2.6
-    */
-    virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
-                                      const UChar* target, int32_t targetLength,
-                                      UErrorCode &status) const;
-
-    /**
-    * Transforms a specified region of the string into a series of characters
-    * that can be compared with CollationKey.compare. Use a CollationKey when
-    * you need to do repeated comparisions on the same string. For a single
-    * comparison the compare method will be faster.
-    * @param source the source string.
-    * @param key the transformed key of the source string.
-    * @param status the error code status.
-    * @return the transformed key.
-    * @see CollationKey
-    * @deprecated ICU 2.8 Use getSortKey(...) instead
-    */
-    virtual CollationKey& getCollationKey(const UnicodeString& source,
-                                          CollationKey& key,
-                                          UErrorCode& status) const;
-
-    /**
-    * Transforms a specified region of the string into a series of characters
-    * that can be compared with CollationKey.compare. Use a CollationKey when
-    * you need to do repeated comparisions on the same string. For a single
-    * comparison the compare method will be faster.
-    * @param source the source string.
-    * @param sourceLength the length of the source string.
-    * @param key the transformed key of the source string.
-    * @param status the error code status.
-    * @return the transformed key.
-    * @see CollationKey
-    * @deprecated ICU 2.8 Use getSortKey(...) instead
-    */
-    virtual CollationKey& getCollationKey(const UChar *source,
-                                          int32_t sourceLength,
-                                          CollationKey& key,
-                                          UErrorCode& status) const;
-
-    /**
-     * Generates the hash code for the rule-based collation object.
-     * @return the hash code.
-     * @stable ICU 2.0
-     */
-    virtual int32_t hashCode(void) const;
-
-    /**
-    * Gets the locale of the Collator
-    * @param type can be either requested, valid or actual locale. For more
-    *             information see the definition of ULocDataLocaleType in
-    *             uloc.h
-    * @param status the error code status.
-    * @return locale where the collation data lives. If the collator
-    *         was instantiated from rules, locale is empty.
-    * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
-    */
-    virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
-
-    /**
-     * Gets the table-based rules for the collation object.
-     * @return returns the collation rules that the table collation object was
-     *         created from.
-     * @stable ICU 2.0
-     */
-    const UnicodeString& getRules(void) const;
-
-    /**
-     * Gets the version information for a Collator.
-     * @param info the version # information, the result will be filled in
-     * @stable ICU 2.0
-     */
-    virtual void getVersion(UVersionInfo info) const;
-
-    /**
-     * Return the maximum length of any expansion sequences that end with the
-     * specified comparison order.
-     * @param order a collation order returned by previous or next.
-     * @return maximum size of the expansion sequences ending with the collation
-     *         element or 1 if collation element does not occur at the end of
-     *         any expansion sequence
-     * @see CollationElementIterator#getMaxExpansion
-     * @stable ICU 2.0
-     */
-    int32_t getMaxExpansion(int32_t order) const;
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
-     * method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone()
-     * methods call this method.
-     * @return The class ID for this object. All objects of a given class have
-     *         the same class ID. Objects of other classes have different class
-     *         IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-
-    /**
-     * Returns the class ID for this class. This is useful only for comparing to
-     * a return value from getDynamicClassID(). For example:
-     * <pre>
-     * Base* polymorphic_pointer = createPolymorphicObject();
-     * if (polymorphic_pointer->getDynamicClassID() ==
-     *                                          Derived::getStaticClassID()) ...
-     * </pre>
-     * @return The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns the binary format of the class's rules. The format is that of
-     * .col files.
-     * @param length Returns the length of the data, in bytes
-     * @param status the error code status.
-     * @return memory, owned by the caller, of size 'length' bytes.
-     * @stable ICU 2.2
-     */
-    uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
-
-
-    /** Creates a binary image of a collator. This binary image can be stored and 
-    *  later used to instantiate a collator using ucol_openBinary.
-    *  This API supports preflighting.
-    *  @param buffer a fill-in buffer to receive the binary image
-    *  @param capacity capacity of the destination buffer
-    *  @param status for catching errors
-    *  @return size of the image
-    *  @see ucol_openBinary
-    *  @stable ICU 3.4
-    */
-    int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
-
-    /**
-     * Returns current rules. Delta defines whether full rules are returned or
-     * just the tailoring.
-     * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
-     * @param buffer UnicodeString to store the result rules
-     * @stable ICU 2.2
-     */
-    void getRules(UColRuleOption delta, UnicodeString &buffer);
-
-    /**
-     * Universal attribute setter
-     * @param attr attribute type
-     * @param value attribute value
-     * @param status to indicate whether the operation went on smoothly or there were errors
-     * @stable ICU 2.2
-     */
-    virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
-                              UErrorCode &status);
-
-    /**
-     * Universal attribute getter.
-     * @param attr attribute type
-     * @param status to indicate whether the operation went on smoothly or there were errors
-     * @return attribute value
-     * @stable ICU 2.2
-     */
-    virtual UColAttributeValue getAttribute(UColAttribute attr,
-                                            UErrorCode &status);
-
-    /**
-     * Sets the variable top to a collation element value of a string supplied.
-     * @param varTop one or more (if contraction) UChars to which the variable top should be set
-     * @param len length of variable top string. If -1 it is considered to be zero terminated.
-     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
-     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br>
-     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
-     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
-     * @stable ICU 2.0
-     */
-    virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
-
-    /**
-     * Sets the variable top to a collation element value of a string supplied.
-     * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
-     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
-     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br>
-     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
-     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
-     * @stable ICU 2.0
-     */
-    virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status);
-
-    /**
-     * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
-     * Lower 16 bits are ignored.
-     * @param varTop CE value, as returned by setVariableTop or ucol)getVariableTop
-     * @param status error code (not changed by function)
-     * @stable ICU 2.0
-     */
-    virtual void setVariableTop(const uint32_t varTop, UErrorCode &status);
-
-    /**
-     * Gets the variable top value of a Collator.
-     * Lower 16 bits are undefined and should be ignored.
-     * @param status error code (not changed by function). If error code is set, the return value is undefined.
-     * @stable ICU 2.0
-     */
-    virtual uint32_t getVariableTop(UErrorCode &status) const;
-
-    /**
-     * Get an UnicodeSet that contains all the characters and sequences tailored in 
-     * this collator.
-     * @param status      error code of the operation
-     * @return a pointer to a UnicodeSet object containing all the 
-     *         code points and sequences that may sort differently than
-     *         in the UCA. The object must be disposed of by using delete
-     * @stable ICU 2.4
-     */
-    virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
-
-    /**
-     * Thread safe cloning operation.
-     * @return pointer to the new clone, user should remove it.
-     * @stable ICU 2.2
-     */
-    virtual Collator* safeClone(void);
-
-    /**
-     * Get the sort key as an array of bytes from an UnicodeString.
-     * @param source string to be processed.
-     * @param result buffer to store result in. If NULL, number of bytes needed
-     *        will be returned.
-     * @param resultLength length of the result buffer. If if not enough the
-     *        buffer will be filled to capacity.
-     * @return Number of bytes needed for storing the sort key
-     * @stable ICU 2.0
-     */
-    virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
-                               int32_t resultLength) const;
-
-    /**
-     * Get the sort key as an array of bytes from an UChar buffer.
-     * @param source string to be processed.
-     * @param sourceLength length of string to be processed. If -1, the string
-     *        is 0 terminated and length will be decided by the function.
-     * @param result buffer to store result in. If NULL, number of bytes needed
-     *        will be returned.
-     * @param resultLength length of the result buffer. If if not enough the
-     *        buffer will be filled to capacity.
-     * @return Number of bytes needed for storing the sort key
-     * @stable ICU 2.2
-     */
-    virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
-                               uint8_t *result, int32_t resultLength) const;
-
-    /**
-    * Determines the minimum strength that will be use in comparison or
-    * transformation.
-    * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
-    * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
-    * are ignored.
-    * @return the current comparison level.
-    * @see RuleBasedCollator#setStrength
-    * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
-    */
-    virtual ECollationStrength getStrength(void) const;
-
-    /**
-    * Sets the minimum strength to be used in comparison or transformation.
-    * @see RuleBasedCollator#getStrength
-    * @param newStrength the new comparison level.
-    * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
-    */
-    virtual void setStrength(ECollationStrength newStrength);
-
-private:
-
-    // private static constants -----------------------------------------------
-
-    enum {
-        /* need look up in .commit() */
-        CHARINDEX = 0x70000000,
-        /* Expand index follows */
-        EXPANDCHARINDEX = 0x7E000000,
-        /* contract indexes follows */
-        CONTRACTCHARINDEX = 0x7F000000,
-        /* unmapped character values */
-        UNMAPPED = 0xFFFFFFFF,
-        /* primary strength increment */
-        PRIMARYORDERINCREMENT = 0x00010000,
-        /* secondary strength increment */
-        SECONDARYORDERINCREMENT = 0x00000100,
-        /* tertiary strength increment */
-        TERTIARYORDERINCREMENT = 0x00000001,
-        /* mask off anything but primary order */
-        PRIMARYORDERMASK = 0xffff0000,
-        /* mask off anything but secondary order */
-        SECONDARYORDERMASK = 0x0000ff00,
-        /* mask off anything but tertiary order */
-        TERTIARYORDERMASK = 0x000000ff,
-        /* mask off ignorable char order */
-        IGNORABLEMASK = 0x0000ffff,
-        /* use only the primary difference */
-        PRIMARYDIFFERENCEONLY = 0xffff0000,
-        /* use only the primary and secondary difference */
-        SECONDARYDIFFERENCEONLY = 0xffffff00,
-        /* primary order shift */
-        PRIMARYORDERSHIFT = 16,
-        /* secondary order shift */
-        SECONDARYORDERSHIFT = 8,
-        /* starting value for collation elements */
-        COLELEMENTSTART = 0x02020202,
-        /* testing mask for primary low element */
-        PRIMARYLOWZEROMASK = 0x00FF0000,
-        /* reseting value for secondaries and tertiaries */
-        RESETSECONDARYTERTIARY = 0x00000202,
-        /* reseting value for tertiaries */
-        RESETTERTIARY = 0x00000002,
-
-        PRIMIGNORABLE = 0x0202
-    };
-
-    // private data members ---------------------------------------------------
-
-    UBool dataIsOwned;
-
-    UBool isWriteThroughAlias;
-
-    /**
-    * c struct for collation. All initialisation for it has to be done through
-    * setUCollator().
-    */
-    UCollator *ucollator;
-
-    /**
-    * Rule UnicodeString
-    */
-    UnicodeString urulestring;
-
-    // friend classes --------------------------------------------------------
-
-    /**
-    * Used to iterate over collation elements in a character source.
-    */
-    friend class CollationElementIterator;
-
-    /**
-    * Collator ONLY needs access to RuleBasedCollator(const Locale&,
-    *                                                       UErrorCode&)
-    */
-    friend class Collator;
-
-    /**
-    * Searching over collation elements in a character source
-    */
-    friend class StringSearch;
-
-    // private constructors --------------------------------------------------
-
-    /**
-     * Default constructor
-     */
-    RuleBasedCollator();
-
-    /**
-     * RuleBasedCollator constructor. This constructor takes a locale. The
-     * only caller of this class should be Collator::createInstance(). If
-     * createInstance() happens to know that the requested locale's collation is
-     * implemented as a RuleBasedCollator, it can then call this constructor.
-     * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
-     * COLLATION TABLE. It does this by falling back to defaults.
-     * @param desiredLocale locale used
-     * @param status error code status
-     */
-    RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
-
-    /**
-     * common constructor implementation
-     *
-     * @param rules the collation rules to build the collation table from.
-     * @param collationStrength default strength for comparison
-     * @param decompositionMode the normalisation mode
-     * @param status reporting a success or an error.
-     */
-    void
-    construct(const UnicodeString& rules,
-              UColAttributeValue collationStrength,
-              UColAttributeValue decompositionMode,
-              UErrorCode& status);
-
-    // private methods -------------------------------------------------------
-
-    /**
-    * Creates the c struct for ucollator
-    * @param locale desired locale
-    * @param status error status
-    */
-    void setUCollator(const Locale& locale, UErrorCode& status);
-
-    /**
-    * Creates the c struct for ucollator
-    * @param locale desired locale name
-    * @param status error status
-    */
-    void setUCollator(const char* locale, UErrorCode& status);
-
-    /**
-    * Creates the c struct for ucollator. This used internally by StringSearch.
-    * Hence the responsibility of cleaning up the ucollator is not done by
-    * this RuleBasedCollator. The isDataOwned flag is set to FALSE.
-    * @param collator new ucollator data
-    * @param rules corresponding collation rules
-    */
-    void setUCollator(UCollator *collator);
-
-public:
-    /**
-    * Get UCollator data struct. Used only by StringSearch & intltest.
-    * @return UCollator data struct
-    * @internal
-    */
-    const UCollator * getUCollator();
-
-protected:
-   /**
-    * Used internally by registraton to define the requested and valid locales.
-    * @param requestedLocale the requsted locale
-    * @param validLocale the valid locale
-    * @param actualLocale the actual locale
-    * @internal
-    */
-    virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
-
-private:
-
-    // if not owned and not a write through alias, copy the ucollator
-    void checkOwned(void);
-
-    // utility to init rule string used by checkOwned and construct
-    void setRuleStringFromCollator();
-
-    /**
-    * Converts C's UCollationResult to EComparisonResult
-    * @param result member of the enum UComparisonResult
-    * @return EComparisonResult equivalent of UCollationResult
-    * @deprecated ICU 2.6. We will not need it.
-    */
-    Collator::EComparisonResult getEComparisonResult(
-                                            const UCollationResult &result) const;
-
-    /**
-    * Converts C's UCollationStrength to ECollationStrength
-    * @param strength member of the enum UCollationStrength
-    * @return ECollationStrength equivalent of UCollationStrength
-    */
-    Collator::ECollationStrength getECollationStrength(
-                                        const UCollationStrength &strength) const;
-
-    /**
-    * Converts C++'s ECollationStrength to UCollationStrength
-    * @param strength member of the enum ECollationStrength
-    * @return UCollationStrength equivalent of ECollationStrength
-    */
-    UCollationStrength getUCollationStrength(
-      const Collator::ECollationStrength &strength) const;
-};
-
-// inline method implementation ---------------------------------------------
-
-inline void RuleBasedCollator::setUCollator(const Locale &locale,
-                                               UErrorCode &status)
-{
-    setUCollator(locale.getName(), status);
-}
-
-
-inline void RuleBasedCollator::setUCollator(UCollator     *collator)
-{
-
-    if (ucollator && dataIsOwned) {
-        ucol_close(ucollator);
-    }
-    ucollator   = collator;
-    dataIsOwned = FALSE;
-    isWriteThroughAlias = TRUE;
-    setRuleStringFromCollator();
-}
-
-inline const UCollator * RuleBasedCollator::getUCollator()
-{
-    return ucollator;
-}
-
-inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult(
-                                           const UCollationResult &result) const
-{
-    switch (result)
-    {
-    case UCOL_LESS :
-        return Collator::LESS;
-    case UCOL_EQUAL :
-        return Collator::EQUAL;
-    default :
-        return Collator::GREATER;
-    }
-}
-
-inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength(
-                                       const UCollationStrength &strength) const
-{
-    switch (strength)
-    {
-    case UCOL_PRIMARY :
-        return Collator::PRIMARY;
-    case UCOL_SECONDARY :
-        return Collator::SECONDARY;
-    case UCOL_TERTIARY :
-        return Collator::TERTIARY;
-    case UCOL_QUATERNARY :
-        return Collator::QUATERNARY;
-    default :
-        return Collator::IDENTICAL;
-    }
-}
-
-inline UCollationStrength RuleBasedCollator::getUCollationStrength(
-                             const Collator::ECollationStrength &strength) const
-{
-    switch (strength)
-    {
-    case Collator::PRIMARY :
-        return UCOL_PRIMARY;
-    case Collator::SECONDARY :
-        return UCOL_SECONDARY;
-    case Collator::TERTIARY :
-        return UCOL_TERTIARY;
-    case Collator::QUATERNARY :
-        return UCOL_QUATERNARY;
-    default :
-        return UCOL_IDENTICAL;
-    }
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_COLLATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/tblcoll.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/tblcoll.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/tblcoll.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/tblcoll.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,926 @@
+/*
+******************************************************************************
+* Copyright (C) 1996-2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*/
+
+/**
+ * \file 
+ * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
+ */
+
+/**
+* File tblcoll.h
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+*  Date        Name        Description
+*  2/5/97      aliu        Added streamIn and streamOut methods.  Added
+*                          constructor which reads RuleBasedCollator object from
+*                          a binary file.  Added writeToFile method which streams
+*                          RuleBasedCollator out to a binary file.  The streamIn
+*                          and streamOut methods use istream and ostream objects
+*                          in binary mode.
+*  2/12/97     aliu        Modified to use TableCollationData sub-object to
+*                          hold invariant data.
+*  2/13/97     aliu        Moved several methods into this class from Collation.
+*                          Added a private RuleBasedCollator(Locale&) constructor,
+*                          to be used by Collator::createDefault().  General
+*                          clean up.
+*  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
+*                          constructor and getDynamicClassID.
+*  3/5/97      aliu        Modified constructFromFile() to add parameter
+*                          specifying whether or not binary loading is to be
+*                          attempted.  This is required for dynamic rule loading.
+* 05/07/97     helena      Added memory allocation error detection.
+*  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
+*                          use MergeCollation::getPattern.
+*  6/20/97     helena      Java class name change.
+*  8/18/97     helena      Added internal API documentation.
+* 09/03/97     helena      Added createCollationKeyValues().
+* 02/10/98     damiba      Added compare with "length" parameter
+* 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
+* 04/23/99     stephen     Removed EDecompositionMode, merged with
+*                          Normalizer::EMode
+* 06/14/99     stephen     Removed kResourceBundleSuffix
+* 11/02/99     helena      Collator performance enhancements.  Eliminates the
+*                          UnicodeString construction and special case for NO_OP.
+* 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
+*                          internal state management.
+* 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
+*                          to implementation file.
+* 01/29/01     synwee      Modified into a C++ wrapper which calls C API
+*                          (ucol.h)
+*/
+
+#ifndef TBLCOLL_H
+#define TBLCOLL_H
+
+#include "unicode/utypes.h"
+
+ 
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/coll.h"
+#include "unicode/ucol.h"
+#include "unicode/sortkey.h"
+#include "unicode/normlzr.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+* @stable ICU 2.0
+*/
+class StringSearch;
+/**
+* @stable ICU 2.0
+*/
+class CollationElementIterator;
+
+/**
+ * The RuleBasedCollator class provides the simple implementation of
+ * Collator, using data-driven tables. The user can create a customized
+ * table-based collation.
+ * <P>
+ * <em>Important: </em>The ICU collation service has been reimplemented 
+ * in order to achieve better performance and UCA compliance. 
+ * For details, see the 
+ * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
+ * collation design document</a>.
+ * <p>
+ * RuleBasedCollator is a thin C++ wrapper over the C implementation.
+ * <p>
+ * For more information about the collation service see 
+ * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
+ * <p>
+ * Collation service provides correct sorting orders for most locales supported in ICU. 
+ * If specific data for a locale is not available, the orders eventually falls back
+ * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 
+ * <p>
+ * Sort ordering may be customized by providing your own set of rules. For more on
+ * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
+ * Collation customization</a> section of the users guide.
+ * <p>
+ * Note, RuleBasedCollator is not to be subclassed.
+ * @see        Collator
+ * @version    2.0 11/15/2001
+ */
+class U_I18N_API RuleBasedCollator : public Collator
+{
+public:
+
+  // constructor -------------------------------------------------------------
+
+    /**
+     * RuleBasedCollator constructor. This takes the table rules and builds a
+     * collation table out of them. Please see RuleBasedCollator class
+     * description for more details on the collation rule syntax.
+     * @param rules the collation rules to build the collation table from.
+     * @param status reporting a success or an error.
+     * @see Locale
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
+
+    /**
+     * RuleBasedCollator constructor. This takes the table rules and builds a
+     * collation table out of them. Please see RuleBasedCollator class
+     * description for more details on the collation rule syntax.
+     * @param rules the collation rules to build the collation table from.
+     * @param collationStrength default strength for comparison
+     * @param status reporting a success or an error.
+     * @see Locale
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator(const UnicodeString& rules,
+                       ECollationStrength collationStrength,
+                       UErrorCode& status);
+
+    /**
+     * RuleBasedCollator constructor. This takes the table rules and builds a
+     * collation table out of them. Please see RuleBasedCollator class
+     * description for more details on the collation rule syntax.
+     * @param rules the collation rules to build the collation table from.
+     * @param decompositionMode the normalisation mode
+     * @param status reporting a success or an error.
+     * @see Locale
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator(const UnicodeString& rules,
+                    UColAttributeValue decompositionMode,
+                    UErrorCode& status);
+
+    /**
+     * RuleBasedCollator constructor. This takes the table rules and builds a
+     * collation table out of them. Please see RuleBasedCollator class
+     * description for more details on the collation rule syntax.
+     * @param rules the collation rules to build the collation table from.
+     * @param collationStrength default strength for comparison
+     * @param decompositionMode the normalisation mode
+     * @param status reporting a success or an error.
+     * @see Locale
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator(const UnicodeString& rules,
+                    ECollationStrength collationStrength,
+                    UColAttributeValue decompositionMode,
+                    UErrorCode& status);
+
+    /**
+     * Copy constructor.
+     * @param other the RuleBasedCollator object to be copied
+     * @see Locale
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator(const RuleBasedCollator& other);
+
+
+    /** Opens a collator from a collator binary image created using
+    *  cloneBinary. Binary image used in instantiation of the 
+    *  collator remains owned by the user and should stay around for 
+    *  the lifetime of the collator. The API also takes a base collator
+    *  which usualy should be UCA.
+    *  @param bin binary image owned by the user and required through the
+    *             lifetime of the collator
+    *  @param length size of the image. If negative, the API will try to
+    *                figure out the length of the image
+    *  @param base fallback collator, usually UCA. Base is required to be
+    *              present through the lifetime of the collator. Currently 
+    *              it cannot be NULL.
+    *  @param status for catching errors
+    *  @return newly created collator
+    *  @see cloneBinary
+    *  @stable ICU 3.4
+    */
+    RuleBasedCollator(const uint8_t *bin, int32_t length, 
+                    const RuleBasedCollator *base, 
+                    UErrorCode &status);
+    // destructor --------------------------------------------------------------
+
+    /**
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~RuleBasedCollator();
+
+    // public methods ----------------------------------------------------------
+
+    /**
+     * Assignment operator.
+     * @param other other RuleBasedCollator object to compare with.
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator& operator=(const RuleBasedCollator& other);
+
+    /**
+     * Returns true if argument is the same as this object.
+     * @param other Collator object to be compared.
+     * @return true if arguments is the same as this object.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const Collator& other) const;
+
+    /**
+     * Returns true if argument is not the same as this object.
+     * @param other Collator object to be compared
+     * @return returns true if argument is not the same as this object.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator!=(const Collator& other) const;
+
+    /**
+     * Makes a deep copy of the object.
+     * The caller owns the returned object.
+     * @return the cloned object.
+     * @stable ICU 2.0
+     */
+    virtual Collator* clone(void) const;
+
+    /**
+     * Creates a collation element iterator for the source string. The caller of
+     * this method is responsible for the memory management of the return
+     * pointer.
+     * @param source the string over which the CollationElementIterator will
+     *        iterate.
+     * @return the collation element iterator of the source string using this as
+     *         the based Collator.
+     * @stable ICU 2.2
+     */
+    virtual CollationElementIterator* createCollationElementIterator(
+                                           const UnicodeString& source) const;
+
+    /**
+     * Creates a collation element iterator for the source. The caller of this
+     * method is responsible for the memory management of the returned pointer.
+     * @param source the CharacterIterator which produces the characters over
+     *        which the CollationElementItgerator will iterate.
+     * @return the collation element iterator of the source using this as the
+     *         based Collator.
+     * @stable ICU 2.2
+     */
+    virtual CollationElementIterator* createCollationElementIterator(
+                                         const CharacterIterator& source) const;
+
+    /**
+     * Compares a range of character data stored in two different strings based
+     * on the collation rules. Returns information about whether a string is
+     * less than, greater than or equal to another string in a language.
+     * This can be overriden in a subclass.
+     * @param source the source string.
+     * @param target the target string to be compared with the source string.
+     * @return the comparison result. GREATER if the source string is greater
+     *         than the target string, LESS if the source is less than the
+     *         target. Otherwise, returns EQUAL.
+     * @deprecated ICU 2.6 Use overload with UErrorCode&
+     */
+    virtual EComparisonResult compare(const UnicodeString& source,
+                                      const UnicodeString& target) const;
+
+
+    /**
+    * The comparison function compares the character data stored in two
+    * different strings. Returns information about whether a string is less 
+    * than, greater than or equal to another string.
+    * @param source the source string to be compared with.
+    * @param target the string that is to be compared with the source string.
+    * @param status possible error code
+    * @return Returns an enum value. UCOL_GREATER if source is greater
+    * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
+    * than target
+    * @stable ICU 2.6
+    **/
+    virtual UCollationResult compare(const UnicodeString& source,
+                                      const UnicodeString& target,
+                                      UErrorCode &status) const;
+
+    /**
+     * Compares a range of character data stored in two different strings based
+     * on the collation rules up to the specified length. Returns information
+     * about whether a string is less than, greater than or equal to another
+     * string in a language. This can be overriden in a subclass.
+     * @param source the source string.
+     * @param target the target string to be compared with the source string.
+     * @param length compares up to the specified length
+     * @return the comparison result. GREATER if the source string is greater
+     *         than the target string, LESS if the source is less than the
+     *         target. Otherwise, returns EQUAL.
+     * @deprecated ICU 2.6 Use overload with UErrorCode&
+     */
+    virtual EComparisonResult compare(const UnicodeString& source,
+                                      const UnicodeString&  target,
+                                      int32_t length) const;
+
+    /**
+    * Does the same thing as compare but limits the comparison to a specified 
+    * length
+    * @param source the source string to be compared with.
+    * @param target the string that is to be compared with the source string.
+    * @param length the length the comparison is limited to
+    * @param status possible error code
+    * @return Returns an enum value. UCOL_GREATER if source (up to the specified 
+    *         length) is greater than target; UCOL_EQUAL if source (up to specified 
+    *         length) is equal to target; UCOL_LESS if source (up to the specified 
+    *         length) is less  than target.
+    * @stable ICU 2.6
+    */
+    virtual UCollationResult compare(const UnicodeString& source,
+                                      const UnicodeString& target,
+                                      int32_t length,
+                                      UErrorCode &status) const;
+
+    /**
+     * The comparison function compares the character data stored in two
+     * different string arrays. Returns information about whether a string array
+     * is less than, greater than or equal to another string array.
+     * <p>Example of use:
+     * <pre>
+     * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
+     * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
+     * .       UErrorCode status = U_ZERO_ERROR;
+     * .       Collator *myCollation =
+     * .                         Collator::createInstance(Locale::US, status);
+     * .       if (U_FAILURE(status)) return;
+     * .       myCollation->setStrength(Collator::PRIMARY);
+     * .       // result would be Collator::EQUAL ("abc" == "ABC")
+     * .       // (no primary difference between "abc" and "ABC")
+     * .       Collator::EComparisonResult result =
+     * .                             myCollation->compare(abc, 3, ABC, 3);
+     * .       myCollation->setStrength(Collator::TERTIARY);
+     * .       // result would be Collator::LESS ("abc" &lt;&lt;&lt; "ABC")
+     * .       // (with tertiary difference between "abc" and "ABC")
+     * .       result =  myCollation->compare(abc, 3, ABC, 3);
+     * </pre>
+     * @param source the source string array to be compared with.
+     * @param sourceLength the length of the source string array. If this value
+     *        is equal to -1, the string array is null-terminated.
+     * @param target the string that is to be compared with the source string.
+     * @param targetLength the length of the target string array. If this value
+     *        is equal to -1, the string array is null-terminated.
+     * @return Returns a byte value. GREATER if source is greater than target;
+     *         EQUAL if source is equal to target; LESS if source is less than
+     *         target
+     * @deprecated ICU 2.6 Use overload with UErrorCode&
+     */
+    virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
+                                      const UChar* target, int32_t targetLength)
+                                      const;
+
+    /**
+    * The comparison function compares the character data stored in two
+    * different string arrays. Returns information about whether a string array 
+    * is less than, greater than or equal to another string array.
+    * @param source the source string array to be compared with.
+    * @param sourceLength the length of the source string array.  If this value
+    *        is equal to -1, the string array is null-terminated.
+    * @param target the string that is to be compared with the source string.
+    * @param targetLength the length of the target string array.  If this value
+    *        is equal to -1, the string array is null-terminated.
+    * @param status possible error code
+    * @return Returns an enum value. UCOL_GREATER if source is greater
+    * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
+    * than target
+    * @stable ICU 2.6
+    */
+    virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
+                                      const UChar* target, int32_t targetLength,
+                                      UErrorCode &status) const;
+
+    /**
+    * Transforms a specified region of the string into a series of characters
+    * that can be compared with CollationKey.compare. Use a CollationKey when
+    * you need to do repeated comparisions on the same string. For a single
+    * comparison the compare method will be faster.
+    * @param source the source string.
+    * @param key the transformed key of the source string.
+    * @param status the error code status.
+    * @return the transformed key.
+    * @see CollationKey
+    * @deprecated ICU 2.8 Use getSortKey(...) instead
+    */
+    virtual CollationKey& getCollationKey(const UnicodeString& source,
+                                          CollationKey& key,
+                                          UErrorCode& status) const;
+
+    /**
+    * Transforms a specified region of the string into a series of characters
+    * that can be compared with CollationKey.compare. Use a CollationKey when
+    * you need to do repeated comparisions on the same string. For a single
+    * comparison the compare method will be faster.
+    * @param source the source string.
+    * @param sourceLength the length of the source string.
+    * @param key the transformed key of the source string.
+    * @param status the error code status.
+    * @return the transformed key.
+    * @see CollationKey
+    * @deprecated ICU 2.8 Use getSortKey(...) instead
+    */
+    virtual CollationKey& getCollationKey(const UChar *source,
+                                          int32_t sourceLength,
+                                          CollationKey& key,
+                                          UErrorCode& status) const;
+
+    /**
+     * Generates the hash code for the rule-based collation object.
+     * @return the hash code.
+     * @stable ICU 2.0
+     */
+    virtual int32_t hashCode(void) const;
+
+    /**
+    * Gets the locale of the Collator
+    * @param type can be either requested, valid or actual locale. For more
+    *             information see the definition of ULocDataLocaleType in
+    *             uloc.h
+    * @param status the error code status.
+    * @return locale where the collation data lives. If the collator
+    *         was instantiated from rules, locale is empty.
+    * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
+    */
+    virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+
+    /**
+     * Gets the table-based rules for the collation object.
+     * @return returns the collation rules that the table collation object was
+     *         created from.
+     * @stable ICU 2.0
+     */
+    const UnicodeString& getRules(void) const;
+
+    /**
+     * Gets the version information for a Collator.
+     * @param info the version # information, the result will be filled in
+     * @stable ICU 2.0
+     */
+    virtual void getVersion(UVersionInfo info) const;
+
+    /**
+     * Return the maximum length of any expansion sequences that end with the
+     * specified comparison order.
+     * @param order a collation order returned by previous or next.
+     * @return maximum size of the expansion sequences ending with the collation
+     *         element or 1 if collation element does not occur at the end of
+     *         any expansion sequence
+     * @see CollationElementIterator#getMaxExpansion
+     * @stable ICU 2.0
+     */
+    int32_t getMaxExpansion(int32_t order) const;
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+     * method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     * @return The class ID for this object. All objects of a given class have
+     *         the same class ID. Objects of other classes have different class
+     *         IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+    /**
+     * Returns the class ID for this class. This is useful only for comparing to
+     * a return value from getDynamicClassID(). For example:
+     * <pre>
+     * Base* polymorphic_pointer = createPolymorphicObject();
+     * if (polymorphic_pointer->getDynamicClassID() ==
+     *                                          Derived::getStaticClassID()) ...
+     * </pre>
+     * @return The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns the binary format of the class's rules. The format is that of
+     * .col files.
+     * @param length Returns the length of the data, in bytes
+     * @param status the error code status.
+     * @return memory, owned by the caller, of size 'length' bytes.
+     * @stable ICU 2.2
+     */
+    uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
+
+
+    /** Creates a binary image of a collator. This binary image can be stored and 
+    *  later used to instantiate a collator using ucol_openBinary.
+    *  This API supports preflighting.
+    *  @param buffer a fill-in buffer to receive the binary image
+    *  @param capacity capacity of the destination buffer
+    *  @param status for catching errors
+    *  @return size of the image
+    *  @see ucol_openBinary
+    *  @stable ICU 3.4
+    */
+    int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
+
+    /**
+     * Returns current rules. Delta defines whether full rules are returned or
+     * just the tailoring.
+     * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
+     * @param buffer UnicodeString to store the result rules
+     * @stable ICU 2.2
+     */
+    void getRules(UColRuleOption delta, UnicodeString &buffer);
+
+    /**
+     * Universal attribute setter
+     * @param attr attribute type
+     * @param value attribute value
+     * @param status to indicate whether the operation went on smoothly or there were errors
+     * @stable ICU 2.2
+     */
+    virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
+                              UErrorCode &status);
+
+    /**
+     * Universal attribute getter.
+     * @param attr attribute type
+     * @param status to indicate whether the operation went on smoothly or there were errors
+     * @return attribute value
+     * @stable ICU 2.2
+     */
+    virtual UColAttributeValue getAttribute(UColAttribute attr,
+                                            UErrorCode &status);
+
+    /**
+     * Sets the variable top to a collation element value of a string supplied.
+     * @param varTop one or more (if contraction) UChars to which the variable top should be set
+     * @param len length of variable top string. If -1 it is considered to be zero terminated.
+     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
+     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br>
+     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
+     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
+     * @stable ICU 2.0
+     */
+    virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
+
+    /**
+     * Sets the variable top to a collation element value of a string supplied.
+     * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
+     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
+     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br>
+     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
+     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
+     * @stable ICU 2.0
+     */
+    virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status);
+
+    /**
+     * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
+     * Lower 16 bits are ignored.
+     * @param varTop CE value, as returned by setVariableTop or ucol)getVariableTop
+     * @param status error code (not changed by function)
+     * @stable ICU 2.0
+     */
+    virtual void setVariableTop(const uint32_t varTop, UErrorCode &status);
+
+    /**
+     * Gets the variable top value of a Collator.
+     * Lower 16 bits are undefined and should be ignored.
+     * @param status error code (not changed by function). If error code is set, the return value is undefined.
+     * @stable ICU 2.0
+     */
+    virtual uint32_t getVariableTop(UErrorCode &status) const;
+
+    /**
+     * Get an UnicodeSet that contains all the characters and sequences tailored in 
+     * this collator.
+     * @param status      error code of the operation
+     * @return a pointer to a UnicodeSet object containing all the 
+     *         code points and sequences that may sort differently than
+     *         in the UCA. The object must be disposed of by using delete
+     * @stable ICU 2.4
+     */
+    virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
+
+    /**
+     * Thread safe cloning operation.
+     * @return pointer to the new clone, user should remove it.
+     * @stable ICU 2.2
+     */
+    virtual Collator* safeClone(void);
+
+    /**
+     * Get the sort key as an array of bytes from an UnicodeString.
+     * @param source string to be processed.
+     * @param result buffer to store result in. If NULL, number of bytes needed
+     *        will be returned.
+     * @param resultLength length of the result buffer. If if not enough the
+     *        buffer will be filled to capacity.
+     * @return Number of bytes needed for storing the sort key
+     * @stable ICU 2.0
+     */
+    virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
+                               int32_t resultLength) const;
+
+    /**
+     * Get the sort key as an array of bytes from an UChar buffer.
+     * @param source string to be processed.
+     * @param sourceLength length of string to be processed. If -1, the string
+     *        is 0 terminated and length will be decided by the function.
+     * @param result buffer to store result in. If NULL, number of bytes needed
+     *        will be returned.
+     * @param resultLength length of the result buffer. If if not enough the
+     *        buffer will be filled to capacity.
+     * @return Number of bytes needed for storing the sort key
+     * @stable ICU 2.2
+     */
+    virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
+                               uint8_t *result, int32_t resultLength) const;
+
+    /**
+    * Determines the minimum strength that will be use in comparison or
+    * transformation.
+    * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
+    * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
+    * are ignored.
+    * @return the current comparison level.
+    * @see RuleBasedCollator#setStrength
+    * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
+    */
+    virtual ECollationStrength getStrength(void) const;
+
+    /**
+    * Sets the minimum strength to be used in comparison or transformation.
+    * @see RuleBasedCollator#getStrength
+    * @param newStrength the new comparison level.
+    * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
+    */
+    virtual void setStrength(ECollationStrength newStrength);
+
+private:
+
+    // private static constants -----------------------------------------------
+
+    enum {
+        /* need look up in .commit() */
+        CHARINDEX = 0x70000000,
+        /* Expand index follows */
+        EXPANDCHARINDEX = 0x7E000000,
+        /* contract indexes follows */
+        CONTRACTCHARINDEX = 0x7F000000,
+        /* unmapped character values */
+        UNMAPPED = 0xFFFFFFFF,
+        /* primary strength increment */
+        PRIMARYORDERINCREMENT = 0x00010000,
+        /* secondary strength increment */
+        SECONDARYORDERINCREMENT = 0x00000100,
+        /* tertiary strength increment */
+        TERTIARYORDERINCREMENT = 0x00000001,
+        /* mask off anything but primary order */
+        PRIMARYORDERMASK = 0xffff0000,
+        /* mask off anything but secondary order */
+        SECONDARYORDERMASK = 0x0000ff00,
+        /* mask off anything but tertiary order */
+        TERTIARYORDERMASK = 0x000000ff,
+        /* mask off ignorable char order */
+        IGNORABLEMASK = 0x0000ffff,
+        /* use only the primary difference */
+        PRIMARYDIFFERENCEONLY = 0xffff0000,
+        /* use only the primary and secondary difference */
+        SECONDARYDIFFERENCEONLY = 0xffffff00,
+        /* primary order shift */
+        PRIMARYORDERSHIFT = 16,
+        /* secondary order shift */
+        SECONDARYORDERSHIFT = 8,
+        /* starting value for collation elements */
+        COLELEMENTSTART = 0x02020202,
+        /* testing mask for primary low element */
+        PRIMARYLOWZEROMASK = 0x00FF0000,
+        /* reseting value for secondaries and tertiaries */
+        RESETSECONDARYTERTIARY = 0x00000202,
+        /* reseting value for tertiaries */
+        RESETTERTIARY = 0x00000002,
+
+        PRIMIGNORABLE = 0x0202
+    };
+
+    // private data members ---------------------------------------------------
+
+    UBool dataIsOwned;
+
+    UBool isWriteThroughAlias;
+
+    /**
+    * c struct for collation. All initialisation for it has to be done through
+    * setUCollator().
+    */
+    UCollator *ucollator;
+
+    /**
+    * Rule UnicodeString
+    */
+    UnicodeString urulestring;
+
+    // friend classes --------------------------------------------------------
+
+    /**
+    * Used to iterate over collation elements in a character source.
+    */
+    friend class CollationElementIterator;
+
+    /**
+    * Collator ONLY needs access to RuleBasedCollator(const Locale&,
+    *                                                       UErrorCode&)
+    */
+    friend class Collator;
+
+    /**
+    * Searching over collation elements in a character source
+    */
+    friend class StringSearch;
+
+    // private constructors --------------------------------------------------
+
+    /**
+     * Default constructor
+     */
+    RuleBasedCollator();
+
+    /**
+     * RuleBasedCollator constructor. This constructor takes a locale. The
+     * only caller of this class should be Collator::createInstance(). If
+     * createInstance() happens to know that the requested locale's collation is
+     * implemented as a RuleBasedCollator, it can then call this constructor.
+     * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
+     * COLLATION TABLE. It does this by falling back to defaults.
+     * @param desiredLocale locale used
+     * @param status error code status
+     */
+    RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
+
+    /**
+     * common constructor implementation
+     *
+     * @param rules the collation rules to build the collation table from.
+     * @param collationStrength default strength for comparison
+     * @param decompositionMode the normalisation mode
+     * @param status reporting a success or an error.
+     */
+    void
+    construct(const UnicodeString& rules,
+              UColAttributeValue collationStrength,
+              UColAttributeValue decompositionMode,
+              UErrorCode& status);
+
+    // private methods -------------------------------------------------------
+
+    /**
+    * Creates the c struct for ucollator
+    * @param locale desired locale
+    * @param status error status
+    */
+    void setUCollator(const Locale& locale, UErrorCode& status);
+
+    /**
+    * Creates the c struct for ucollator
+    * @param locale desired locale name
+    * @param status error status
+    */
+    void setUCollator(const char* locale, UErrorCode& status);
+
+    /**
+    * Creates the c struct for ucollator. This used internally by StringSearch.
+    * Hence the responsibility of cleaning up the ucollator is not done by
+    * this RuleBasedCollator. The isDataOwned flag is set to FALSE.
+    * @param collator new ucollator data
+    * @param rules corresponding collation rules
+    */
+    void setUCollator(UCollator *collator);
+
+public:
+    /**
+    * Get UCollator data struct. Used only by StringSearch & intltest.
+    * @return UCollator data struct
+    * @internal
+    */
+    const UCollator * getUCollator();
+
+protected:
+   /**
+    * Used internally by registraton to define the requested and valid locales.
+    * @param requestedLocale the requsted locale
+    * @param validLocale the valid locale
+    * @param actualLocale the actual locale
+    * @internal
+    */
+    virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
+
+private:
+
+    // if not owned and not a write through alias, copy the ucollator
+    void checkOwned(void);
+
+    // utility to init rule string used by checkOwned and construct
+    void setRuleStringFromCollator();
+
+    /**
+    * Converts C's UCollationResult to EComparisonResult
+    * @param result member of the enum UComparisonResult
+    * @return EComparisonResult equivalent of UCollationResult
+    * @deprecated ICU 2.6. We will not need it.
+    */
+    Collator::EComparisonResult getEComparisonResult(
+                                            const UCollationResult &result) const;
+
+    /**
+    * Converts C's UCollationStrength to ECollationStrength
+    * @param strength member of the enum UCollationStrength
+    * @return ECollationStrength equivalent of UCollationStrength
+    */
+    Collator::ECollationStrength getECollationStrength(
+                                        const UCollationStrength &strength) const;
+
+    /**
+    * Converts C++'s ECollationStrength to UCollationStrength
+    * @param strength member of the enum ECollationStrength
+    * @return UCollationStrength equivalent of ECollationStrength
+    */
+    UCollationStrength getUCollationStrength(
+      const Collator::ECollationStrength &strength) const;
+};
+
+// inline method implementation ---------------------------------------------
+
+inline void RuleBasedCollator::setUCollator(const Locale &locale,
+                                               UErrorCode &status)
+{
+    setUCollator(locale.getName(), status);
+}
+
+
+inline void RuleBasedCollator::setUCollator(UCollator     *collator)
+{
+
+    if (ucollator && dataIsOwned) {
+        ucol_close(ucollator);
+    }
+    ucollator   = collator;
+    dataIsOwned = FALSE;
+    isWriteThroughAlias = TRUE;
+    setRuleStringFromCollator();
+}
+
+inline const UCollator * RuleBasedCollator::getUCollator()
+{
+    return ucollator;
+}
+
+inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult(
+                                           const UCollationResult &result) const
+{
+    switch (result)
+    {
+    case UCOL_LESS :
+        return Collator::LESS;
+    case UCOL_EQUAL :
+        return Collator::EQUAL;
+    default :
+        return Collator::GREATER;
+    }
+}
+
+inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength(
+                                       const UCollationStrength &strength) const
+{
+    switch (strength)
+    {
+    case UCOL_PRIMARY :
+        return Collator::PRIMARY;
+    case UCOL_SECONDARY :
+        return Collator::SECONDARY;
+    case UCOL_TERTIARY :
+        return Collator::TERTIARY;
+    case UCOL_QUATERNARY :
+        return Collator::QUATERNARY;
+    default :
+        return Collator::IDENTICAL;
+    }
+}
+
+inline UCollationStrength RuleBasedCollator::getUCollationStrength(
+                             const Collator::ECollationStrength &strength) const
+{
+    switch (strength)
+    {
+    case Collator::PRIMARY :
+        return UCOL_PRIMARY;
+    case Collator::SECONDARY :
+        return UCOL_SECONDARY;
+    case Collator::TERTIARY :
+        return UCOL_TERTIARY;
+    case Collator::QUATERNARY :
+        return UCOL_QUATERNARY;
+    default :
+        return UCOL_IDENTICAL;
+    }
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/timezone.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/timezone.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/timezone.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,803 +0,0 @@
-/*************************************************************************
-* Copyright (c) 1997-2008, International Business Machines Corporation
-* and others. All Rights Reserved.
-**************************************************************************
-*
-* File TIMEZONE.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   04/21/97    aliu        Overhauled header.
-*   07/09/97    helena      Changed createInstance to createDefault.
-*   08/06/97    aliu        Removed dependency on internal header for Hashtable.
-*   08/10/98    stephen        Changed getDisplayName() API conventions to match
-*   08/19/98    stephen        Changed createTimeZone() to never return 0
-*   09/02/98    stephen        Sync to JDK 1.2 8/31
-*                            - Added getOffset(... monthlen ...)
-*                            - Added hasSameRules()
-*   09/15/98    stephen        Added getStaticClassID
-*   12/03/99    aliu        Moved data out of static table into icudata.dll.
-*                           Hashtable replaced by new static data structures.
-*   12/14/99    aliu        Made GMT public.
-*   08/15/01    grhoten     Made GMT private and added the getGMT() function
-**************************************************************************
-*/
-
-#ifndef TIMEZONE_H
-#define TIMEZONE_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: TimeZone object
- */
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-#include "unicode/ures.h"
-
-U_NAMESPACE_BEGIN
-
-class StringEnumeration;
-
-/**
- *
- * <code>TimeZone</code> represents a time zone offset, and also figures out daylight
- * savings.
- *
- * <p>
- * Typically, you get a <code>TimeZone</code> using <code>createDefault</code>
- * which creates a <code>TimeZone</code> based on the time zone where the program
- * is running. For example, for a program running in Japan, <code>createDefault</code>
- * creates a <code>TimeZone</code> object based on Japanese Standard Time.
- *
- * <p>
- * You can also get a <code>TimeZone</code> using <code>createTimeZone</code> along
- * with a time zone ID. For instance, the time zone ID for the Pacific
- * Standard Time zone is "PST". So, you can get a PST <code>TimeZone</code> object
- * with:
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * TimeZone *tz = TimeZone::createTimeZone("PST");
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- * You can use <code>getAvailableIDs</code> method to iterate through
- * all the supported time zone IDs. You can then choose a
- * supported ID to get a <code>TimeZone</code>.
- * If the time zone you want is not represented by one of the
- * supported IDs, then you can create a custom time zone ID with
- * the following syntax:
- *
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * GMT[+|-]hh[[:]mm]
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- *
- * For example, you might specify GMT+14:00 as a custom
- * time zone ID.  The <code>TimeZone</code> that is returned
- * when you specify a custom time zone ID does not include
- * daylight savings time.
- *
- * TimeZone is an abstract class representing a time zone.  A TimeZone is needed for
- * Calendar to produce local time for a particular time zone.  A TimeZone comprises
- * three basic pieces of information:
- * <ul>
- *    <li>A time zone offset; that, is the number of milliseconds to add or subtract
- *      from a time expressed in terms of GMT to convert it to the same time in that
- *      time zone (without taking daylight savings time into account).</li>
- *    <li>Logic necessary to take daylight savings time into account if daylight savings
- *      time is observed in that time zone (e.g., the days and hours on which daylight
- *      savings time begins and ends).</li>
- *    <li>An ID.  This is a text string that uniquely identifies the time zone.</li>
- * </ul>
- *
- * (Only the ID is actually implemented in TimeZone; subclasses of TimeZone may handle
- * daylight savings time and GMT offset in different ways.  Currently we only have one
- * TimeZone subclass: SimpleTimeZone.)
- * <P>
- * The TimeZone class contains a static list containing a TimeZone object for every
- * combination of GMT offset and daylight-savings time rules currently in use in the
- * world, each with a unique ID.  Each ID consists of a region (usually a continent or
- * ocean) and a city in that region, separated by a slash, (for example, Pacific
- * Standard Time is "America/Los_Angeles.")  Because older versions of this class used
- * three- or four-letter abbreviations instead, there is also a table that maps the older
- * abbreviations to the newer ones (for example, "PST" maps to "America/LosAngeles").
- * Anywhere the API requires an ID, you can use either form.
- * <P>
- * To create a new TimeZone, you call the factory function TimeZone::createTimeZone()
- * and pass it a time zone ID.  You can use the createEnumeration() function to
- * obtain a list of all the time zone IDs recognized by createTimeZone().
- * <P>
- * You can also use TimeZone::createDefault() to create a TimeZone.  This function uses
- * platform-specific APIs to produce a TimeZone for the time zone corresponding to
- * the client's computer's physical location.  For example, if you're in Japan (assuming
- * your machine is set up correctly), TimeZone::createDefault() will return a TimeZone
- * for Japanese Standard Time ("Asia/Tokyo").
- */
-class U_I18N_API TimeZone : public UObject {
-public:
-    /**
-     * @stable ICU 2.0
-     */
-    virtual ~TimeZone();
-
-    /**
-     * The GMT time zone has a raw offset of zero and does not use daylight
-     * savings time. This is a commonly used time zone.
-     * @return the GMT time zone.
-     * @stable ICU 2.0
-     */
-    static const TimeZone* U_EXPORT2 getGMT(void);
-
-    /**
-     * Creates a <code>TimeZone</code> for the given ID.
-     * @param ID the ID for a <code>TimeZone</code>, either an abbreviation such as
-     * "PST", a full name such as "America/Los_Angeles", or a custom ID
-     * such as "GMT-8:00".
-     * @return the specified <code>TimeZone</code>, or the GMT zone if the given ID
-     * cannot be understood.  Return result guaranteed to be non-null.  If you
-     * require that the specific zone asked for be returned, check the ID of the
-     * return result.
-     * @stable ICU 2.0
-     */
-    static TimeZone* U_EXPORT2 createTimeZone(const UnicodeString& ID);
-
-    /**
-     * Returns an enumeration over all recognized time zone IDs. (i.e.,
-     * all strings that createTimeZone() accepts)
-     *
-     * @return an enumeration object, owned by the caller.
-     * @stable ICU 2.4
-     */
-    static StringEnumeration* U_EXPORT2 createEnumeration();
-
-    /**
-     * Returns an enumeration over time zone IDs with a given raw
-     * offset from GMT.  There may be several times zones with the
-     * same GMT offset that differ in the way they handle daylight
-     * savings time.  For example, the state of Arizona doesn't
-     * observe daylight savings time.  If you ask for the time zone
-     * IDs corresponding to GMT-7:00, you'll get back an enumeration
-     * over two time zone IDs: "America/Denver," which corresponds to
-     * Mountain Standard Time in the winter and Mountain Daylight Time
-     * in the summer, and "America/Phoenix", which corresponds to
-     * Mountain Standard Time year-round, even in the summer.
-     *
-     * @param rawOffset an offset from GMT in milliseconds, ignoring
-     * the effect of daylight savings time, if any
-     * @return an enumeration object, owned by the caller
-     * @stable ICU 2.4
-     */
-    static StringEnumeration* U_EXPORT2 createEnumeration(int32_t rawOffset);
-
-    /**
-     * Returns an enumeration over time zone IDs associated with the
-     * given country.  Some zones are affiliated with no country
-     * (e.g., "UTC"); these may also be retrieved, as a group.
-     *
-     * @param country The ISO 3166 two-letter country code, or NULL to
-     * retrieve zones not affiliated with any country.
-     * @return an enumeration object, owned by the caller
-     * @stable ICU 2.4
-     */
-    static StringEnumeration* U_EXPORT2 createEnumeration(const char* country);
-
-#ifdef U_USE_TIMEZONE_OBSOLETE_2_8
-    /**
-     * Returns a list of time zone IDs, one for each time zone with a given GMT offset.
-     * The return value is a list because there may be several times zones with the same
-     * GMT offset that differ in the way they handle daylight savings time.  For example,
-     * the state of Arizona doesn't observe Daylight Savings time.  So if you ask for
-     * the time zone IDs corresponding to GMT-7:00, you'll get back two time zone IDs:
-     * "America/Denver," which corresponds to Mountain Standard Time in the winter and
-     * Mountain Daylight Time in the summer, and "America/Phoenix", which corresponds to
-     * Mountain Standard Time year-round, even in the summer.
-     * <P>
-     * The caller owns the list that is returned, but does not own the strings contained
-     * in that list.  Delete the array with uprv_free(), but DON'T delete the elements in the array.
-     *
-     * <p>NOTE: uprv_free() is declared in the private header source/common/cmemory.h.
-     *
-     * @param rawOffset  An offset from GMT in milliseconds.
-     * @param numIDs     Receives the number of items in the array that is returned.
-     * @return           An array of UnicodeString pointers, where each UnicodeString is
-     *                   a time zone ID for a time zone with the given GMT offset.  If
-     *                   there is no timezone that matches the GMT offset
-     *                   specified, NULL is returned.
-     * @obsolete ICU 2.8.  Use createEnumeration(int32_t) instead since this API will be removed in that release.
-     */
-    static const UnicodeString** createAvailableIDs(int32_t rawOffset, int32_t& numIDs);
-
-    /**
-     * Returns a list of time zone IDs associated with the given
-     * country.  Some zones are affiliated with no country (e.g.,
-     * "UTC"); these may also be retrieved, as a group.
-     *
-     * <P>The caller owns the list that is returned, but does not own
-     * the strings contained in that list.  Delete the array with uprv_free(), but
-     * <b>DON'T</b> delete the elements in the array.
-     *
-     * <p>NOTE: uprv_free() is declared in the private header source/common/cmemory.h.
-     *
-     * @param country The ISO 3166 two-letter country code, or NULL to
-     * retrieve zones not affiliated with any country.
-     * @param numIDs Receives the number of items in the array that is
-     * returned.
-     * @return An array of UnicodeString pointers, where each
-     * UnicodeString is a time zone ID for a time zone with the given
-     * country.  If there is no timezone that matches the country
-     * specified, NULL is returned.
-     * @obsolete ICU 2.8.  Use createEnumeration(const char*) instead since this API will be removed in that release.
-     */
-    static const UnicodeString** createAvailableIDs(const char* country,
-                                                          int32_t& numIDs);
-
-    /**
-     * Returns a list of all time zone IDs supported by the TimeZone class (i.e., all
-     * IDs that it's legal to pass to createTimeZone()).  The caller owns the list that
-     * is returned, but does not own the strings contained in that list.  Delete the array with uprv_free(),
-     * but DON'T delete the elements in the array.
-     *
-     * <p>NOTE: uprv_free() is declared in the private header source/common/cmemory.h.
-     *
-     * @param numIDs  Receives the number of zone IDs returned.
-     * @return        An array of UnicodeString pointers, where each is a time zone ID
-     *                supported by the TimeZone class.
-     * @obsolete ICU 2.8.  Use createEnumeration(void) instead since this API will be removed in that release.
-     */
-    static const UnicodeString** createAvailableIDs(int32_t& numIDs);
-#endif
-
-    /**
-     * Returns the number of IDs in the equivalency group that
-     * includes the given ID.  An equivalency group contains zones
-     * that have the same GMT offset and rules.
-     *
-     * <p>The returned count includes the given ID; it is always >= 1.
-     * The given ID must be a system time zone.  If it is not, returns
-     * zero.
-     * @param id a system time zone ID
-     * @return the number of zones in the equivalency group containing
-     * 'id', or zero if 'id' is not a valid system ID
-     * @see #getEquivalentID
-     * @stable ICU 2.0
-     */
-    static int32_t U_EXPORT2 countEquivalentIDs(const UnicodeString& id);
-
-    /**
-     * Returns an ID in the equivalency group that
-     * includes the given ID.  An equivalency group contains zones
-     * that have the same GMT offset and rules.
-     *
-     * <p>The given index must be in the range 0..n-1, where n is the
-     * value returned by <code>countEquivalentIDs(id)</code>.  For
-     * some value of 'index', the returned value will be equal to the
-     * given id.  If the given id is not a valid system time zone, or
-     * if 'index' is out of range, then returns an empty string.
-     * @param id a system time zone ID
-     * @param index a value from 0 to n-1, where n is the value
-     * returned by <code>countEquivalentIDs(id)</code>
-     * @return the ID of the index-th zone in the equivalency group
-     * containing 'id', or an empty string if 'id' is not a valid
-     * system ID or 'index' is out of range
-     * @see #countEquivalentIDs
-     * @stable ICU 2.0
-     */
-    static const UnicodeString U_EXPORT2 getEquivalentID(const UnicodeString& id,
-                                               int32_t index);
-
-    /**
-     * Creates a new copy of the default TimeZone for this host. Unless the default time
-     * zone has already been set using adoptDefault() or setDefault(), the default is
-     * determined by querying the system using methods in TPlatformUtilities. If the
-     * system routines fail, or if they specify a TimeZone or TimeZone offset which is not
-     * recognized, the TimeZone indicated by the ID kLastResortID is instantiated
-     * and made the default.
-     *
-     * @return   A default TimeZone. Clients are responsible for deleting the time zone
-     *           object returned.
-     * @stable ICU 2.0
-     */
-    static TimeZone* U_EXPORT2 createDefault(void);
-
-    /**
-     * Sets the default time zone (i.e., what's returned by createDefault()) to be the
-     * specified time zone.  If NULL is specified for the time zone, the default time
-     * zone is set to the default host time zone.  This call adopts the TimeZone object
-     * passed in; the clent is no longer responsible for deleting it.
-     *
-     * @param zone  A pointer to the new TimeZone object to use as the default.
-     * @stable ICU 2.0
-     */
-    static void U_EXPORT2 adoptDefault(TimeZone* zone);
-
-    /**
-     * Same as adoptDefault(), except that the TimeZone object passed in is NOT adopted;
-     * the caller remains responsible for deleting it.
-     *
-     * @param zone  The given timezone.
-     * @system
-     */
-    static void U_EXPORT2 setDefault(const TimeZone& zone);
-
-    /**
-     * Returns the timezone data version currently used by ICU.
-     * @param status Output param to filled in with a success or an error.
-     * @return the version string, such as "2007f"
-     * @stable ICU 4.0
-     */
-    static const char* U_EXPORT2 getTZDataVersion(UErrorCode& status);
-
-    /**
-     * Returns the canonical system timezone ID or the normalized
-     * custom time zone ID for the given time zone ID.
-     * @param id            The input timezone ID to be canonicalized.
-     * @param canonicalID   Receives the canonical system timezone ID
-     *                      or the custom timezone ID in normalized format.
-     * @param status        Recevies the status.  When the given timezone ID
-     *                      is neither a known system time zone ID nor a
-     *                      valid custom timezone ID, U_ILLEGAL_ARGUMENT_ERROR
-     *                      is set.
-     * @return A reference to the result.
-     * @draft ICU 4.0
-     */
-    static UnicodeString& U_EXPORT2 getCanonicalID(const UnicodeString& id,
-        UnicodeString& canonicalID, UErrorCode& status);
-
-    /**
-     * Returns the canonical system timezone ID or the normalized
-     * custom time zone ID for the given time zone ID.
-     * @param id            The input timezone ID to be canonicalized.
-     * @param canonicalID   Receives the canonical system timezone ID
-     *                      or the custom timezone ID in normalized format.
-     * @param isSystemID    Receives if the given ID is a known system
-     *                      timezone ID.
-     * @param status        Recevies the status.  When the given timezone ID
-     *                      is neither a known system time zone ID nor a
-     *                      valid custom timezone ID, U_ILLEGAL_ARGUMENT_ERROR
-     *                      is set.
-     * @return A reference to the result.
-     * @draft ICU 4.0
-     */
-    static UnicodeString& U_EXPORT2 getCanonicalID(const UnicodeString& id,
-        UnicodeString& canonicalID, UBool& isSystemID, UErrorCode& status);
-
-    /**
-     * Returns true if the two TimeZones are equal.  (The TimeZone version only compares
-     * IDs, but subclasses are expected to also compare the fields they add.)
-     *
-     * @param that  The TimeZone object to be compared with.
-     * @return      True if the given TimeZone is equal to this TimeZone; false
-     *              otherwise.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const TimeZone& that) const;
-
-    /**
-     * Returns true if the two TimeZones are NOT equal; that is, if operator==() returns
-     * false.
-     *
-     * @param that  The TimeZone object to be compared with.
-     * @return      True if the given TimeZone is not equal to this TimeZone; false
-     *              otherwise.
-     * @stable ICU 2.0
-     */
-    UBool operator!=(const TimeZone& that) const {return !operator==(that);}
-
-    /**
-     * Returns the TimeZone's adjusted GMT offset (i.e., the number of milliseconds to add
-     * to GMT to get local time in this time zone, taking daylight savings time into
-     * account) as of a particular reference date.  The reference date is used to determine
-     * whether daylight savings time is in effect and needs to be figured into the offset
-     * that is returned (in other words, what is the adjusted GMT offset in this time zone
-     * at this particular date and time?).  For the time zones produced by createTimeZone(),
-     * the reference data is specified according to the Gregorian calendar, and the date
-     * and time fields are local standard time.
-     *
-     * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload,
-     * which returns both the raw and the DST offset for a given time. This method
-     * is retained only for backward compatibility.
-     *
-     * @param era        The reference date's era
-     * @param year       The reference date's year
-     * @param month      The reference date's month (0-based; 0 is January)
-     * @param day        The reference date's day-in-month (1-based)
-     * @param dayOfWeek  The reference date's day-of-week (1-based; 1 is Sunday)
-     * @param millis     The reference date's milliseconds in day, local standard time
-     * @param status     Output param to filled in with a success or an error.
-     * @return           The offset in milliseconds to add to GMT to get local time.
-     * @stable ICU 2.0
-     */
-    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
-                              uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const = 0;
-
-    /**
-     * Gets the time zone offset, for current date, modified in case of
-     * daylight savings. This is the offset to add *to* UTC to get local time.
-     *
-     * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload,
-     * which returns both the raw and the DST offset for a given time. This method
-     * is retained only for backward compatibility.
-     *
-     * @param era the era of the given date.
-     * @param year the year in the given date.
-     * @param month the month in the given date.
-     * Month is 0-based. e.g., 0 for January.
-     * @param day the day-in-month of the given date.
-     * @param dayOfWeek the day-of-week of the given date.
-     * @param milliseconds the millis in day in <em>standard</em> local time.
-     * @param monthLength the length of the given month in days.
-     * @param status     Output param to filled in with a success or an error.
-     * @return the offset to add *to* GMT to get local time.
-     * @stable ICU 2.0
-     */
-    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
-                           uint8_t dayOfWeek, int32_t milliseconds,
-                           int32_t monthLength, UErrorCode& status) const = 0;
-
-    /**
-     * Returns the time zone raw and GMT offset for the given moment
-     * in time.  Upon return, local-millis = GMT-millis + rawOffset +
-     * dstOffset.  All computations are performed in the proleptic
-     * Gregorian calendar.  The default implementation in the TimeZone
-     * class delegates to the 8-argument getOffset().
-     *
-     * @param date moment in time for which to return offsets, in
-     * units of milliseconds from January 1, 1970 0:00 GMT, either GMT
-     * time or local wall time, depending on `local'.
-     * @param local if true, `date' is local wall time; otherwise it
-     * is in GMT time.
-     * @param rawOffset output parameter to receive the raw offset, that
-     * is, the offset not including DST adjustments
-     * @param dstOffset output parameter to receive the DST offset,
-     * that is, the offset to be added to `rawOffset' to obtain the
-     * total offset between local and GMT time. If DST is not in
-     * effect, this value is zero; otherwise it is a positive value,
-     * typically one hour.
-     * @param ec input-output error code
-     *
-     * @stable ICU 2.8
-     */
-    virtual void getOffset(UDate date, UBool local, int32_t& rawOffset,
-                           int32_t& dstOffset, UErrorCode& ec) const;
-
-    /**
-     * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
-     * to GMT to get local time, before taking daylight savings time into account).
-     *
-     * @param offsetMillis  The new raw GMT offset for this time zone.
-     * @stable ICU 2.0
-     */
-    virtual void setRawOffset(int32_t offsetMillis) = 0;
-
-    /**
-     * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
-     * to GMT to get local time, before taking daylight savings time into account).
-     *
-     * @return   The TimeZone's raw GMT offset.
-     * @stable ICU 2.0
-     */
-    virtual int32_t getRawOffset(void) const = 0;
-
-    /**
-     * Fills in "ID" with the TimeZone's ID.
-     *
-     * @param ID  Receives this TimeZone's ID.
-     * @return    A reference to 'ID'
-     * @stable ICU 2.0
-     */
-    UnicodeString& getID(UnicodeString& ID) const;
-
-    /**
-     * Sets the TimeZone's ID to the specified value.  This doesn't affect any other
-     * fields (for example, if you say<
-     * blockquote><pre>
-     * .     TimeZone* foo = TimeZone::createTimeZone("America/New_York");
-     * .     foo.setID("America/Los_Angeles");
-     * </pre>\htmlonly</blockquote>\endhtmlonly
-     * the time zone's GMT offset and daylight-savings rules don't change to those for
-     * Los Angeles.  They're still those for New York.  Only the ID has changed.)
-     *
-     * @param ID  The new timezone ID.
-     * @stable ICU 2.0
-     */
-    void setID(const UnicodeString& ID);
-
-    /**
-     * Enum for use with getDisplayName
-     * @stable ICU 2.4
-     */
-    enum EDisplayType {
-        /**
-         * Selector for short display name
-         * @stable ICU 2.4
-         */
-        SHORT = 1,
-        /**
-         * Selector for long display name
-         * @stable ICU 2.4
-         */
-        LONG
-    };
-
-    /**
-     * Returns a name of this time zone suitable for presentation to the user
-     * in the default locale.
-     * This method returns the long name, not including daylight savings.
-     * If the display name is not available for the locale,
-     * then this method returns a string in the format
-     * <code>GMT[+-]hh:mm</code>.
-     * @param result the human-readable name of this time zone in the default locale.
-     * @return       A reference to 'result'.
-     * @stable ICU 2.0
-     */
-    UnicodeString& getDisplayName(UnicodeString& result) const;
-
-    /**
-     * Returns a name of this time zone suitable for presentation to the user
-     * in the specified locale.
-     * This method returns the long name, not including daylight savings.
-     * If the display name is not available for the locale,
-     * then this method returns a string in the format
-     * <code>GMT[+-]hh:mm</code>.
-     * @param locale the locale in which to supply the display name.
-     * @param result the human-readable name of this time zone in the given locale
-     *               or in the default locale if the given locale is not recognized.
-     * @return       A reference to 'result'.
-     * @stable ICU 2.0
-     */
-    UnicodeString& getDisplayName(const Locale& locale, UnicodeString& result) const;
-
-    /**
-     * Returns a name of this time zone suitable for presentation to the user
-     * in the default locale.
-     * If the display name is not available for the locale,
-     * then this method returns a string in the format
-     * <code>GMT[+-]hh:mm</code>.
-     * @param daylight if true, return the daylight savings name.
-     * @param style either <code>LONG</code> or <code>SHORT</code>
-     * @param result the human-readable name of this time zone in the default locale.
-     * @return       A reference to 'result'.
-     * @stable ICU 2.0
-     */
-    UnicodeString& getDisplayName(UBool daylight, EDisplayType style, UnicodeString& result) const;
-
-    /**
-     * Returns a name of this time zone suitable for presentation to the user
-     * in the specified locale.
-     * If the display name is not available for the locale,
-     * then this method returns a string in the format
-     * <code>GMT[+-]hh:mm</code>.
-     * @param daylight if true, return the daylight savings name.
-     * @param style either <code>LONG</code> or <code>SHORT</code>
-     * @param locale the locale in which to supply the display name.
-     * @param result the human-readable name of this time zone in the given locale
-     *               or in the default locale if the given locale is not recognized.
-     * @return       A refence to 'result'.
-     * @stable ICU 2.0
-     */
-    UnicodeString& getDisplayName(UBool daylight, EDisplayType style, const Locale& locale, UnicodeString& result) const;
-
-    /**
-     * Queries if this time zone uses daylight savings time.
-     * @return true if this time zone uses daylight savings time,
-     * false, otherwise.
-     * @stable ICU 2.0
-     */
-    virtual UBool useDaylightTime(void) const = 0;
-
-    /**
-     * Queries if the given date is in daylight savings time in
-     * this time zone.
-     * This method is wasteful since it creates a new GregorianCalendar and
-     * deletes it each time it is called. This is a deprecated method
-     * and provided only for Java compatibility.
-     *
-     * @param date the given UDate.
-     * @param status Output param filled in with success/error code.
-     * @return true if the given date is in daylight savings time,
-     * false, otherwise.
-     * @deprecated ICU 2.4. Use Calendar::inDaylightTime() instead.
-     */
-    virtual UBool inDaylightTime(UDate date, UErrorCode& status) const = 0;
-
-    /**
-     * Returns true if this zone has the same rule and offset as another zone.
-     * That is, if this zone differs only in ID, if at all.
-     * @param other the <code>TimeZone</code> object to be compared with
-     * @return true if the given zone is the same as this one,
-     * with the possible exception of the ID
-     * @stable ICU 2.0
-     */
-    virtual UBool hasSameRules(const TimeZone& other) const;
-
-    /**
-     * Clones TimeZone objects polymorphically. Clients are responsible for deleting
-     * the TimeZone object cloned.
-     *
-     * @return   A new copy of this TimeZone object.
-     * @stable ICU 2.0
-     */
-    virtual TimeZone* clone(void) const = 0;
-
-    /**
-     * Return the class ID for this class.  This is useful only for
-     * comparing to a return value from getDynamicClassID().
-     * @return The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. This method is to
-     * implement a simple version of RTTI, since not all C++ compilers support genuine
-     * RTTI. Polymorphic operator==() and clone() methods call this method.
-     * <P>
-     * Concrete subclasses of TimeZone must use the UOBJECT_DEFINE_RTTI_IMPLEMENTATION
-     *  macro from uobject.h in their implementation to provide correct RTTI information.
-     * @return   The class ID for this object. All objects of a given class have the
-     *           same class ID. Objects of other classes have different class IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const = 0;
-    
-    /**
-     * Returns the amount of time to be added to local standard time
-     * to get local wall clock time.
-     * <p>
-     * The default implementation always returns 3600000 milliseconds
-     * (i.e., one hour) if this time zone observes Daylight Saving
-     * Time. Otherwise, 0 (zero) is returned.
-     * <p>
-     * If an underlying TimeZone implementation subclass supports
-     * historical Daylight Saving Time changes, this method returns
-     * the known latest daylight saving value.
-     *
-     * @return the amount of saving time in milliseconds
-     * @stable ICU 3.6
-     */
-    virtual int32_t getDSTSavings() const;
-
-protected:
-
-    /**
-     * Default constructor.  ID is initialized to the empty string.
-     * @stable ICU 2.0
-     */
-    TimeZone();
-
-    /**
-     * Construct a timezone with a given ID.
-     * @param id a system time zone ID
-     * @stable ICU 2.0
-     */
-    TimeZone(const UnicodeString &id);
-
-    /**
-     * Copy constructor.
-     * @param source the object to be copied.
-     * @stable ICU 2.0
-     */
-    TimeZone(const TimeZone& source);
-
-    /**
-     * Default assignment operator.
-     * @param right the object to be copied.
-     * @stable ICU 2.0
-     */
-    TimeZone& operator=(const TimeZone& right);
-
-    /**
-     * Utility function. For internally loading rule data.
-     * @param top Top resource bundle for tz data
-     * @param ruleid ID of rule to load
-     * @param oldbundle Old bundle to reuse or NULL
-     * @param status Status parameter
-     * @return either a new bundle or *oldbundle
-     * @internal
-     */
-    static UResourceBundle* loadRule(const UResourceBundle* top, const UnicodeString& ruleid, UResourceBundle* oldbundle, UErrorCode&status);
-
-private:
-    friend class ZoneMeta;
-
-
-    static TimeZone*        createCustomTimeZone(const UnicodeString&); // Creates a time zone based on the string.
-
-    /**
-     * Resolve a link in Olson tzdata.  When the given id is known and it's not a link,
-     * the id itself is returned.  When the given id is known and it is a link, then
-     * dereferenced zone id is returned.  When the given id is unknown, then it returns
-     * empty string.
-     * @param linkTo Input zone id string
-     * @param linkFrom Receives the dereferenced zone id string
-     * @return The reference to the result (linkFrom)
-     */
-    static UnicodeString& dereferOlsonLink(const UnicodeString& linkTo, UnicodeString& linkFrom);
-
-    /**
-     * Parses the given custom time zone identifier
-     * @param id id A string of the form GMT[+-]hh:mm, GMT[+-]hhmm, or
-     * GMT[+-]hh.
-     * @param sign Receves parsed sign, 1 for positive, -1 for negative.
-     * @param hour Receives parsed hour field
-     * @param minute Receives parsed minute field
-     * @param second Receives parsed second field
-     * @return Returns TRUE when the given custom id is valid.
-     */
-    static UBool parseCustomID(const UnicodeString& id, int32_t& sign, int32_t& hour,
-        int32_t& min, int32_t& sec);
-
-    /**
-     * Parse a custom time zone identifier and return the normalized
-     * custom time zone identifier for the given custom id string.
-     * @param id a string of the form GMT[+-]hh:mm, GMT[+-]hhmm, or
-     * GMT[+-]hh.
-     * @param normalized Receives the normalized custom ID
-     * @param status Receives the status.  When the input ID string is invalid,
-     * U_ILLEGAL_ARGUMENT_ERROR is set.
-     * @return The normalized custom id string.
-    */
-    static UnicodeString& getCustomID(const UnicodeString& id, UnicodeString& normalized,
-        UErrorCode& status);
-
-    /**
-     * Returns the normalized custome timezone ID for the given offset fields.
-     * @param hour offset hours
-     * @param min offset minutes
-     * @param sec offset seconds
-     * @param netative sign of the offset, TRUE for negative offset.
-     * @param id Receves the format result (normalized custom ID)
-     * @return The reference to id
-     */
-    static UnicodeString& formatCustomID(int32_t hour, int32_t min, int32_t sec,
-        UBool negative, UnicodeString& id);
-
-    /**
-     * Responsible for setting up DEFAULT_ZONE.  Uses routines in TPlatformUtilities
-     * (i.e., platform-specific calls) to get the current system time zone.  Failing
-     * that, uses the platform-specific default time zone.  Failing that, uses GMT.
-     */
-    static void             initDefault(void);
-
-    // See source file for documentation
-    /**
-     * Lookup the given name in our system zone table.  If found,
-     * instantiate a new zone of that name and return it.  If not
-     * found, return 0.
-     * @param name tthe given name of a system time zone.
-     * @return the timezone indicated by the 'name'.
-     */
-    static TimeZone*        createSystemTimeZone(const UnicodeString& name);
-
-    UnicodeString           fID;    // this time zone's ID
-};
-
-
-// -------------------------------------
-
-inline UnicodeString&
-TimeZone::getID(UnicodeString& ID) const
-{
-    ID = fID;
-    return ID;
-}
-
-// -------------------------------------
-
-inline void
-TimeZone::setID(const UnicodeString& ID)
-{
-    fID = ID;
-}
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif //_TIMEZONE
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/timezone.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/timezone.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/timezone.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/timezone.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,803 @@
+/*************************************************************************
+* Copyright (c) 1997-2008, International Business Machines Corporation
+* and others. All Rights Reserved.
+**************************************************************************
+*
+* File TIMEZONE.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   04/21/97    aliu        Overhauled header.
+*   07/09/97    helena      Changed createInstance to createDefault.
+*   08/06/97    aliu        Removed dependency on internal header for Hashtable.
+*   08/10/98    stephen        Changed getDisplayName() API conventions to match
+*   08/19/98    stephen        Changed createTimeZone() to never return 0
+*   09/02/98    stephen        Sync to JDK 1.2 8/31
+*                            - Added getOffset(... monthlen ...)
+*                            - Added hasSameRules()
+*   09/15/98    stephen        Added getStaticClassID
+*   12/03/99    aliu        Moved data out of static table into icudata.dll.
+*                           Hashtable replaced by new static data structures.
+*   12/14/99    aliu        Made GMT public.
+*   08/15/01    grhoten     Made GMT private and added the getGMT() function
+**************************************************************************
+*/
+
+#ifndef TIMEZONE_H
+#define TIMEZONE_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: TimeZone object
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/ures.h"
+
+U_NAMESPACE_BEGIN
+
+class StringEnumeration;
+
+/**
+ *
+ * <code>TimeZone</code> represents a time zone offset, and also figures out daylight
+ * savings.
+ *
+ * <p>
+ * Typically, you get a <code>TimeZone</code> using <code>createDefault</code>
+ * which creates a <code>TimeZone</code> based on the time zone where the program
+ * is running. For example, for a program running in Japan, <code>createDefault</code>
+ * creates a <code>TimeZone</code> object based on Japanese Standard Time.
+ *
+ * <p>
+ * You can also get a <code>TimeZone</code> using <code>createTimeZone</code> along
+ * with a time zone ID. For instance, the time zone ID for the Pacific
+ * Standard Time zone is "PST". So, you can get a PST <code>TimeZone</code> object
+ * with:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * TimeZone *tz = TimeZone::createTimeZone("PST");
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * You can use <code>getAvailableIDs</code> method to iterate through
+ * all the supported time zone IDs. You can then choose a
+ * supported ID to get a <code>TimeZone</code>.
+ * If the time zone you want is not represented by one of the
+ * supported IDs, then you can create a custom time zone ID with
+ * the following syntax:
+ *
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * GMT[+|-]hh[[:]mm]
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * For example, you might specify GMT+14:00 as a custom
+ * time zone ID.  The <code>TimeZone</code> that is returned
+ * when you specify a custom time zone ID does not include
+ * daylight savings time.
+ *
+ * TimeZone is an abstract class representing a time zone.  A TimeZone is needed for
+ * Calendar to produce local time for a particular time zone.  A TimeZone comprises
+ * three basic pieces of information:
+ * <ul>
+ *    <li>A time zone offset; that, is the number of milliseconds to add or subtract
+ *      from a time expressed in terms of GMT to convert it to the same time in that
+ *      time zone (without taking daylight savings time into account).</li>
+ *    <li>Logic necessary to take daylight savings time into account if daylight savings
+ *      time is observed in that time zone (e.g., the days and hours on which daylight
+ *      savings time begins and ends).</li>
+ *    <li>An ID.  This is a text string that uniquely identifies the time zone.</li>
+ * </ul>
+ *
+ * (Only the ID is actually implemented in TimeZone; subclasses of TimeZone may handle
+ * daylight savings time and GMT offset in different ways.  Currently we only have one
+ * TimeZone subclass: SimpleTimeZone.)
+ * <P>
+ * The TimeZone class contains a static list containing a TimeZone object for every
+ * combination of GMT offset and daylight-savings time rules currently in use in the
+ * world, each with a unique ID.  Each ID consists of a region (usually a continent or
+ * ocean) and a city in that region, separated by a slash, (for example, Pacific
+ * Standard Time is "America/Los_Angeles.")  Because older versions of this class used
+ * three- or four-letter abbreviations instead, there is also a table that maps the older
+ * abbreviations to the newer ones (for example, "PST" maps to "America/LosAngeles").
+ * Anywhere the API requires an ID, you can use either form.
+ * <P>
+ * To create a new TimeZone, you call the factory function TimeZone::createTimeZone()
+ * and pass it a time zone ID.  You can use the createEnumeration() function to
+ * obtain a list of all the time zone IDs recognized by createTimeZone().
+ * <P>
+ * You can also use TimeZone::createDefault() to create a TimeZone.  This function uses
+ * platform-specific APIs to produce a TimeZone for the time zone corresponding to
+ * the client's computer's physical location.  For example, if you're in Japan (assuming
+ * your machine is set up correctly), TimeZone::createDefault() will return a TimeZone
+ * for Japanese Standard Time ("Asia/Tokyo").
+ */
+class U_I18N_API TimeZone : public UObject {
+public:
+    /**
+     * @stable ICU 2.0
+     */
+    virtual ~TimeZone();
+
+    /**
+     * The GMT time zone has a raw offset of zero and does not use daylight
+     * savings time. This is a commonly used time zone.
+     * @return the GMT time zone.
+     * @stable ICU 2.0
+     */
+    static const TimeZone* U_EXPORT2 getGMT(void);
+
+    /**
+     * Creates a <code>TimeZone</code> for the given ID.
+     * @param ID the ID for a <code>TimeZone</code>, either an abbreviation such as
+     * "PST", a full name such as "America/Los_Angeles", or a custom ID
+     * such as "GMT-8:00".
+     * @return the specified <code>TimeZone</code>, or the GMT zone if the given ID
+     * cannot be understood.  Return result guaranteed to be non-null.  If you
+     * require that the specific zone asked for be returned, check the ID of the
+     * return result.
+     * @stable ICU 2.0
+     */
+    static TimeZone* U_EXPORT2 createTimeZone(const UnicodeString& ID);
+
+    /**
+     * Returns an enumeration over all recognized time zone IDs. (i.e.,
+     * all strings that createTimeZone() accepts)
+     *
+     * @return an enumeration object, owned by the caller.
+     * @stable ICU 2.4
+     */
+    static StringEnumeration* U_EXPORT2 createEnumeration();
+
+    /**
+     * Returns an enumeration over time zone IDs with a given raw
+     * offset from GMT.  There may be several times zones with the
+     * same GMT offset that differ in the way they handle daylight
+     * savings time.  For example, the state of Arizona doesn't
+     * observe daylight savings time.  If you ask for the time zone
+     * IDs corresponding to GMT-7:00, you'll get back an enumeration
+     * over two time zone IDs: "America/Denver," which corresponds to
+     * Mountain Standard Time in the winter and Mountain Daylight Time
+     * in the summer, and "America/Phoenix", which corresponds to
+     * Mountain Standard Time year-round, even in the summer.
+     *
+     * @param rawOffset an offset from GMT in milliseconds, ignoring
+     * the effect of daylight savings time, if any
+     * @return an enumeration object, owned by the caller
+     * @stable ICU 2.4
+     */
+    static StringEnumeration* U_EXPORT2 createEnumeration(int32_t rawOffset);
+
+    /**
+     * Returns an enumeration over time zone IDs associated with the
+     * given country.  Some zones are affiliated with no country
+     * (e.g., "UTC"); these may also be retrieved, as a group.
+     *
+     * @param country The ISO 3166 two-letter country code, or NULL to
+     * retrieve zones not affiliated with any country.
+     * @return an enumeration object, owned by the caller
+     * @stable ICU 2.4
+     */
+    static StringEnumeration* U_EXPORT2 createEnumeration(const char* country);
+
+#ifdef U_USE_TIMEZONE_OBSOLETE_2_8
+    /**
+     * Returns a list of time zone IDs, one for each time zone with a given GMT offset.
+     * The return value is a list because there may be several times zones with the same
+     * GMT offset that differ in the way they handle daylight savings time.  For example,
+     * the state of Arizona doesn't observe Daylight Savings time.  So if you ask for
+     * the time zone IDs corresponding to GMT-7:00, you'll get back two time zone IDs:
+     * "America/Denver," which corresponds to Mountain Standard Time in the winter and
+     * Mountain Daylight Time in the summer, and "America/Phoenix", which corresponds to
+     * Mountain Standard Time year-round, even in the summer.
+     * <P>
+     * The caller owns the list that is returned, but does not own the strings contained
+     * in that list.  Delete the array with uprv_free(), but DON'T delete the elements in the array.
+     *
+     * <p>NOTE: uprv_free() is declared in the private header source/common/cmemory.h.
+     *
+     * @param rawOffset  An offset from GMT in milliseconds.
+     * @param numIDs     Receives the number of items in the array that is returned.
+     * @return           An array of UnicodeString pointers, where each UnicodeString is
+     *                   a time zone ID for a time zone with the given GMT offset.  If
+     *                   there is no timezone that matches the GMT offset
+     *                   specified, NULL is returned.
+     * @obsolete ICU 2.8.  Use createEnumeration(int32_t) instead since this API will be removed in that release.
+     */
+    static const UnicodeString** createAvailableIDs(int32_t rawOffset, int32_t& numIDs);
+
+    /**
+     * Returns a list of time zone IDs associated with the given
+     * country.  Some zones are affiliated with no country (e.g.,
+     * "UTC"); these may also be retrieved, as a group.
+     *
+     * <P>The caller owns the list that is returned, but does not own
+     * the strings contained in that list.  Delete the array with uprv_free(), but
+     * <b>DON'T</b> delete the elements in the array.
+     *
+     * <p>NOTE: uprv_free() is declared in the private header source/common/cmemory.h.
+     *
+     * @param country The ISO 3166 two-letter country code, or NULL to
+     * retrieve zones not affiliated with any country.
+     * @param numIDs Receives the number of items in the array that is
+     * returned.
+     * @return An array of UnicodeString pointers, where each
+     * UnicodeString is a time zone ID for a time zone with the given
+     * country.  If there is no timezone that matches the country
+     * specified, NULL is returned.
+     * @obsolete ICU 2.8.  Use createEnumeration(const char*) instead since this API will be removed in that release.
+     */
+    static const UnicodeString** createAvailableIDs(const char* country,
+                                                          int32_t& numIDs);
+
+    /**
+     * Returns a list of all time zone IDs supported by the TimeZone class (i.e., all
+     * IDs that it's legal to pass to createTimeZone()).  The caller owns the list that
+     * is returned, but does not own the strings contained in that list.  Delete the array with uprv_free(),
+     * but DON'T delete the elements in the array.
+     *
+     * <p>NOTE: uprv_free() is declared in the private header source/common/cmemory.h.
+     *
+     * @param numIDs  Receives the number of zone IDs returned.
+     * @return        An array of UnicodeString pointers, where each is a time zone ID
+     *                supported by the TimeZone class.
+     * @obsolete ICU 2.8.  Use createEnumeration(void) instead since this API will be removed in that release.
+     */
+    static const UnicodeString** createAvailableIDs(int32_t& numIDs);
+#endif
+
+    /**
+     * Returns the number of IDs in the equivalency group that
+     * includes the given ID.  An equivalency group contains zones
+     * that have the same GMT offset and rules.
+     *
+     * <p>The returned count includes the given ID; it is always >= 1.
+     * The given ID must be a system time zone.  If it is not, returns
+     * zero.
+     * @param id a system time zone ID
+     * @return the number of zones in the equivalency group containing
+     * 'id', or zero if 'id' is not a valid system ID
+     * @see #getEquivalentID
+     * @stable ICU 2.0
+     */
+    static int32_t U_EXPORT2 countEquivalentIDs(const UnicodeString& id);
+
+    /**
+     * Returns an ID in the equivalency group that
+     * includes the given ID.  An equivalency group contains zones
+     * that have the same GMT offset and rules.
+     *
+     * <p>The given index must be in the range 0..n-1, where n is the
+     * value returned by <code>countEquivalentIDs(id)</code>.  For
+     * some value of 'index', the returned value will be equal to the
+     * given id.  If the given id is not a valid system time zone, or
+     * if 'index' is out of range, then returns an empty string.
+     * @param id a system time zone ID
+     * @param index a value from 0 to n-1, where n is the value
+     * returned by <code>countEquivalentIDs(id)</code>
+     * @return the ID of the index-th zone in the equivalency group
+     * containing 'id', or an empty string if 'id' is not a valid
+     * system ID or 'index' is out of range
+     * @see #countEquivalentIDs
+     * @stable ICU 2.0
+     */
+    static const UnicodeString U_EXPORT2 getEquivalentID(const UnicodeString& id,
+                                               int32_t index);
+
+    /**
+     * Creates a new copy of the default TimeZone for this host. Unless the default time
+     * zone has already been set using adoptDefault() or setDefault(), the default is
+     * determined by querying the system using methods in TPlatformUtilities. If the
+     * system routines fail, or if they specify a TimeZone or TimeZone offset which is not
+     * recognized, the TimeZone indicated by the ID kLastResortID is instantiated
+     * and made the default.
+     *
+     * @return   A default TimeZone. Clients are responsible for deleting the time zone
+     *           object returned.
+     * @stable ICU 2.0
+     */
+    static TimeZone* U_EXPORT2 createDefault(void);
+
+    /**
+     * Sets the default time zone (i.e., what's returned by createDefault()) to be the
+     * specified time zone.  If NULL is specified for the time zone, the default time
+     * zone is set to the default host time zone.  This call adopts the TimeZone object
+     * passed in; the clent is no longer responsible for deleting it.
+     *
+     * @param zone  A pointer to the new TimeZone object to use as the default.
+     * @stable ICU 2.0
+     */
+    static void U_EXPORT2 adoptDefault(TimeZone* zone);
+
+    /**
+     * Same as adoptDefault(), except that the TimeZone object passed in is NOT adopted;
+     * the caller remains responsible for deleting it.
+     *
+     * @param zone  The given timezone.
+     * @system
+     */
+    static void U_EXPORT2 setDefault(const TimeZone& zone);
+
+    /**
+     * Returns the timezone data version currently used by ICU.
+     * @param status Output param to filled in with a success or an error.
+     * @return the version string, such as "2007f"
+     * @stable ICU 4.0
+     */
+    static const char* U_EXPORT2 getTZDataVersion(UErrorCode& status);
+
+    /**
+     * Returns the canonical system timezone ID or the normalized
+     * custom time zone ID for the given time zone ID.
+     * @param id            The input timezone ID to be canonicalized.
+     * @param canonicalID   Receives the canonical system timezone ID
+     *                      or the custom timezone ID in normalized format.
+     * @param status        Recevies the status.  When the given timezone ID
+     *                      is neither a known system time zone ID nor a
+     *                      valid custom timezone ID, U_ILLEGAL_ARGUMENT_ERROR
+     *                      is set.
+     * @return A reference to the result.
+     * @draft ICU 4.0
+     */
+    static UnicodeString& U_EXPORT2 getCanonicalID(const UnicodeString& id,
+        UnicodeString& canonicalID, UErrorCode& status);
+
+    /**
+     * Returns the canonical system timezone ID or the normalized
+     * custom time zone ID for the given time zone ID.
+     * @param id            The input timezone ID to be canonicalized.
+     * @param canonicalID   Receives the canonical system timezone ID
+     *                      or the custom timezone ID in normalized format.
+     * @param isSystemID    Receives if the given ID is a known system
+     *                      timezone ID.
+     * @param status        Recevies the status.  When the given timezone ID
+     *                      is neither a known system time zone ID nor a
+     *                      valid custom timezone ID, U_ILLEGAL_ARGUMENT_ERROR
+     *                      is set.
+     * @return A reference to the result.
+     * @draft ICU 4.0
+     */
+    static UnicodeString& U_EXPORT2 getCanonicalID(const UnicodeString& id,
+        UnicodeString& canonicalID, UBool& isSystemID, UErrorCode& status);
+
+    /**
+     * Returns true if the two TimeZones are equal.  (The TimeZone version only compares
+     * IDs, but subclasses are expected to also compare the fields they add.)
+     *
+     * @param that  The TimeZone object to be compared with.
+     * @return      True if the given TimeZone is equal to this TimeZone; false
+     *              otherwise.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const TimeZone& that) const;
+
+    /**
+     * Returns true if the two TimeZones are NOT equal; that is, if operator==() returns
+     * false.
+     *
+     * @param that  The TimeZone object to be compared with.
+     * @return      True if the given TimeZone is not equal to this TimeZone; false
+     *              otherwise.
+     * @stable ICU 2.0
+     */
+    UBool operator!=(const TimeZone& that) const {return !operator==(that);}
+
+    /**
+     * Returns the TimeZone's adjusted GMT offset (i.e., the number of milliseconds to add
+     * to GMT to get local time in this time zone, taking daylight savings time into
+     * account) as of a particular reference date.  The reference date is used to determine
+     * whether daylight savings time is in effect and needs to be figured into the offset
+     * that is returned (in other words, what is the adjusted GMT offset in this time zone
+     * at this particular date and time?).  For the time zones produced by createTimeZone(),
+     * the reference data is specified according to the Gregorian calendar, and the date
+     * and time fields are local standard time.
+     *
+     * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload,
+     * which returns both the raw and the DST offset for a given time. This method
+     * is retained only for backward compatibility.
+     *
+     * @param era        The reference date's era
+     * @param year       The reference date's year
+     * @param month      The reference date's month (0-based; 0 is January)
+     * @param day        The reference date's day-in-month (1-based)
+     * @param dayOfWeek  The reference date's day-of-week (1-based; 1 is Sunday)
+     * @param millis     The reference date's milliseconds in day, local standard time
+     * @param status     Output param to filled in with a success or an error.
+     * @return           The offset in milliseconds to add to GMT to get local time.
+     * @stable ICU 2.0
+     */
+    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
+                              uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const = 0;
+
+    /**
+     * Gets the time zone offset, for current date, modified in case of
+     * daylight savings. This is the offset to add *to* UTC to get local time.
+     *
+     * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload,
+     * which returns both the raw and the DST offset for a given time. This method
+     * is retained only for backward compatibility.
+     *
+     * @param era the era of the given date.
+     * @param year the year in the given date.
+     * @param month the month in the given date.
+     * Month is 0-based. e.g., 0 for January.
+     * @param day the day-in-month of the given date.
+     * @param dayOfWeek the day-of-week of the given date.
+     * @param milliseconds the millis in day in <em>standard</em> local time.
+     * @param monthLength the length of the given month in days.
+     * @param status     Output param to filled in with a success or an error.
+     * @return the offset to add *to* GMT to get local time.
+     * @stable ICU 2.0
+     */
+    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
+                           uint8_t dayOfWeek, int32_t milliseconds,
+                           int32_t monthLength, UErrorCode& status) const = 0;
+
+    /**
+     * Returns the time zone raw and GMT offset for the given moment
+     * in time.  Upon return, local-millis = GMT-millis + rawOffset +
+     * dstOffset.  All computations are performed in the proleptic
+     * Gregorian calendar.  The default implementation in the TimeZone
+     * class delegates to the 8-argument getOffset().
+     *
+     * @param date moment in time for which to return offsets, in
+     * units of milliseconds from January 1, 1970 0:00 GMT, either GMT
+     * time or local wall time, depending on `local'.
+     * @param local if true, `date' is local wall time; otherwise it
+     * is in GMT time.
+     * @param rawOffset output parameter to receive the raw offset, that
+     * is, the offset not including DST adjustments
+     * @param dstOffset output parameter to receive the DST offset,
+     * that is, the offset to be added to `rawOffset' to obtain the
+     * total offset between local and GMT time. If DST is not in
+     * effect, this value is zero; otherwise it is a positive value,
+     * typically one hour.
+     * @param ec input-output error code
+     *
+     * @stable ICU 2.8
+     */
+    virtual void getOffset(UDate date, UBool local, int32_t& rawOffset,
+                           int32_t& dstOffset, UErrorCode& ec) const;
+
+    /**
+     * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
+     * to GMT to get local time, before taking daylight savings time into account).
+     *
+     * @param offsetMillis  The new raw GMT offset for this time zone.
+     * @stable ICU 2.0
+     */
+    virtual void setRawOffset(int32_t offsetMillis) = 0;
+
+    /**
+     * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
+     * to GMT to get local time, before taking daylight savings time into account).
+     *
+     * @return   The TimeZone's raw GMT offset.
+     * @stable ICU 2.0
+     */
+    virtual int32_t getRawOffset(void) const = 0;
+
+    /**
+     * Fills in "ID" with the TimeZone's ID.
+     *
+     * @param ID  Receives this TimeZone's ID.
+     * @return    A reference to 'ID'
+     * @stable ICU 2.0
+     */
+    UnicodeString& getID(UnicodeString& ID) const;
+
+    /**
+     * Sets the TimeZone's ID to the specified value.  This doesn't affect any other
+     * fields (for example, if you say<
+     * blockquote><pre>
+     * .     TimeZone* foo = TimeZone::createTimeZone("America/New_York");
+     * .     foo.setID("America/Los_Angeles");
+     * </pre>\htmlonly</blockquote>\endhtmlonly
+     * the time zone's GMT offset and daylight-savings rules don't change to those for
+     * Los Angeles.  They're still those for New York.  Only the ID has changed.)
+     *
+     * @param ID  The new timezone ID.
+     * @stable ICU 2.0
+     */
+    void setID(const UnicodeString& ID);
+
+    /**
+     * Enum for use with getDisplayName
+     * @stable ICU 2.4
+     */
+    enum EDisplayType {
+        /**
+         * Selector for short display name
+         * @stable ICU 2.4
+         */
+        SHORT = 1,
+        /**
+         * Selector for long display name
+         * @stable ICU 2.4
+         */
+        LONG
+    };
+
+    /**
+     * Returns a name of this time zone suitable for presentation to the user
+     * in the default locale.
+     * This method returns the long name, not including daylight savings.
+     * If the display name is not available for the locale,
+     * then this method returns a string in the format
+     * <code>GMT[+-]hh:mm</code>.
+     * @param result the human-readable name of this time zone in the default locale.
+     * @return       A reference to 'result'.
+     * @stable ICU 2.0
+     */
+    UnicodeString& getDisplayName(UnicodeString& result) const;
+
+    /**
+     * Returns a name of this time zone suitable for presentation to the user
+     * in the specified locale.
+     * This method returns the long name, not including daylight savings.
+     * If the display name is not available for the locale,
+     * then this method returns a string in the format
+     * <code>GMT[+-]hh:mm</code>.
+     * @param locale the locale in which to supply the display name.
+     * @param result the human-readable name of this time zone in the given locale
+     *               or in the default locale if the given locale is not recognized.
+     * @return       A reference to 'result'.
+     * @stable ICU 2.0
+     */
+    UnicodeString& getDisplayName(const Locale& locale, UnicodeString& result) const;
+
+    /**
+     * Returns a name of this time zone suitable for presentation to the user
+     * in the default locale.
+     * If the display name is not available for the locale,
+     * then this method returns a string in the format
+     * <code>GMT[+-]hh:mm</code>.
+     * @param daylight if true, return the daylight savings name.
+     * @param style either <code>LONG</code> or <code>SHORT</code>
+     * @param result the human-readable name of this time zone in the default locale.
+     * @return       A reference to 'result'.
+     * @stable ICU 2.0
+     */
+    UnicodeString& getDisplayName(UBool daylight, EDisplayType style, UnicodeString& result) const;
+
+    /**
+     * Returns a name of this time zone suitable for presentation to the user
+     * in the specified locale.
+     * If the display name is not available for the locale,
+     * then this method returns a string in the format
+     * <code>GMT[+-]hh:mm</code>.
+     * @param daylight if true, return the daylight savings name.
+     * @param style either <code>LONG</code> or <code>SHORT</code>
+     * @param locale the locale in which to supply the display name.
+     * @param result the human-readable name of this time zone in the given locale
+     *               or in the default locale if the given locale is not recognized.
+     * @return       A refence to 'result'.
+     * @stable ICU 2.0
+     */
+    UnicodeString& getDisplayName(UBool daylight, EDisplayType style, const Locale& locale, UnicodeString& result) const;
+
+    /**
+     * Queries if this time zone uses daylight savings time.
+     * @return true if this time zone uses daylight savings time,
+     * false, otherwise.
+     * @stable ICU 2.0
+     */
+    virtual UBool useDaylightTime(void) const = 0;
+
+    /**
+     * Queries if the given date is in daylight savings time in
+     * this time zone.
+     * This method is wasteful since it creates a new GregorianCalendar and
+     * deletes it each time it is called. This is a deprecated method
+     * and provided only for Java compatibility.
+     *
+     * @param date the given UDate.
+     * @param status Output param filled in with success/error code.
+     * @return true if the given date is in daylight savings time,
+     * false, otherwise.
+     * @deprecated ICU 2.4. Use Calendar::inDaylightTime() instead.
+     */
+    virtual UBool inDaylightTime(UDate date, UErrorCode& status) const = 0;
+
+    /**
+     * Returns true if this zone has the same rule and offset as another zone.
+     * That is, if this zone differs only in ID, if at all.
+     * @param other the <code>TimeZone</code> object to be compared with
+     * @return true if the given zone is the same as this one,
+     * with the possible exception of the ID
+     * @stable ICU 2.0
+     */
+    virtual UBool hasSameRules(const TimeZone& other) const;
+
+    /**
+     * Clones TimeZone objects polymorphically. Clients are responsible for deleting
+     * the TimeZone object cloned.
+     *
+     * @return   A new copy of this TimeZone object.
+     * @stable ICU 2.0
+     */
+    virtual TimeZone* clone(void) const = 0;
+
+    /**
+     * Return the class ID for this class.  This is useful only for
+     * comparing to a return value from getDynamicClassID().
+     * @return The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. This method is to
+     * implement a simple version of RTTI, since not all C++ compilers support genuine
+     * RTTI. Polymorphic operator==() and clone() methods call this method.
+     * <P>
+     * Concrete subclasses of TimeZone must use the UOBJECT_DEFINE_RTTI_IMPLEMENTATION
+     *  macro from uobject.h in their implementation to provide correct RTTI information.
+     * @return   The class ID for this object. All objects of a given class have the
+     *           same class ID. Objects of other classes have different class IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const = 0;
+    
+    /**
+     * Returns the amount of time to be added to local standard time
+     * to get local wall clock time.
+     * <p>
+     * The default implementation always returns 3600000 milliseconds
+     * (i.e., one hour) if this time zone observes Daylight Saving
+     * Time. Otherwise, 0 (zero) is returned.
+     * <p>
+     * If an underlying TimeZone implementation subclass supports
+     * historical Daylight Saving Time changes, this method returns
+     * the known latest daylight saving value.
+     *
+     * @return the amount of saving time in milliseconds
+     * @stable ICU 3.6
+     */
+    virtual int32_t getDSTSavings() const;
+
+protected:
+
+    /**
+     * Default constructor.  ID is initialized to the empty string.
+     * @stable ICU 2.0
+     */
+    TimeZone();
+
+    /**
+     * Construct a timezone with a given ID.
+     * @param id a system time zone ID
+     * @stable ICU 2.0
+     */
+    TimeZone(const UnicodeString &id);
+
+    /**
+     * Copy constructor.
+     * @param source the object to be copied.
+     * @stable ICU 2.0
+     */
+    TimeZone(const TimeZone& source);
+
+    /**
+     * Default assignment operator.
+     * @param right the object to be copied.
+     * @stable ICU 2.0
+     */
+    TimeZone& operator=(const TimeZone& right);
+
+    /**
+     * Utility function. For internally loading rule data.
+     * @param top Top resource bundle for tz data
+     * @param ruleid ID of rule to load
+     * @param oldbundle Old bundle to reuse or NULL
+     * @param status Status parameter
+     * @return either a new bundle or *oldbundle
+     * @internal
+     */
+    static UResourceBundle* loadRule(const UResourceBundle* top, const UnicodeString& ruleid, UResourceBundle* oldbundle, UErrorCode&status);
+
+private:
+    friend class ZoneMeta;
+
+
+    static TimeZone*        createCustomTimeZone(const UnicodeString&); // Creates a time zone based on the string.
+
+    /**
+     * Resolve a link in Olson tzdata.  When the given id is known and it's not a link,
+     * the id itself is returned.  When the given id is known and it is a link, then
+     * dereferenced zone id is returned.  When the given id is unknown, then it returns
+     * empty string.
+     * @param linkTo Input zone id string
+     * @param linkFrom Receives the dereferenced zone id string
+     * @return The reference to the result (linkFrom)
+     */
+    static UnicodeString& dereferOlsonLink(const UnicodeString& linkTo, UnicodeString& linkFrom);
+
+    /**
+     * Parses the given custom time zone identifier
+     * @param id id A string of the form GMT[+-]hh:mm, GMT[+-]hhmm, or
+     * GMT[+-]hh.
+     * @param sign Receves parsed sign, 1 for positive, -1 for negative.
+     * @param hour Receives parsed hour field
+     * @param minute Receives parsed minute field
+     * @param second Receives parsed second field
+     * @return Returns TRUE when the given custom id is valid.
+     */
+    static UBool parseCustomID(const UnicodeString& id, int32_t& sign, int32_t& hour,
+        int32_t& min, int32_t& sec);
+
+    /**
+     * Parse a custom time zone identifier and return the normalized
+     * custom time zone identifier for the given custom id string.
+     * @param id a string of the form GMT[+-]hh:mm, GMT[+-]hhmm, or
+     * GMT[+-]hh.
+     * @param normalized Receives the normalized custom ID
+     * @param status Receives the status.  When the input ID string is invalid,
+     * U_ILLEGAL_ARGUMENT_ERROR is set.
+     * @return The normalized custom id string.
+    */
+    static UnicodeString& getCustomID(const UnicodeString& id, UnicodeString& normalized,
+        UErrorCode& status);
+
+    /**
+     * Returns the normalized custome timezone ID for the given offset fields.
+     * @param hour offset hours
+     * @param min offset minutes
+     * @param sec offset seconds
+     * @param netative sign of the offset, TRUE for negative offset.
+     * @param id Receves the format result (normalized custom ID)
+     * @return The reference to id
+     */
+    static UnicodeString& formatCustomID(int32_t hour, int32_t min, int32_t sec,
+        UBool negative, UnicodeString& id);
+
+    /**
+     * Responsible for setting up DEFAULT_ZONE.  Uses routines in TPlatformUtilities
+     * (i.e., platform-specific calls) to get the current system time zone.  Failing
+     * that, uses the platform-specific default time zone.  Failing that, uses GMT.
+     */
+    static void             initDefault(void);
+
+    // See source file for documentation
+    /**
+     * Lookup the given name in our system zone table.  If found,
+     * instantiate a new zone of that name and return it.  If not
+     * found, return 0.
+     * @param name tthe given name of a system time zone.
+     * @return the timezone indicated by the 'name'.
+     */
+    static TimeZone*        createSystemTimeZone(const UnicodeString& name);
+
+    UnicodeString           fID;    // this time zone's ID
+};
+
+
+// -------------------------------------
+
+inline UnicodeString&
+TimeZone::getID(UnicodeString& ID) const
+{
+    ID = fID;
+    return ID;
+}
+
+// -------------------------------------
+
+inline void
+TimeZone::setID(const UnicodeString& ID)
+{
+    fID = ID;
+}
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif //_TIMEZONE
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/translit.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/translit.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/translit.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1323 +0,0 @@
-/*
-**********************************************************************
-* Copyright (C) 1999-2008, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*   Date        Name        Description
-*   11/17/99    aliu        Creation.
-**********************************************************************
-*/
-#ifndef TRANSLIT_H
-#define TRANSLIT_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Tranforms text from one format to another.
- */
- 
-#if !UCONFIG_NO_TRANSLITERATION
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-#include "unicode/parseerr.h"
-#include "unicode/utrans.h" // UTransPosition, UTransDirection
-#include "unicode/strenum.h"
-
-U_NAMESPACE_BEGIN
-
-class UnicodeFilter;
-class UnicodeSet;
-class CompoundTransliterator;
-class TransliteratorParser;
-class NormalizationTransliterator;
-class TransliteratorIDParser;
-
-/**
- *
- * <code>Transliterator</code> is an abstract class that
- * transliterates text from one format to another.  The most common
- * kind of transliterator is a script, or alphabet, transliterator.
- * For example, a Russian to Latin transliterator changes Russian text
- * written in Cyrillic characters to phonetically equivalent Latin
- * characters.  It does not <em>translate</em> Russian to English!
- * Transliteration, unlike translation, operates on characters, without
- * reference to the meanings of words and sentences.
- *
- * <p>Although script conversion is its most common use, a
- * transliterator can actually perform a more general class of tasks.
- * In fact, <code>Transliterator</code> defines a very general API
- * which specifies only that a segment of the input text is replaced
- * by new text.  The particulars of this conversion are determined
- * entirely by subclasses of <code>Transliterator</code>.
- *
- * <p><b>Transliterators are stateless</b>
- *
- * <p><code>Transliterator</code> objects are <em>stateless</em>; they
- * retain no information between calls to
- * <code>transliterate()</code>.  (However, this does <em>not</em>
- * mean that threads may share transliterators without synchronizing
- * them.  Transliterators are not immutable, so they must be
- * synchronized when shared between threads.)  This might seem to
- * limit the complexity of the transliteration operation.  In
- * practice, subclasses perform complex transliterations by delaying
- * the replacement of text until it is known that no other
- * replacements are possible.  In other words, although the
- * <code>Transliterator</code> objects are stateless, the source text
- * itself embodies all the needed information, and delayed operation
- * allows arbitrary complexity.
- *
- * <p><b>Batch transliteration</b>
- *
- * <p>The simplest way to perform transliteration is all at once, on a
- * string of existing text.  This is referred to as <em>batch</em>
- * transliteration.  For example, given a string <code>input</code>
- * and a transliterator <code>t</code>, the call
- *
- * \htmlonly<blockquote>\endhtmlonly<code>String result = t.transliterate(input);
- * </code>\htmlonly</blockquote>\endhtmlonly
- *
- * will transliterate it and return the result.  Other methods allow
- * the client to specify a substring to be transliterated and to use
- * {@link Replaceable } objects instead of strings, in order to
- * preserve out-of-band information (such as text styles).
- *
- * <p><b>Keyboard transliteration</b>
- *
- * <p>Somewhat more involved is <em>keyboard</em>, or incremental
- * transliteration.  This is the transliteration of text that is
- * arriving from some source (typically the user's keyboard) one
- * character at a time, or in some other piecemeal fashion.
- *
- * <p>In keyboard transliteration, a <code>Replaceable</code> buffer
- * stores the text.  As text is inserted, as much as possible is
- * transliterated on the fly.  This means a GUI that displays the
- * contents of the buffer may show text being modified as each new
- * character arrives.
- *
- * <p>Consider the simple <code>RuleBasedTransliterator</code>:
- *
- * \htmlonly<blockquote>\endhtmlonly<code>
- * th&gt;{theta}<br>
- * t&gt;{tau}
- * </code>\htmlonly</blockquote>\endhtmlonly
- *
- * When the user types 't', nothing will happen, since the
- * transliterator is waiting to see if the next character is 'h'.  To
- * remedy this, we introduce the notion of a cursor, marked by a '|'
- * in the output string:
- *
- * \htmlonly<blockquote>\endhtmlonly<code>
- * t&gt;|{tau}<br>
- * {tau}h&gt;{theta}
- * </code>\htmlonly</blockquote>\endhtmlonly
- *
- * Now when the user types 't', tau appears, and if the next character
- * is 'h', the tau changes to a theta.  This is accomplished by
- * maintaining a cursor position (independent of the insertion point,
- * and invisible in the GUI) across calls to
- * <code>transliterate()</code>.  Typically, the cursor will
- * be coincident with the insertion point, but in a case like the one
- * above, it will precede the insertion point.
- *
- * <p>Keyboard transliteration methods maintain a set of three indices
- * that are updated with each call to
- * <code>transliterate()</code>, including the cursor, start,
- * and limit.  Since these indices are changed by the method, they are
- * passed in an <code>int[]</code> array. The <code>START</code> index
- * marks the beginning of the substring that the transliterator will
- * look at.  It is advanced as text becomes committed (but it is not
- * the committed index; that's the <code>CURSOR</code>).  The
- * <code>CURSOR</code> index, described above, marks the point at
- * which the transliterator last stopped, either because it reached
- * the end, or because it required more characters to disambiguate
- * between possible inputs.  The <code>CURSOR</code> can also be
- * explicitly set by rules in a <code>RuleBasedTransliterator</code>.
- * Any characters before the <code>CURSOR</code> index are frozen;
- * future keyboard transliteration calls within this input sequence
- * will not change them.  New text is inserted at the
- * <code>LIMIT</code> index, which marks the end of the substring that
- * the transliterator looks at.
- *
- * <p>Because keyboard transliteration assumes that more characters
- * are to arrive, it is conservative in its operation.  It only
- * transliterates when it can do so unambiguously.  Otherwise it waits
- * for more characters to arrive.  When the client code knows that no
- * more characters are forthcoming, perhaps because the user has
- * performed some input termination operation, then it should call
- * <code>finishTransliteration()</code> to complete any
- * pending transliterations.
- *
- * <p><b>Inverses</b>
- *
- * <p>Pairs of transliterators may be inverses of one another.  For
- * example, if transliterator <b>A</b> transliterates characters by
- * incrementing their Unicode value (so "abc" -> "def"), and
- * transliterator <b>B</b> decrements character values, then <b>A</b>
- * is an inverse of <b>B</b> and vice versa.  If we compose <b>A</b>
- * with <b>B</b> in a compound transliterator, the result is the
- * indentity transliterator, that is, a transliterator that does not
- * change its input text.
- *
- * The <code>Transliterator</code> method <code>getInverse()</code>
- * returns a transliterator's inverse, if one exists, or
- * <code>null</code> otherwise.  However, the result of
- * <code>getInverse()</code> usually will <em>not</em> be a true
- * mathematical inverse.  This is because true inverse transliterators
- * are difficult to formulate.  For example, consider two
- * transliterators: <b>AB</b>, which transliterates the character 'A'
- * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'.  It might
- * seem that these are exact inverses, since
- *
- * \htmlonly<blockquote>\endhtmlonly"A" x <b>AB</b> -> "B"<br>
- * "B" x <b>BA</b> -> "A"\htmlonly</blockquote>\endhtmlonly
- *
- * where 'x' represents transliteration.  However,
- *
- * \htmlonly<blockquote>\endhtmlonly"ABCD" x <b>AB</b> -> "BBCD"<br>
- * "BBCD" x <b>BA</b> -> "AACD"\htmlonly</blockquote>\endhtmlonly
- *
- * so <b>AB</b> composed with <b>BA</b> is not the
- * identity. Nonetheless, <b>BA</b> may be usefully considered to be
- * <b>AB</b>'s inverse, and it is on this basis that
- * <b>AB</b><code>.getInverse()</code> could legitimately return
- * <b>BA</b>.
- *
- * <p><b>IDs and display names</b>
- *
- * <p>A transliterator is designated by a short identifier string or
- * <em>ID</em>.  IDs follow the format <em>source-destination</em>,
- * where <em>source</em> describes the entity being replaced, and
- * <em>destination</em> describes the entity replacing
- * <em>source</em>.  The entities may be the names of scripts,
- * particular sequences of characters, or whatever else it is that the
- * transliterator converts to or from.  For example, a transliterator
- * from Russian to Latin might be named "Russian-Latin".  A
- * transliterator from keyboard escape sequences to Latin-1 characters
- * might be named "KeyboardEscape-Latin1".  By convention, system
- * entity names are in English, with the initial letters of words
- * capitalized; user entity names may follow any format so long as
- * they do not contain dashes.
- *
- * <p>In addition to programmatic IDs, transliterator objects have
- * display names for presentation in user interfaces, returned by
- * {@link #getDisplayName }.
- *
- * <p><b>Factory methods and registration</b>
- *
- * <p>In general, client code should use the factory method
- * {@link #createInstance } to obtain an instance of a
- * transliterator given its ID.  Valid IDs may be enumerated using
- * <code>getAvailableIDs()</code>.  Since transliterators are mutable,
- * multiple calls to {@link #createInstance } with the same ID will
- * return distinct objects.
- *
- * <p>In addition to the system transliterators registered at startup,
- * user transliterators may be registered by calling
- * <code>registerInstance()</code> at run time.  A registered instance
- * acts a template; future calls to {@link #createInstance } with the ID
- * of the registered object return clones of that object.  Thus any
- * object passed to <tt>registerInstance()</tt> must implement
- * <tt>clone()</tt> propertly.  To register a transliterator subclass
- * without instantiating it (until it is needed), users may call
- * {@link #registerFactory }.  In this case, the objects are
- * instantiated by invoking the zero-argument public constructor of
- * the class.
- *
- * <p><b>Subclassing</b>
- *
- * Subclasses must implement the abstract method
- * <code>handleTransliterate()</code>.  <p>Subclasses should override
- * the <code>transliterate()</code> method taking a
- * <code>Replaceable</code> and the <code>transliterate()</code>
- * method taking a <code>String</code> and <code>StringBuffer</code>
- * if the performance of these methods can be improved over the
- * performance obtained by the default implementations in this class.
- *
- * @author Alan Liu
- * @stable ICU 2.0
- */
-class U_I18N_API Transliterator : public UObject {
-
-private:
-
-    /**
-     * Programmatic name, e.g., "Latin-Arabic".
-     */
-    UnicodeString ID;
-
-    /**
-     * This transliterator's filter.  Any character for which
-     * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
-     * altered by this transliterator.  If <tt>filter</tt> is
-     * <tt>null</tt> then no filtering is applied.
-     */
-    UnicodeFilter* filter;
-
-    int32_t maximumContextLength;
-
- public:
-
-    /**
-     * A context integer or pointer for a factory function, passed by
-     * value.
-     * @stable ICU 2.4
-     */
-    union Token {
-        /**
-         * This token, interpreted as a 32-bit integer.
-         * @stable ICU 2.4
-         */
-        int32_t integer;
-        /**
-         * This token, interpreted as a native pointer.
-         * @stable ICU 2.4
-         */
-        void*   pointer;
-    };
-
-    /**
-     * Return a token containing an integer.
-     * @return a token containing an integer.
-     * @internal
-     */
-    inline static Token integerToken(int32_t);
-
-    /**
-     * Return a token containing a pointer.
-     * @return a token containing a pointer.
-     * @internal
-     */
-    inline static Token pointerToken(void*);
-
-    /**
-     * A function that creates and returns a Transliterator.  When
-     * invoked, it will be passed the ID string that is being
-     * instantiated, together with the context pointer that was passed
-     * in when the factory function was first registered.  Many
-     * factory functions will ignore both parameters, however,
-     * functions that are registered to more than one ID may use the
-     * ID or the context parameter to parameterize the transliterator
-     * they create.
-     * @param ID      the string identifier for this transliterator
-     * @param context a context pointer that will be stored and
-     *                later passed to the factory function when an ID matching
-     *                the registration ID is being instantiated with this factory.
-     * @stable ICU 2.4
-     */
-    typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);
-
-protected:
-
-    /**
-     * Default constructor.
-     * @param ID the string identifier for this transliterator
-     * @param adoptedFilter the filter.  Any character for which
-     * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
-     * altered by this transliterator.  If <tt>filter</tt> is
-     * <tt>null</tt> then no filtering is applied.
-     * @stable ICU 2.4
-     */
-    Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
-
-    /**
-     * Copy constructor.
-     * @stable ICU 2.4
-     */
-    Transliterator(const Transliterator&);
-
-    /**
-     * Assignment operator.
-     * @stable ICU 2.4
-     */
-    Transliterator& operator=(const Transliterator&);
-
-    /**
-     * Create a transliterator from a basic ID.  This is an ID
-     * containing only the forward direction source, target, and
-     * variant.
-     * @param id a basic ID of the form S-T or S-T/V.
-     * @param canon canonical ID to assign to the object, or
-     * NULL to leave the ID unchanged
-     * @return a newly created Transliterator or null if the ID is
-     * invalid.
-     * @stable ICU 2.4
-     */
-    static Transliterator* createBasicInstance(const UnicodeString& id,
-                                               const UnicodeString* canon);
-
-    friend class TransliteratorParser; // for parseID()
-    friend class TransliteratorIDParser; // for createBasicInstance()
-    friend class TransliteratorAlias; // for setID()
-
-public:
-
-    /**
-     * Destructor.
-     * @stable ICU 2.0
-     */
-    virtual ~Transliterator();
-
-    /**
-     * Implements Cloneable.
-     * All subclasses are encouraged to implement this method if it is
-     * possible and reasonable to do so.  Subclasses that are to be
-     * registered with the system using <tt>registerInstance()</tt>
-     * are required to implement this method.  If a subclass does not
-     * implement clone() properly and is registered with the system
-     * using registerInstance(), then the default clone() implementation
-     * will return null, and calls to createInstance() will fail.
-     *
-     * @return a copy of the object.
-     * @see #registerInstance
-     * @stable ICU 2.0
-     */
-    virtual Transliterator* clone() const;
-
-    /**
-     * Transliterates a segment of a string, with optional filtering.
-     *
-     * @param text the string to be transliterated
-     * @param start the beginning index, inclusive; <code>0 <= start
-     * <= limit</code>.
-     * @param limit the ending index, exclusive; <code>start <= limit
-     * <= text.length()</code>.
-     * @return The new limit index.  The text previously occupying <code>[start,
-     * limit)</code> has been transliterated, possibly to a string of a different
-     * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where
-     * <em>new-limit</em> is the return value. If the input offsets are out of bounds,
-     * the returned value is -1 and the input string remains unchanged.
-     * @stable ICU 2.0
-     */
-    virtual int32_t transliterate(Replaceable& text,
-                                  int32_t start, int32_t limit) const;
-
-    /**
-     * Transliterates an entire string in place. Convenience method.
-     * @param text the string to be transliterated
-     * @stable ICU 2.0
-     */
-    virtual void transliterate(Replaceable& text) const;
-
-    /**
-     * Transliterates the portion of the text buffer that can be
-     * transliterated unambiguosly after new text has been inserted,
-     * typically as a result of a keyboard event.  The new text in
-     * <code>insertion</code> will be inserted into <code>text</code>
-     * at <code>index.limit</code>, advancing
-     * <code>index.limit</code> by <code>insertion.length()</code>.
-     * Then the transliterator will try to transliterate characters of
-     * <code>text</code> between <code>index.cursor</code> and
-     * <code>index.limit</code>.  Characters before
-     * <code>index.cursor</code> will not be changed.
-     *
-     * <p>Upon return, values in <code>index</code> will be updated.
-     * <code>index.start</code> will be advanced to the first
-     * character that future calls to this method will read.
-     * <code>index.cursor</code> and <code>index.limit</code> will
-     * be adjusted to delimit the range of text that future calls to
-     * this method may change.
-     *
-     * <p>Typical usage of this method begins with an initial call
-     * with <code>index.start</code> and <code>index.limit</code>
-     * set to indicate the portion of <code>text</code> to be
-     * transliterated, and <code>index.cursor == index.start</code>.
-     * Thereafter, <code>index</code> can be used without
-     * modification in future calls, provided that all changes to
-     * <code>text</code> are made via this method.
-     *
-     * <p>This method assumes that future calls may be made that will
-     * insert new text into the buffer.  As a result, it only performs
-     * unambiguous transliterations.  After the last call to this
-     * method, there may be untransliterated text that is waiting for
-     * more input to resolve an ambiguity.  In order to perform these
-     * pending transliterations, clients should call {@link
-     * #finishTransliteration } after the last call to this
-     * method has been made.
-     *
-     * @param text the buffer holding transliterated and untransliterated text
-     * @param index an array of three integers.
-     *
-     * <ul><li><code>index.start</code>: the beginning index,
-     * inclusive; <code>0 <= index.start <= index.limit</code>.
-     *
-     * <li><code>index.limit</code>: the ending index, exclusive;
-     * <code>index.start <= index.limit <= text.length()</code>.
-     * <code>insertion</code> is inserted at
-     * <code>index.limit</code>.
-     *
-     * <li><code>index.cursor</code>: the next character to be
-     * considered for transliteration; <code>index.start <=
-     * index.cursor <= index.limit</code>.  Characters before
-     * <code>index.cursor</code> will not be changed by future calls
-     * to this method.</ul>
-     *
-     * @param insertion text to be inserted and possibly
-     * transliterated into the translation buffer at
-     * <code>index.limit</code>.  If <code>null</code> then no text
-     * is inserted.
-     * @param status    Output param to filled in with a success or an error.
-     * @see #handleTransliterate
-     * @exception IllegalArgumentException if <code>index</code>
-     * is invalid
-     * @see UTransPosition
-     * @stable ICU 2.0
-     */
-    virtual void transliterate(Replaceable& text, UTransPosition& index,
-                               const UnicodeString& insertion,
-                               UErrorCode& status) const;
-
-    /**
-     * Transliterates the portion of the text buffer that can be
-     * transliterated unambiguosly after a new character has been
-     * inserted, typically as a result of a keyboard event.  This is a
-     * convenience method; see {@link
-     * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const}
-     * for details.
-     * @param text the buffer holding transliterated and
-     * untransliterated text
-     * @param index an array of three integers.  See {@link
-     * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.
-     * @param insertion text to be inserted and possibly
-     * transliterated into the translation buffer at
-     * <code>index.limit</code>.
-     * @param status    Output param to filled in with a success or an error.
-     * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const
-     * @stable ICU 2.0
-     */
-    virtual void transliterate(Replaceable& text, UTransPosition& index,
-                               UChar32 insertion,
-                               UErrorCode& status) const;
-
-    /**
-     * Transliterates the portion of the text buffer that can be
-     * transliterated unambiguosly.  This is a convenience method; see
-     * {@link
-     * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }
-     * for details.
-     * @param text the buffer holding transliterated and
-     * untransliterated text
-     * @param index an array of three integers.  See {@link
-     * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.
-     * @param status    Output param to filled in with a success or an error.
-     * @see #transliterate(Replaceable, int[], String)
-     * @stable ICU 2.0
-     */
-    virtual void transliterate(Replaceable& text, UTransPosition& index,
-                               UErrorCode& status) const;
-
-    /**
-     * Finishes any pending transliterations that were waiting for
-     * more characters.  Clients should call this method as the last
-     * call after a sequence of one or more calls to
-     * <code>transliterate()</code>.
-     * @param text the buffer holding transliterated and
-     * untransliterated text.
-     * @param index the array of indices previously passed to {@link
-     * #transliterate }
-     * @stable ICU 2.0
-     */
-    virtual void finishTransliteration(Replaceable& text,
-                                       UTransPosition& index) const;
-
-private:
-
-    /**
-     * This internal method does incremental transliteration.  If the
-     * 'insertion' is non-null then we append it to 'text' before
-     * proceeding.  This method calls through to the pure virtual
-     * framework method handleTransliterate() to do the actual
-     * work.
-     * @param text the buffer holding transliterated and
-     * untransliterated text
-     * @param index an array of three integers.  See {@link
-     * #transliterate(Replaceable, int[], String)}.
-     * @param insertion text to be inserted and possibly
-     * transliterated into the translation buffer at
-     * <code>index.limit</code>.
-     * @param status    Output param to filled in with a success or an error.
-     */
-    void _transliterate(Replaceable& text,
-                        UTransPosition& index,
-                        const UnicodeString* insertion,
-                        UErrorCode &status) const;
-
-protected:
-
-    /**
-     * Abstract method that concrete subclasses define to implement
-     * their transliteration algorithm.  This method handles both
-     * incremental and non-incremental transliteration.  Let
-     * <code>originalStart</code> refer to the value of
-     * <code>pos.start</code> upon entry.
-     *
-     * <ul>
-     *  <li>If <code>incremental</code> is false, then this method
-     *  should transliterate all characters between
-     *  <code>pos.start</code> and <code>pos.limit</code>. Upon return
-     *  <code>pos.start</code> must == <code> pos.limit</code>.</li>
-     *
-     *  <li>If <code>incremental</code> is true, then this method
-     *  should transliterate all characters between
-     *  <code>pos.start</code> and <code>pos.limit</code> that can be
-     *  unambiguously transliterated, regardless of future insertions
-     *  of text at <code>pos.limit</code>.  Upon return,
-     *  <code>pos.start</code> should be in the range
-     *  [<code>originalStart</code>, <code>pos.limit</code>).
-     *  <code>pos.start</code> should be positioned such that
-     *  characters [<code>originalStart</code>, <code>
-     *  pos.start</code>) will not be changed in the future by this
-     *  transliterator and characters [<code>pos.start</code>,
-     *  <code>pos.limit</code>) are unchanged.</li>
-     * </ul>
-     *
-     * <p>Implementations of this method should also obey the
-     * following invariants:</p>
-     *
-     * <ul>
-     *  <li> <code>pos.limit</code> and <code>pos.contextLimit</code>
-     *  should be updated to reflect changes in length of the text
-     *  between <code>pos.start</code> and <code>pos.limit</code>. The
-     *  difference <code> pos.contextLimit - pos.limit</code> should
-     *  not change.</li>
-     *
-     *  <li><code>pos.contextStart</code> should not change.</li>
-     *
-     *  <li>Upon return, neither <code>pos.start</code> nor
-     *  <code>pos.limit</code> should be less than
-     *  <code>originalStart</code>.</li>
-     *
-     *  <li>Text before <code>originalStart</code> and text after
-     *  <code>pos.limit</code> should not change.</li>
-     *
-     *  <li>Text before <code>pos.contextStart</code> and text after
-     *  <code> pos.contextLimit</code> should be ignored.</li>
-     * </ul>
-     *
-     * <p>Subclasses may safely assume that all characters in
-     * [<code>pos.start</code>, <code>pos.limit</code>) are filtered.
-     * In other words, the filter has already been applied by the time
-     * this method is called.  See
-     * <code>filteredTransliterate()</code>.
-     *
-     * <p>This method is <b>not</b> for public consumption.  Calling
-     * this method directly will transliterate
-     * [<code>pos.start</code>, <code>pos.limit</code>) without
-     * applying the filter. End user code should call <code>
-     * transliterate()</code> instead of this method. Subclass code
-     * and wrapping transliterators should call
-     * <code>filteredTransliterate()</code> instead of this method.<p>
-     *
-     * @param text the buffer holding transliterated and
-     * untransliterated text
-     *
-     * @param pos the indices indicating the start, limit, context
-     * start, and context limit of the text.
-     *
-     * @param incremental if true, assume more text may be inserted at
-     * <code>pos.limit</code> and act accordingly.  Otherwise,
-     * transliterate all text between <code>pos.start</code> and
-     * <code>pos.limit</code> and move <code>pos.start</code> up to
-     * <code>pos.limit</code>.
-     *
-     * @see #transliterate
-     * @stable ICU 2.4
-     */
-    virtual void handleTransliterate(Replaceable& text,
-                                     UTransPosition& pos,
-                                     UBool incremental) const = 0;
-
-public:
-    /**
-     * Transliterate a substring of text, as specified by index, taking filters
-     * into account.  This method is for subclasses that need to delegate to
-     * another transliterator, such as CompoundTransliterator.
-     * @param text the text to be transliterated
-     * @param index the position indices
-     * @param incremental if TRUE, then assume more characters may be inserted
-     * at index.limit, and postpone processing to accomodate future incoming
-     * characters
-     * @stable ICU 2.4
-     */
-    virtual void filteredTransliterate(Replaceable& text,
-                                       UTransPosition& index,
-                                       UBool incremental) const;
-
-private:
-
-    /**
-     * Top-level transliteration method, handling filtering, incremental and
-     * non-incremental transliteration, and rollback.  All transliteration
-     * public API methods eventually call this method with a rollback argument
-     * of TRUE.  Other entities may call this method but rollback should be
-     * FALSE.
-     *
-     * <p>If this transliterator has a filter, break up the input text into runs
-     * of unfiltered characters.  Pass each run to
-     * <subclass>.handleTransliterate().
-     *
-     * <p>In incremental mode, if rollback is TRUE, perform a special
-     * incremental procedure in which several passes are made over the input
-     * text, adding one character at a time, and committing successful
-     * transliterations as they occur.  Unsuccessful transliterations are rolled
-     * back and retried with additional characters to give correct results.
-     *
-     * @param text the text to be transliterated
-     * @param index the position indices
-     * @param incremental if TRUE, then assume more characters may be inserted
-     * at index.limit, and postpone processing to accomodate future incoming
-     * characters
-     * @param rollback if TRUE and if incremental is TRUE, then perform special
-     * incremental processing, as described above, and undo partial
-     * transliterations where necessary.  If incremental is FALSE then this
-     * parameter is ignored.
-     */
-    virtual void filteredTransliterate(Replaceable& text,
-                                       UTransPosition& index,
-                                       UBool incremental,
-                                       UBool rollback) const;
-
-public:
-
-    /**
-     * Returns the length of the longest context required by this transliterator.
-     * This is <em>preceding</em> context.  The default implementation supplied
-     * by <code>Transliterator</code> returns zero; subclasses
-     * that use preceding context should override this method to return the
-     * correct value.  For example, if a transliterator translates "ddd" (where
-     * d is any digit) to "555" when preceded by "(ddd)", then the preceding
-     * context length is 5, the length of "(ddd)".
-     *
-     * @return The maximum number of preceding context characters this
-     * transliterator needs to examine
-     * @stable ICU 2.0
-     */
-    int32_t getMaximumContextLength(void) const;
-
-protected:
-
-    /**
-     * Method for subclasses to use to set the maximum context length.
-     * @param maxContextLength the new value to be set.
-     * @see #getMaximumContextLength
-     * @stable ICU 2.4
-     */
-    void setMaximumContextLength(int32_t maxContextLength);
-
-public:
-
-    /**
-     * Returns a programmatic identifier for this transliterator.
-     * If this identifier is passed to <code>createInstance()</code>, it
-     * will return this object, if it has been registered.
-     * @return a programmatic identifier for this transliterator.
-     * @see #registerInstance
-     * @see #registerFactory
-     * @see #getAvailableIDs
-     * @stable ICU 2.0
-     */
-    virtual const UnicodeString& getID(void) const;
-
-    /**
-     * Returns a name for this transliterator that is appropriate for
-     * display to the user in the default locale.  See {@link
-     * #getDisplayName } for details.
-     * @param ID     the string identifier for this transliterator
-     * @param result Output param to receive the display name
-     * @return       A reference to 'result'.
-     * @stable ICU 2.0
-     */
-    static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
-                                         UnicodeString& result);
-
-    /**
-     * Returns a name for this transliterator that is appropriate for
-     * display to the user in the given locale.  This name is taken
-     * from the locale resource data in the standard manner of the
-     * <code>java.text</code> package.
-     *
-     * <p>If no localized names exist in the system resource bundles,
-     * a name is synthesized using a localized
-     * <code>MessageFormat</code> pattern from the resource data.  The
-     * arguments to this pattern are an integer followed by one or two
-     * strings.  The integer is the number of strings, either 1 or 2.
-     * The strings are formed by splitting the ID for this
-     * transliterator at the first '-'.  If there is no '-', then the
-     * entire ID forms the only string.
-     * @param ID       the string identifier for this transliterator
-     * @param inLocale the Locale in which the display name should be
-     *                 localized.
-     * @param result   Output param to receive the display name
-     * @return         A reference to 'result'.
-     * @stable ICU 2.0
-     */
-    static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
-                                         const Locale& inLocale,
-                                         UnicodeString& result);
-
-    /**
-     * Returns the filter used by this transliterator, or <tt>NULL</tt>
-     * if this transliterator uses no filter.
-     * @return the filter used by this transliterator, or <tt>NULL</tt>
-     *         if this transliterator uses no filter.
-     * @stable ICU 2.0
-     */
-    const UnicodeFilter* getFilter(void) const;
-
-    /**
-     * Returns the filter used by this transliterator, or <tt>NULL</tt> if this
-     * transliterator uses no filter.  The caller must eventually delete the
-     * result.  After this call, this transliterator's filter is set to
-     * <tt>NULL</tt>.
-     * @return the filter used by this transliterator, or <tt>NULL</tt> if this
-     *         transliterator uses no filter.
-     * @stable ICU 2.4
-     */
-    UnicodeFilter* orphanFilter(void);
-
-    /**
-     * Changes the filter used by this transliterator.  If the filter
-     * is set to <tt>null</tt> then no filtering will occur.
-     *
-     * <p>Callers must take care if a transliterator is in use by
-     * multiple threads.  The filter should not be changed by one
-     * thread while another thread may be transliterating.
-     * @param adoptedFilter the new filter to be adopted.
-     * @stable ICU 2.0
-     */
-    void adoptFilter(UnicodeFilter* adoptedFilter);
-
-    /**
-     * Returns this transliterator's inverse.  See the class
-     * documentation for details.  This implementation simply inverts
-     * the two entities in the ID and attempts to retrieve the
-     * resulting transliterator.  That is, if <code>getID()</code>
-     * returns "A-B", then this method will return the result of
-     * <code>createInstance("B-A")</code>, or <code>null</code> if that
-     * call fails.
-     *
-     * <p>Subclasses with knowledge of their inverse may wish to
-     * override this method.
-     *
-     * @param status Output param to filled in with a success or an error.
-     * @return a transliterator that is an inverse, not necessarily
-     * exact, of this transliterator, or <code>null</code> if no such
-     * transliterator is registered.
-     * @see #registerInstance
-     * @stable ICU 2.0
-     */
-    Transliterator* createInverse(UErrorCode& status) const;
-
-    /**
-     * Returns a <code>Transliterator</code> object given its ID.
-     * The ID must be either a system transliterator ID or a ID registered
-     * using <code>registerInstance()</code>.
-     *
-     * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
-     * @param dir        either FORWARD or REVERSE.
-     * @param parseError Struct to recieve information on position
-     *                   of error if an error is encountered
-     * @param status     Output param to filled in with a success or an error.
-     * @return A <code>Transliterator</code> object with the given ID
-     * @see #registerInstance
-     * @see #getAvailableIDs
-     * @see #getID
-     * @stable ICU 2.0
-     */
-    static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
-                                          UTransDirection dir,
-                                          UParseError& parseError,
-                                          UErrorCode& status);
-
-    /**
-     * Returns a <code>Transliterator</code> object given its ID.
-     * The ID must be either a system transliterator ID or a ID registered
-     * using <code>registerInstance()</code>.
-     * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
-     * @param dir        either FORWARD or REVERSE.
-     * @param status     Output param to filled in with a success or an error.
-     * @return A <code>Transliterator</code> object with the given ID
-     * @stable ICU 2.0
-     */
-    static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
-                                          UTransDirection dir,
-                                          UErrorCode& status);
-
-    /**
-     * Returns a <code>Transliterator</code> object constructed from
-     * the given rule string.  This will be a RuleBasedTransliterator,
-     * if the rule string contains only rules, or a
-     * CompoundTransliterator, if it contains ID blocks, or a
-     * NullTransliterator, if it contains ID blocks which parse as
-     * empty for the given direction.
-     * @param ID            the id for the transliterator.
-     * @param rules         rules, separated by ';'
-     * @param dir           either FORWARD or REVERSE.
-     * @param parseError    Struct to recieve information on position
-     *                      of error if an error is encountered
-     * @param status        Output param set to success/failure code.
-     * @stable ICU 2.0
-     */
-    static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
-                                           const UnicodeString& rules,
-                                           UTransDirection dir,
-                                           UParseError& parseError,
-                                           UErrorCode& status);
-
-    /**
-     * Create a rule string that can be passed to createFromRules()
-     * to recreate this transliterator.
-     * @param result the string to receive the rules.  Previous
-     * contents will be deleted.
-     * @param escapeUnprintable if TRUE then convert unprintable
-     * character to their hex escape representations, \\uxxxx or
-     * \\Uxxxxxxxx.  Unprintable characters are those other than
-     * U+000A, U+0020..U+007E.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& toRules(UnicodeString& result,
-                                   UBool escapeUnprintable) const;
-
-    /**
-     * Return the number of elements that make up this transliterator.
-     * For example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
-     * were created, the return value of this method would be 3.
-     *
-     * <p>If this transliterator is not composed of other
-     * transliterators, then this method returns 1.
-     * @return the number of transliterators that compose this
-     * transliterator, or 1 if this transliterator is not composed of
-     * multiple transliterators
-     * @stable ICU 3.0
-     */
-    int32_t countElements() const;
-
-    /**
-     * Return an element that makes up this transliterator.  For
-     * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
-     * were created, the return value of this method would be one
-     * of the three transliterator objects that make up that
-     * transliterator: [NFD, Jamo-Latin, Latin-Greek].
-     *
-     * <p>If this transliterator is not composed of other
-     * transliterators, then this method will return a reference to
-     * this transliterator when given the index 0.
-     * @param index a value from 0..countElements()-1 indicating the
-     * transliterator to return
-     * @param ec input-output error code
-     * @return one of the transliterators that makes up this
-     * transliterator, if this transliterator is made up of multiple
-     * transliterators, otherwise a reference to this object if given
-     * an index of 0
-     * @stable ICU 3.0
-     */
-    const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
-
-    /**
-     * Returns the set of all characters that may be modified in the
-     * input text by this Transliterator.  This incorporates this
-     * object's current filter; if the filter is changed, the return
-     * value of this function will change.  The default implementation
-     * returns an empty set.  Some subclasses may override {@link
-     * #handleGetSourceSet } to return a more precise result.  The
-     * return result is approximate in any case and is intended for
-     * use by tests, tools, or utilities.
-     * @param result receives result set; previous contents lost
-     * @return a reference to result
-     * @see #getTargetSet
-     * @see #handleGetSourceSet
-     * @stable ICU 2.4
-     */
-    UnicodeSet& getSourceSet(UnicodeSet& result) const;
-
-    /**
-     * Framework method that returns the set of all characters that
-     * may be modified in the input text by this Transliterator,
-     * ignoring the effect of this object's filter.  The base class
-     * implementation returns the empty set.  Subclasses that wish to
-     * implement this should override this method.
-     * @return the set of characters that this transliterator may
-     * modify.  The set may be modified, so subclasses should return a
-     * newly-created object.
-     * @param result receives result set; previous contents lost
-     * @see #getSourceSet
-     * @see #getTargetSet
-     * @stable ICU 2.4
-     */
-    virtual void handleGetSourceSet(UnicodeSet& result) const;
-
-    /**
-     * Returns the set of all characters that may be generated as
-     * replacement text by this transliterator.  The default
-     * implementation returns the empty set.  Some subclasses may
-     * override this method to return a more precise result.  The
-     * return result is approximate in any case and is intended for
-     * use by tests, tools, or utilities requiring such
-     * meta-information.
-     * @param result receives result set; previous contents lost
-     * @return a reference to result
-     * @see #getTargetSet
-     * @stable ICU 2.4
-     */
-    virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
-
-public:
-
-    /**
-     * Registers a factory function that creates transliterators of
-     * a given ID.
-     * @param id the ID being registered
-     * @param factory a function pointer that will be copied and
-     * called later when the given ID is passed to createInstance()
-     * @param context a context pointer that will be stored and
-     * later passed to the factory function when an ID matching
-     * the registration ID is being instantiated with this factory.
-     * @stable ICU 2.0
-     */
-    static void U_EXPORT2 registerFactory(const UnicodeString& id,
-                                Factory factory,
-                                Token context);
-
-    /**
-     * Registers an instance <tt>obj</tt> of a subclass of
-     * <code>Transliterator</code> with the system.  When
-     * <tt>createInstance()</tt> is called with an ID string that is
-     * equal to <tt>obj->getID()</tt>, then <tt>obj->clone()</tt> is
-     * returned.
-     *
-     * After this call the Transliterator class owns the adoptedObj
-     * and will delete it.
-     *
-     * @param adoptedObj an instance of subclass of
-     * <code>Transliterator</code> that defines <tt>clone()</tt>
-     * @see #createInstance
-     * @see #registerFactory
-     * @see #unregister
-     * @stable ICU 2.0
-     */
-    static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
-
-    /**
-     * Registers an ID string as an alias of another ID string.
-     * That is, after calling this function, <tt>createInstance(aliasID)</tt>
-     * will return the same thing as <tt>createInstance(realID)</tt>.
-     * This is generally used to create shorter, more mnemonic aliases
-     * for long compound IDs.
-     *
-     * @param aliasID The new ID being registered.
-     * @param realID The ID that the new ID is to be an alias for.
-     * This can be a compound ID and can include filters and should
-     * refer to transliterators that have already been registered with
-     * the framework, although this isn't checked.
-     * @stable ICU 3.6
-     */
-     static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
-                                         const UnicodeString& realID);
-
-protected:
-
-    /**
-     * @internal
-     * @param id the ID being registered
-     * @param factory a function pointer that will be copied and
-     * called later when the given ID is passed to createInstance()
-     * @param context a context pointer that will be stored and
-     * later passed to the factory function when an ID matching
-     * the registration ID is being instantiated with this factory.
-     */
-    static void _registerFactory(const UnicodeString& id,
-                                 Factory factory,
-                                 Token context);
-
-    /**
-     * @internal
-     */
-    static void _registerInstance(Transliterator* adoptedObj);
-
-    /**
-     * @internal
-     */
-    static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
-
-    /**
-     * Register two targets as being inverses of one another.  For
-     * example, calling registerSpecialInverse("NFC", "NFD", true) causes
-     * Transliterator to form the following inverse relationships:
-     *
-     * <pre>NFC => NFD
-     * Any-NFC => Any-NFD
-     * NFD => NFC
-     * Any-NFD => Any-NFC</pre>
-     *
-     * (Without the special inverse registration, the inverse of NFC
-     * would be NFC-Any.)  Note that NFD is shorthand for Any-NFD, but
-     * that the presence or absence of "Any-" is preserved.
-     *
-     * <p>The relationship is symmetrical; registering (a, b) is
-     * equivalent to registering (b, a).
-     *
-     * <p>The relevant IDs must still be registered separately as
-     * factories or classes.
-     *
-     * <p>Only the targets are specified.  Special inverses always
-     * have the form Any-Target1 <=> Any-Target2.  The target should
-     * have canonical casing (the casing desired to be produced when
-     * an inverse is formed) and should contain no whitespace or other
-     * extraneous characters.
-     *
-     * @param target the target against which to register the inverse
-     * @param inverseTarget the inverse of target, that is
-     * Any-target.getInverse() => Any-inverseTarget
-     * @param bidirectional if true, register the reverse relation
-     * as well, that is, Any-inverseTarget.getInverse() => Any-target
-     * @internal
-     */
-    static void _registerSpecialInverse(const UnicodeString& target,
-                                        const UnicodeString& inverseTarget,
-                                        UBool bidirectional);
-
-public:
-
-    /**
-     * Unregisters a transliterator or class.  This may be either
-     * a system transliterator or a user transliterator or class.
-     * Any attempt to construct an unregistered transliterator based
-     * on its ID will fail.
-     *
-     * @param ID the ID of the transliterator or class
-     * @return the <code>Object</code> that was registered with
-     * <code>ID</code>, or <code>null</code> if none was
-     * @see #registerInstance
-     * @see #registerFactory
-     * @stable ICU 2.0
-     */
-    static void U_EXPORT2 unregister(const UnicodeString& ID);
-
-public:
-
-    /**
-     * Return a StringEnumeration over the IDs available at the time of the
-     * call, including user-registered IDs.
-     * @param ec input-output error code
-     * @return a newly-created StringEnumeration over the transliterators
-     * available at the time of the call. The caller should delete this object
-     * when done using it.
-     * @stable ICU 3.0
-     */
-    static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec);
-
-    /**
-     * Return the number of registered source specifiers.
-     * @return the number of registered source specifiers.
-     * @stable ICU 2.0
-     */
-    static int32_t U_EXPORT2 countAvailableSources(void);
-
-    /**
-     * Return a registered source specifier.
-     * @param index which specifier to return, from 0 to n-1, where
-     * n = countAvailableSources()
-     * @param result fill-in paramter to receive the source specifier.
-     * If index is out of range, result will be empty.
-     * @return reference to result
-     * @stable ICU 2.0
-     */
-    static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
-                                             UnicodeString& result);
-
-    /**
-     * Return the number of registered target specifiers for a given
-     * source specifier.
-     * @param source the given source specifier.
-     * @return the number of registered target specifiers for a given
-     *         source specifier.
-     * @stable ICU 2.0
-     */
-    static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
-
-    /**
-     * Return a registered target specifier for a given source.
-     * @param index which specifier to return, from 0 to n-1, where
-     * n = countAvailableTargets(source)
-     * @param source the source specifier
-     * @param result fill-in paramter to receive the target specifier.
-     * If source is invalid or if index is out of range, result will
-     * be empty.
-     * @return reference to result
-     * @stable ICU 2.0
-     */
-    static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
-                                             const UnicodeString& source,
-                                             UnicodeString& result);
-
-    /**
-     * Return the number of registered variant specifiers for a given
-     * source-target pair.
-     * @param source    the source specifiers.
-     * @param target    the target specifiers.
-     * @stable ICU 2.0
-     */
-    static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
-                                          const UnicodeString& target);
-
-    /**
-     * Return a registered variant specifier for a given source-target
-     * pair.
-     * @param index which specifier to return, from 0 to n-1, where
-     * n = countAvailableVariants(source, target)
-     * @param source the source specifier
-     * @param target the target specifier
-     * @param result fill-in paramter to receive the variant
-     * specifier.  If source is invalid or if target is invalid or if
-     * index is out of range, result will be empty.
-     * @return reference to result
-     * @stable ICU 2.0
-     */
-    static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
-                                              const UnicodeString& source,
-                                              const UnicodeString& target,
-                                              UnicodeString& result);
-
-protected:
-
-    /**
-     * Non-mutexed internal method
-     * @internal
-     */
-    static int32_t _countAvailableSources(void);
-
-    /**
-     * Non-mutexed internal method
-     * @internal
-     */
-    static UnicodeString& _getAvailableSource(int32_t index,
-                                              UnicodeString& result);
-
-    /**
-     * Non-mutexed internal method
-     * @internal
-     */
-    static int32_t _countAvailableTargets(const UnicodeString& source);
-
-    /**
-     * Non-mutexed internal method
-     * @internal
-     */
-    static UnicodeString& _getAvailableTarget(int32_t index,
-                                              const UnicodeString& source,
-                                              UnicodeString& result);
-
-    /**
-     * Non-mutexed internal method
-     * @internal
-     */
-    static int32_t _countAvailableVariants(const UnicodeString& source,
-                                           const UnicodeString& target);
-
-    /**
-     * Non-mutexed internal method
-     * @internal
-     */
-    static UnicodeString& _getAvailableVariant(int32_t index,
-                                               const UnicodeString& source,
-                                               const UnicodeString& target,
-                                               UnicodeString& result);
-
-protected:
-
-    /**
-     * Set the ID of this transliterators.  Subclasses shouldn't do
-     * this, unless the underlying script behavior has changed.
-     * @param id the new id t to be set.
-     * @stable ICU 2.4
-     */
-    void setID(const UnicodeString& id);
-
-public:
-
-    /**
-     * Return the class ID for this class.  This is useful only for
-     * comparing to a return value from getDynamicClassID().
-     * Note that Transliterator is an abstract base class, and therefor
-     * no fully constructed object will  have a dynamic
-     * UCLassID that equals the UClassID returned from
-     * TRansliterator::getStaticClassID().
-     * @return       The class ID for class Transliterator.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID <b>polymorphically</b>.  This method
-     * is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI.  Polymorphic operator==() and
-     * clone() methods call this method.
-     *
-     * <p>Concrete subclasses of Transliterator must use the
-     *    UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro from
-     *    uobject.h to provide the RTTI functions.
-     *
-     * @return The class ID for this object. All objects of a given
-     * class have the same class ID.  Objects of other classes have
-     * different class IDs.
-     * @stable ICU 2.0
-     */
-    virtual UClassID getDynamicClassID(void) const = 0;
-
-private:
-    static UBool initializeRegistry(UErrorCode &status);
-
-public:
-    /**
-     * Return the number of IDs currently registered with the system.
-     * To retrieve the actual IDs, call getAvailableID(i) with
-     * i from 0 to countAvailableIDs() - 1.
-     * @return the number of IDs currently registered with the system.
-     * @obsolete ICU 3.4 use getAvailableIDs() instead
-     */
-    static int32_t U_EXPORT2 countAvailableIDs(void);
-
-    /**
-     * Return the index-th available ID.  index must be between 0
-     * and countAvailableIDs() - 1, inclusive.  If index is out of
-     * range, the result of getAvailableID(0) is returned.
-     * @param index the given ID index.
-     * @return      the index-th available ID.  index must be between 0
-     *              and countAvailableIDs() - 1, inclusive.  If index is out of
-     *              range, the result of getAvailableID(0) is returned.
-     * @obsolete ICU 3.4 use getAvailableIDs() instead; this function
-     * is not thread safe, since it returns a reference to storage that
-     * may become invalid if another thread calls unregister
-     */
-    static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
-};
-
-inline int32_t Transliterator::getMaximumContextLength(void) const {
-    return maximumContextLength;
-}
-
-inline void Transliterator::setID(const UnicodeString& id) {
-    ID = id;
-    // NUL-terminate the ID string, which is a non-aliased copy.
-    ID.append((UChar)0);
-    ID.truncate(ID.length()-1);
-}
-
-inline Transliterator::Token Transliterator::integerToken(int32_t i) {
-    Token t;
-    t.integer = i;
-    return t;
-}
-
-inline Transliterator::Token Transliterator::pointerToken(void* p) {
-    Token t;
-    t.pointer = p;
-    return t;
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_TRANSLITERATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/translit.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/translit.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/translit.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/translit.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1323 @@
+/*
+**********************************************************************
+* Copyright (C) 1999-2008, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef TRANSLIT_H
+#define TRANSLIT_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Tranforms text from one format to another.
+ */
+ 
+#if !UCONFIG_NO_TRANSLITERATION
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/parseerr.h"
+#include "unicode/utrans.h" // UTransPosition, UTransDirection
+#include "unicode/strenum.h"
+
+U_NAMESPACE_BEGIN
+
+class UnicodeFilter;
+class UnicodeSet;
+class CompoundTransliterator;
+class TransliteratorParser;
+class NormalizationTransliterator;
+class TransliteratorIDParser;
+
+/**
+ *
+ * <code>Transliterator</code> is an abstract class that
+ * transliterates text from one format to another.  The most common
+ * kind of transliterator is a script, or alphabet, transliterator.
+ * For example, a Russian to Latin transliterator changes Russian text
+ * written in Cyrillic characters to phonetically equivalent Latin
+ * characters.  It does not <em>translate</em> Russian to English!
+ * Transliteration, unlike translation, operates on characters, without
+ * reference to the meanings of words and sentences.
+ *
+ * <p>Although script conversion is its most common use, a
+ * transliterator can actually perform a more general class of tasks.
+ * In fact, <code>Transliterator</code> defines a very general API
+ * which specifies only that a segment of the input text is replaced
+ * by new text.  The particulars of this conversion are determined
+ * entirely by subclasses of <code>Transliterator</code>.
+ *
+ * <p><b>Transliterators are stateless</b>
+ *
+ * <p><code>Transliterator</code> objects are <em>stateless</em>; they
+ * retain no information between calls to
+ * <code>transliterate()</code>.  (However, this does <em>not</em>
+ * mean that threads may share transliterators without synchronizing
+ * them.  Transliterators are not immutable, so they must be
+ * synchronized when shared between threads.)  This might seem to
+ * limit the complexity of the transliteration operation.  In
+ * practice, subclasses perform complex transliterations by delaying
+ * the replacement of text until it is known that no other
+ * replacements are possible.  In other words, although the
+ * <code>Transliterator</code> objects are stateless, the source text
+ * itself embodies all the needed information, and delayed operation
+ * allows arbitrary complexity.
+ *
+ * <p><b>Batch transliteration</b>
+ *
+ * <p>The simplest way to perform transliteration is all at once, on a
+ * string of existing text.  This is referred to as <em>batch</em>
+ * transliteration.  For example, given a string <code>input</code>
+ * and a transliterator <code>t</code>, the call
+ *
+ * \htmlonly<blockquote>\endhtmlonly<code>String result = t.transliterate(input);
+ * </code>\htmlonly</blockquote>\endhtmlonly
+ *
+ * will transliterate it and return the result.  Other methods allow
+ * the client to specify a substring to be transliterated and to use
+ * {@link Replaceable } objects instead of strings, in order to
+ * preserve out-of-band information (such as text styles).
+ *
+ * <p><b>Keyboard transliteration</b>
+ *
+ * <p>Somewhat more involved is <em>keyboard</em>, or incremental
+ * transliteration.  This is the transliteration of text that is
+ * arriving from some source (typically the user's keyboard) one
+ * character at a time, or in some other piecemeal fashion.
+ *
+ * <p>In keyboard transliteration, a <code>Replaceable</code> buffer
+ * stores the text.  As text is inserted, as much as possible is
+ * transliterated on the fly.  This means a GUI that displays the
+ * contents of the buffer may show text being modified as each new
+ * character arrives.
+ *
+ * <p>Consider the simple <code>RuleBasedTransliterator</code>:
+ *
+ * \htmlonly<blockquote>\endhtmlonly<code>
+ * th&gt;{theta}<br>
+ * t&gt;{tau}
+ * </code>\htmlonly</blockquote>\endhtmlonly
+ *
+ * When the user types 't', nothing will happen, since the
+ * transliterator is waiting to see if the next character is 'h'.  To
+ * remedy this, we introduce the notion of a cursor, marked by a '|'
+ * in the output string:
+ *
+ * \htmlonly<blockquote>\endhtmlonly<code>
+ * t&gt;|{tau}<br>
+ * {tau}h&gt;{theta}
+ * </code>\htmlonly</blockquote>\endhtmlonly
+ *
+ * Now when the user types 't', tau appears, and if the next character
+ * is 'h', the tau changes to a theta.  This is accomplished by
+ * maintaining a cursor position (independent of the insertion point,
+ * and invisible in the GUI) across calls to
+ * <code>transliterate()</code>.  Typically, the cursor will
+ * be coincident with the insertion point, but in a case like the one
+ * above, it will precede the insertion point.
+ *
+ * <p>Keyboard transliteration methods maintain a set of three indices
+ * that are updated with each call to
+ * <code>transliterate()</code>, including the cursor, start,
+ * and limit.  Since these indices are changed by the method, they are
+ * passed in an <code>int[]</code> array. The <code>START</code> index
+ * marks the beginning of the substring that the transliterator will
+ * look at.  It is advanced as text becomes committed (but it is not
+ * the committed index; that's the <code>CURSOR</code>).  The
+ * <code>CURSOR</code> index, described above, marks the point at
+ * which the transliterator last stopped, either because it reached
+ * the end, or because it required more characters to disambiguate
+ * between possible inputs.  The <code>CURSOR</code> can also be
+ * explicitly set by rules in a <code>RuleBasedTransliterator</code>.
+ * Any characters before the <code>CURSOR</code> index are frozen;
+ * future keyboard transliteration calls within this input sequence
+ * will not change them.  New text is inserted at the
+ * <code>LIMIT</code> index, which marks the end of the substring that
+ * the transliterator looks at.
+ *
+ * <p>Because keyboard transliteration assumes that more characters
+ * are to arrive, it is conservative in its operation.  It only
+ * transliterates when it can do so unambiguously.  Otherwise it waits
+ * for more characters to arrive.  When the client code knows that no
+ * more characters are forthcoming, perhaps because the user has
+ * performed some input termination operation, then it should call
+ * <code>finishTransliteration()</code> to complete any
+ * pending transliterations.
+ *
+ * <p><b>Inverses</b>
+ *
+ * <p>Pairs of transliterators may be inverses of one another.  For
+ * example, if transliterator <b>A</b> transliterates characters by
+ * incrementing their Unicode value (so "abc" -> "def"), and
+ * transliterator <b>B</b> decrements character values, then <b>A</b>
+ * is an inverse of <b>B</b> and vice versa.  If we compose <b>A</b>
+ * with <b>B</b> in a compound transliterator, the result is the
+ * indentity transliterator, that is, a transliterator that does not
+ * change its input text.
+ *
+ * The <code>Transliterator</code> method <code>getInverse()</code>
+ * returns a transliterator's inverse, if one exists, or
+ * <code>null</code> otherwise.  However, the result of
+ * <code>getInverse()</code> usually will <em>not</em> be a true
+ * mathematical inverse.  This is because true inverse transliterators
+ * are difficult to formulate.  For example, consider two
+ * transliterators: <b>AB</b>, which transliterates the character 'A'
+ * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'.  It might
+ * seem that these are exact inverses, since
+ *
+ * \htmlonly<blockquote>\endhtmlonly"A" x <b>AB</b> -> "B"<br>
+ * "B" x <b>BA</b> -> "A"\htmlonly</blockquote>\endhtmlonly
+ *
+ * where 'x' represents transliteration.  However,
+ *
+ * \htmlonly<blockquote>\endhtmlonly"ABCD" x <b>AB</b> -> "BBCD"<br>
+ * "BBCD" x <b>BA</b> -> "AACD"\htmlonly</blockquote>\endhtmlonly
+ *
+ * so <b>AB</b> composed with <b>BA</b> is not the
+ * identity. Nonetheless, <b>BA</b> may be usefully considered to be
+ * <b>AB</b>'s inverse, and it is on this basis that
+ * <b>AB</b><code>.getInverse()</code> could legitimately return
+ * <b>BA</b>.
+ *
+ * <p><b>IDs and display names</b>
+ *
+ * <p>A transliterator is designated by a short identifier string or
+ * <em>ID</em>.  IDs follow the format <em>source-destination</em>,
+ * where <em>source</em> describes the entity being replaced, and
+ * <em>destination</em> describes the entity replacing
+ * <em>source</em>.  The entities may be the names of scripts,
+ * particular sequences of characters, or whatever else it is that the
+ * transliterator converts to or from.  For example, a transliterator
+ * from Russian to Latin might be named "Russian-Latin".  A
+ * transliterator from keyboard escape sequences to Latin-1 characters
+ * might be named "KeyboardEscape-Latin1".  By convention, system
+ * entity names are in English, with the initial letters of words
+ * capitalized; user entity names may follow any format so long as
+ * they do not contain dashes.
+ *
+ * <p>In addition to programmatic IDs, transliterator objects have
+ * display names for presentation in user interfaces, returned by
+ * {@link #getDisplayName }.
+ *
+ * <p><b>Factory methods and registration</b>
+ *
+ * <p>In general, client code should use the factory method
+ * {@link #createInstance } to obtain an instance of a
+ * transliterator given its ID.  Valid IDs may be enumerated using
+ * <code>getAvailableIDs()</code>.  Since transliterators are mutable,
+ * multiple calls to {@link #createInstance } with the same ID will
+ * return distinct objects.
+ *
+ * <p>In addition to the system transliterators registered at startup,
+ * user transliterators may be registered by calling
+ * <code>registerInstance()</code> at run time.  A registered instance
+ * acts a template; future calls to {@link #createInstance } with the ID
+ * of the registered object return clones of that object.  Thus any
+ * object passed to <tt>registerInstance()</tt> must implement
+ * <tt>clone()</tt> propertly.  To register a transliterator subclass
+ * without instantiating it (until it is needed), users may call
+ * {@link #registerFactory }.  In this case, the objects are
+ * instantiated by invoking the zero-argument public constructor of
+ * the class.
+ *
+ * <p><b>Subclassing</b>
+ *
+ * Subclasses must implement the abstract method
+ * <code>handleTransliterate()</code>.  <p>Subclasses should override
+ * the <code>transliterate()</code> method taking a
+ * <code>Replaceable</code> and the <code>transliterate()</code>
+ * method taking a <code>String</code> and <code>StringBuffer</code>
+ * if the performance of these methods can be improved over the
+ * performance obtained by the default implementations in this class.
+ *
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+class U_I18N_API Transliterator : public UObject {
+
+private:
+
+    /**
+     * Programmatic name, e.g., "Latin-Arabic".
+     */
+    UnicodeString ID;
+
+    /**
+     * This transliterator's filter.  Any character for which
+     * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     */
+    UnicodeFilter* filter;
+
+    int32_t maximumContextLength;
+
+ public:
+
+    /**
+     * A context integer or pointer for a factory function, passed by
+     * value.
+     * @stable ICU 2.4
+     */
+    union Token {
+        /**
+         * This token, interpreted as a 32-bit integer.
+         * @stable ICU 2.4
+         */
+        int32_t integer;
+        /**
+         * This token, interpreted as a native pointer.
+         * @stable ICU 2.4
+         */
+        void*   pointer;
+    };
+
+    /**
+     * Return a token containing an integer.
+     * @return a token containing an integer.
+     * @internal
+     */
+    inline static Token integerToken(int32_t);
+
+    /**
+     * Return a token containing a pointer.
+     * @return a token containing a pointer.
+     * @internal
+     */
+    inline static Token pointerToken(void*);
+
+    /**
+     * A function that creates and returns a Transliterator.  When
+     * invoked, it will be passed the ID string that is being
+     * instantiated, together with the context pointer that was passed
+     * in when the factory function was first registered.  Many
+     * factory functions will ignore both parameters, however,
+     * functions that are registered to more than one ID may use the
+     * ID or the context parameter to parameterize the transliterator
+     * they create.
+     * @param ID      the string identifier for this transliterator
+     * @param context a context pointer that will be stored and
+     *                later passed to the factory function when an ID matching
+     *                the registration ID is being instantiated with this factory.
+     * @stable ICU 2.4
+     */
+    typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);
+
+protected:
+
+    /**
+     * Default constructor.
+     * @param ID the string identifier for this transliterator
+     * @param adoptedFilter the filter.  Any character for which
+     * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @stable ICU 2.4
+     */
+    Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
+
+    /**
+     * Copy constructor.
+     * @stable ICU 2.4
+     */
+    Transliterator(const Transliterator&);
+
+    /**
+     * Assignment operator.
+     * @stable ICU 2.4
+     */
+    Transliterator& operator=(const Transliterator&);
+
+    /**
+     * Create a transliterator from a basic ID.  This is an ID
+     * containing only the forward direction source, target, and
+     * variant.
+     * @param id a basic ID of the form S-T or S-T/V.
+     * @param canon canonical ID to assign to the object, or
+     * NULL to leave the ID unchanged
+     * @return a newly created Transliterator or null if the ID is
+     * invalid.
+     * @stable ICU 2.4
+     */
+    static Transliterator* createBasicInstance(const UnicodeString& id,
+                                               const UnicodeString* canon);
+
+    friend class TransliteratorParser; // for parseID()
+    friend class TransliteratorIDParser; // for createBasicInstance()
+    friend class TransliteratorAlias; // for setID()
+
+public:
+
+    /**
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~Transliterator();
+
+    /**
+     * Implements Cloneable.
+     * All subclasses are encouraged to implement this method if it is
+     * possible and reasonable to do so.  Subclasses that are to be
+     * registered with the system using <tt>registerInstance()</tt>
+     * are required to implement this method.  If a subclass does not
+     * implement clone() properly and is registered with the system
+     * using registerInstance(), then the default clone() implementation
+     * will return null, and calls to createInstance() will fail.
+     *
+     * @return a copy of the object.
+     * @see #registerInstance
+     * @stable ICU 2.0
+     */
+    virtual Transliterator* clone() const;
+
+    /**
+     * Transliterates a segment of a string, with optional filtering.
+     *
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @return The new limit index.  The text previously occupying <code>[start,
+     * limit)</code> has been transliterated, possibly to a string of a different
+     * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where
+     * <em>new-limit</em> is the return value. If the input offsets are out of bounds,
+     * the returned value is -1 and the input string remains unchanged.
+     * @stable ICU 2.0
+     */
+    virtual int32_t transliterate(Replaceable& text,
+                                  int32_t start, int32_t limit) const;
+
+    /**
+     * Transliterates an entire string in place. Convenience method.
+     * @param text the string to be transliterated
+     * @stable ICU 2.0
+     */
+    virtual void transliterate(Replaceable& text) const;
+
+    /**
+     * Transliterates the portion of the text buffer that can be
+     * transliterated unambiguosly after new text has been inserted,
+     * typically as a result of a keyboard event.  The new text in
+     * <code>insertion</code> will be inserted into <code>text</code>
+     * at <code>index.limit</code>, advancing
+     * <code>index.limit</code> by <code>insertion.length()</code>.
+     * Then the transliterator will try to transliterate characters of
+     * <code>text</code> between <code>index.cursor</code> and
+     * <code>index.limit</code>.  Characters before
+     * <code>index.cursor</code> will not be changed.
+     *
+     * <p>Upon return, values in <code>index</code> will be updated.
+     * <code>index.start</code> will be advanced to the first
+     * character that future calls to this method will read.
+     * <code>index.cursor</code> and <code>index.limit</code> will
+     * be adjusted to delimit the range of text that future calls to
+     * this method may change.
+     *
+     * <p>Typical usage of this method begins with an initial call
+     * with <code>index.start</code> and <code>index.limit</code>
+     * set to indicate the portion of <code>text</code> to be
+     * transliterated, and <code>index.cursor == index.start</code>.
+     * Thereafter, <code>index</code> can be used without
+     * modification in future calls, provided that all changes to
+     * <code>text</code> are made via this method.
+     *
+     * <p>This method assumes that future calls may be made that will
+     * insert new text into the buffer.  As a result, it only performs
+     * unambiguous transliterations.  After the last call to this
+     * method, there may be untransliterated text that is waiting for
+     * more input to resolve an ambiguity.  In order to perform these
+     * pending transliterations, clients should call {@link
+     * #finishTransliteration } after the last call to this
+     * method has been made.
+     *
+     * @param text the buffer holding transliterated and untransliterated text
+     * @param index an array of three integers.
+     *
+     * <ul><li><code>index.start</code>: the beginning index,
+     * inclusive; <code>0 <= index.start <= index.limit</code>.
+     *
+     * <li><code>index.limit</code>: the ending index, exclusive;
+     * <code>index.start <= index.limit <= text.length()</code>.
+     * <code>insertion</code> is inserted at
+     * <code>index.limit</code>.
+     *
+     * <li><code>index.cursor</code>: the next character to be
+     * considered for transliteration; <code>index.start <=
+     * index.cursor <= index.limit</code>.  Characters before
+     * <code>index.cursor</code> will not be changed by future calls
+     * to this method.</ul>
+     *
+     * @param insertion text to be inserted and possibly
+     * transliterated into the translation buffer at
+     * <code>index.limit</code>.  If <code>null</code> then no text
+     * is inserted.
+     * @param status    Output param to filled in with a success or an error.
+     * @see #handleTransliterate
+     * @exception IllegalArgumentException if <code>index</code>
+     * is invalid
+     * @see UTransPosition
+     * @stable ICU 2.0
+     */
+    virtual void transliterate(Replaceable& text, UTransPosition& index,
+                               const UnicodeString& insertion,
+                               UErrorCode& status) const;
+
+    /**
+     * Transliterates the portion of the text buffer that can be
+     * transliterated unambiguosly after a new character has been
+     * inserted, typically as a result of a keyboard event.  This is a
+     * convenience method; see {@link
+     * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const}
+     * for details.
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     * @param index an array of three integers.  See {@link
+     * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.
+     * @param insertion text to be inserted and possibly
+     * transliterated into the translation buffer at
+     * <code>index.limit</code>.
+     * @param status    Output param to filled in with a success or an error.
+     * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const
+     * @stable ICU 2.0
+     */
+    virtual void transliterate(Replaceable& text, UTransPosition& index,
+                               UChar32 insertion,
+                               UErrorCode& status) const;
+
+    /**
+     * Transliterates the portion of the text buffer that can be
+     * transliterated unambiguosly.  This is a convenience method; see
+     * {@link
+     * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }
+     * for details.
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     * @param index an array of three integers.  See {@link
+     * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.
+     * @param status    Output param to filled in with a success or an error.
+     * @see #transliterate(Replaceable, int[], String)
+     * @stable ICU 2.0
+     */
+    virtual void transliterate(Replaceable& text, UTransPosition& index,
+                               UErrorCode& status) const;
+
+    /**
+     * Finishes any pending transliterations that were waiting for
+     * more characters.  Clients should call this method as the last
+     * call after a sequence of one or more calls to
+     * <code>transliterate()</code>.
+     * @param text the buffer holding transliterated and
+     * untransliterated text.
+     * @param index the array of indices previously passed to {@link
+     * #transliterate }
+     * @stable ICU 2.0
+     */
+    virtual void finishTransliteration(Replaceable& text,
+                                       UTransPosition& index) const;
+
+private:
+
+    /**
+     * This internal method does incremental transliteration.  If the
+     * 'insertion' is non-null then we append it to 'text' before
+     * proceeding.  This method calls through to the pure virtual
+     * framework method handleTransliterate() to do the actual
+     * work.
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     * @param index an array of three integers.  See {@link
+     * #transliterate(Replaceable, int[], String)}.
+     * @param insertion text to be inserted and possibly
+     * transliterated into the translation buffer at
+     * <code>index.limit</code>.
+     * @param status    Output param to filled in with a success or an error.
+     */
+    void _transliterate(Replaceable& text,
+                        UTransPosition& index,
+                        const UnicodeString* insertion,
+                        UErrorCode &status) const;
+
+protected:
+
+    /**
+     * Abstract method that concrete subclasses define to implement
+     * their transliteration algorithm.  This method handles both
+     * incremental and non-incremental transliteration.  Let
+     * <code>originalStart</code> refer to the value of
+     * <code>pos.start</code> upon entry.
+     *
+     * <ul>
+     *  <li>If <code>incremental</code> is false, then this method
+     *  should transliterate all characters between
+     *  <code>pos.start</code> and <code>pos.limit</code>. Upon return
+     *  <code>pos.start</code> must == <code> pos.limit</code>.</li>
+     *
+     *  <li>If <code>incremental</code> is true, then this method
+     *  should transliterate all characters between
+     *  <code>pos.start</code> and <code>pos.limit</code> that can be
+     *  unambiguously transliterated, regardless of future insertions
+     *  of text at <code>pos.limit</code>.  Upon return,
+     *  <code>pos.start</code> should be in the range
+     *  [<code>originalStart</code>, <code>pos.limit</code>).
+     *  <code>pos.start</code> should be positioned such that
+     *  characters [<code>originalStart</code>, <code>
+     *  pos.start</code>) will not be changed in the future by this
+     *  transliterator and characters [<code>pos.start</code>,
+     *  <code>pos.limit</code>) are unchanged.</li>
+     * </ul>
+     *
+     * <p>Implementations of this method should also obey the
+     * following invariants:</p>
+     *
+     * <ul>
+     *  <li> <code>pos.limit</code> and <code>pos.contextLimit</code>
+     *  should be updated to reflect changes in length of the text
+     *  between <code>pos.start</code> and <code>pos.limit</code>. The
+     *  difference <code> pos.contextLimit - pos.limit</code> should
+     *  not change.</li>
+     *
+     *  <li><code>pos.contextStart</code> should not change.</li>
+     *
+     *  <li>Upon return, neither <code>pos.start</code> nor
+     *  <code>pos.limit</code> should be less than
+     *  <code>originalStart</code>.</li>
+     *
+     *  <li>Text before <code>originalStart</code> and text after
+     *  <code>pos.limit</code> should not change.</li>
+     *
+     *  <li>Text before <code>pos.contextStart</code> and text after
+     *  <code> pos.contextLimit</code> should be ignored.</li>
+     * </ul>
+     *
+     * <p>Subclasses may safely assume that all characters in
+     * [<code>pos.start</code>, <code>pos.limit</code>) are filtered.
+     * In other words, the filter has already been applied by the time
+     * this method is called.  See
+     * <code>filteredTransliterate()</code>.
+     *
+     * <p>This method is <b>not</b> for public consumption.  Calling
+     * this method directly will transliterate
+     * [<code>pos.start</code>, <code>pos.limit</code>) without
+     * applying the filter. End user code should call <code>
+     * transliterate()</code> instead of this method. Subclass code
+     * and wrapping transliterators should call
+     * <code>filteredTransliterate()</code> instead of this method.<p>
+     *
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     *
+     * @param pos the indices indicating the start, limit, context
+     * start, and context limit of the text.
+     *
+     * @param incremental if true, assume more text may be inserted at
+     * <code>pos.limit</code> and act accordingly.  Otherwise,
+     * transliterate all text between <code>pos.start</code> and
+     * <code>pos.limit</code> and move <code>pos.start</code> up to
+     * <code>pos.limit</code>.
+     *
+     * @see #transliterate
+     * @stable ICU 2.4
+     */
+    virtual void handleTransliterate(Replaceable& text,
+                                     UTransPosition& pos,
+                                     UBool incremental) const = 0;
+
+public:
+    /**
+     * Transliterate a substring of text, as specified by index, taking filters
+     * into account.  This method is for subclasses that need to delegate to
+     * another transliterator, such as CompoundTransliterator.
+     * @param text the text to be transliterated
+     * @param index the position indices
+     * @param incremental if TRUE, then assume more characters may be inserted
+     * at index.limit, and postpone processing to accomodate future incoming
+     * characters
+     * @stable ICU 2.4
+     */
+    virtual void filteredTransliterate(Replaceable& text,
+                                       UTransPosition& index,
+                                       UBool incremental) const;
+
+private:
+
+    /**
+     * Top-level transliteration method, handling filtering, incremental and
+     * non-incremental transliteration, and rollback.  All transliteration
+     * public API methods eventually call this method with a rollback argument
+     * of TRUE.  Other entities may call this method but rollback should be
+     * FALSE.
+     *
+     * <p>If this transliterator has a filter, break up the input text into runs
+     * of unfiltered characters.  Pass each run to
+     * <subclass>.handleTransliterate().
+     *
+     * <p>In incremental mode, if rollback is TRUE, perform a special
+     * incremental procedure in which several passes are made over the input
+     * text, adding one character at a time, and committing successful
+     * transliterations as they occur.  Unsuccessful transliterations are rolled
+     * back and retried with additional characters to give correct results.
+     *
+     * @param text the text to be transliterated
+     * @param index the position indices
+     * @param incremental if TRUE, then assume more characters may be inserted
+     * at index.limit, and postpone processing to accomodate future incoming
+     * characters
+     * @param rollback if TRUE and if incremental is TRUE, then perform special
+     * incremental processing, as described above, and undo partial
+     * transliterations where necessary.  If incremental is FALSE then this
+     * parameter is ignored.
+     */
+    virtual void filteredTransliterate(Replaceable& text,
+                                       UTransPosition& index,
+                                       UBool incremental,
+                                       UBool rollback) const;
+
+public:
+
+    /**
+     * Returns the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.  The default implementation supplied
+     * by <code>Transliterator</code> returns zero; subclasses
+     * that use preceding context should override this method to return the
+     * correct value.  For example, if a transliterator translates "ddd" (where
+     * d is any digit) to "555" when preceded by "(ddd)", then the preceding
+     * context length is 5, the length of "(ddd)".
+     *
+     * @return The maximum number of preceding context characters this
+     * transliterator needs to examine
+     * @stable ICU 2.0
+     */
+    int32_t getMaximumContextLength(void) const;
+
+protected:
+
+    /**
+     * Method for subclasses to use to set the maximum context length.
+     * @param maxContextLength the new value to be set.
+     * @see #getMaximumContextLength
+     * @stable ICU 2.4
+     */
+    void setMaximumContextLength(int32_t maxContextLength);
+
+public:
+
+    /**
+     * Returns a programmatic identifier for this transliterator.
+     * If this identifier is passed to <code>createInstance()</code>, it
+     * will return this object, if it has been registered.
+     * @return a programmatic identifier for this transliterator.
+     * @see #registerInstance
+     * @see #registerFactory
+     * @see #getAvailableIDs
+     * @stable ICU 2.0
+     */
+    virtual const UnicodeString& getID(void) const;
+
+    /**
+     * Returns a name for this transliterator that is appropriate for
+     * display to the user in the default locale.  See {@link
+     * #getDisplayName } for details.
+     * @param ID     the string identifier for this transliterator
+     * @param result Output param to receive the display name
+     * @return       A reference to 'result'.
+     * @stable ICU 2.0
+     */
+    static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
+                                         UnicodeString& result);
+
+    /**
+     * Returns a name for this transliterator that is appropriate for
+     * display to the user in the given locale.  This name is taken
+     * from the locale resource data in the standard manner of the
+     * <code>java.text</code> package.
+     *
+     * <p>If no localized names exist in the system resource bundles,
+     * a name is synthesized using a localized
+     * <code>MessageFormat</code> pattern from the resource data.  The
+     * arguments to this pattern are an integer followed by one or two
+     * strings.  The integer is the number of strings, either 1 or 2.
+     * The strings are formed by splitting the ID for this
+     * transliterator at the first '-'.  If there is no '-', then the
+     * entire ID forms the only string.
+     * @param ID       the string identifier for this transliterator
+     * @param inLocale the Locale in which the display name should be
+     *                 localized.
+     * @param result   Output param to receive the display name
+     * @return         A reference to 'result'.
+     * @stable ICU 2.0
+     */
+    static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
+                                         const Locale& inLocale,
+                                         UnicodeString& result);
+
+    /**
+     * Returns the filter used by this transliterator, or <tt>NULL</tt>
+     * if this transliterator uses no filter.
+     * @return the filter used by this transliterator, or <tt>NULL</tt>
+     *         if this transliterator uses no filter.
+     * @stable ICU 2.0
+     */
+    const UnicodeFilter* getFilter(void) const;
+
+    /**
+     * Returns the filter used by this transliterator, or <tt>NULL</tt> if this
+     * transliterator uses no filter.  The caller must eventually delete the
+     * result.  After this call, this transliterator's filter is set to
+     * <tt>NULL</tt>.
+     * @return the filter used by this transliterator, or <tt>NULL</tt> if this
+     *         transliterator uses no filter.
+     * @stable ICU 2.4
+     */
+    UnicodeFilter* orphanFilter(void);
+
+    /**
+     * Changes the filter used by this transliterator.  If the filter
+     * is set to <tt>null</tt> then no filtering will occur.
+     *
+     * <p>Callers must take care if a transliterator is in use by
+     * multiple threads.  The filter should not be changed by one
+     * thread while another thread may be transliterating.
+     * @param adoptedFilter the new filter to be adopted.
+     * @stable ICU 2.0
+     */
+    void adoptFilter(UnicodeFilter* adoptedFilter);
+
+    /**
+     * Returns this transliterator's inverse.  See the class
+     * documentation for details.  This implementation simply inverts
+     * the two entities in the ID and attempts to retrieve the
+     * resulting transliterator.  That is, if <code>getID()</code>
+     * returns "A-B", then this method will return the result of
+     * <code>createInstance("B-A")</code>, or <code>null</code> if that
+     * call fails.
+     *
+     * <p>Subclasses with knowledge of their inverse may wish to
+     * override this method.
+     *
+     * @param status Output param to filled in with a success or an error.
+     * @return a transliterator that is an inverse, not necessarily
+     * exact, of this transliterator, or <code>null</code> if no such
+     * transliterator is registered.
+     * @see #registerInstance
+     * @stable ICU 2.0
+     */
+    Transliterator* createInverse(UErrorCode& status) const;
+
+    /**
+     * Returns a <code>Transliterator</code> object given its ID.
+     * The ID must be either a system transliterator ID or a ID registered
+     * using <code>registerInstance()</code>.
+     *
+     * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
+     * @param dir        either FORWARD or REVERSE.
+     * @param parseError Struct to recieve information on position
+     *                   of error if an error is encountered
+     * @param status     Output param to filled in with a success or an error.
+     * @return A <code>Transliterator</code> object with the given ID
+     * @see #registerInstance
+     * @see #getAvailableIDs
+     * @see #getID
+     * @stable ICU 2.0
+     */
+    static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
+                                          UTransDirection dir,
+                                          UParseError& parseError,
+                                          UErrorCode& status);
+
+    /**
+     * Returns a <code>Transliterator</code> object given its ID.
+     * The ID must be either a system transliterator ID or a ID registered
+     * using <code>registerInstance()</code>.
+     * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
+     * @param dir        either FORWARD or REVERSE.
+     * @param status     Output param to filled in with a success or an error.
+     * @return A <code>Transliterator</code> object with the given ID
+     * @stable ICU 2.0
+     */
+    static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
+                                          UTransDirection dir,
+                                          UErrorCode& status);
+
+    /**
+     * Returns a <code>Transliterator</code> object constructed from
+     * the given rule string.  This will be a RuleBasedTransliterator,
+     * if the rule string contains only rules, or a
+     * CompoundTransliterator, if it contains ID blocks, or a
+     * NullTransliterator, if it contains ID blocks which parse as
+     * empty for the given direction.
+     * @param ID            the id for the transliterator.
+     * @param rules         rules, separated by ';'
+     * @param dir           either FORWARD or REVERSE.
+     * @param parseError    Struct to recieve information on position
+     *                      of error if an error is encountered
+     * @param status        Output param set to success/failure code.
+     * @stable ICU 2.0
+     */
+    static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
+                                           const UnicodeString& rules,
+                                           UTransDirection dir,
+                                           UParseError& parseError,
+                                           UErrorCode& status);
+
+    /**
+     * Create a rule string that can be passed to createFromRules()
+     * to recreate this transliterator.
+     * @param result the string to receive the rules.  Previous
+     * contents will be deleted.
+     * @param escapeUnprintable if TRUE then convert unprintable
+     * character to their hex escape representations, \\uxxxx or
+     * \\Uxxxxxxxx.  Unprintable characters are those other than
+     * U+000A, U+0020..U+007E.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& toRules(UnicodeString& result,
+                                   UBool escapeUnprintable) const;
+
+    /**
+     * Return the number of elements that make up this transliterator.
+     * For example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
+     * were created, the return value of this method would be 3.
+     *
+     * <p>If this transliterator is not composed of other
+     * transliterators, then this method returns 1.
+     * @return the number of transliterators that compose this
+     * transliterator, or 1 if this transliterator is not composed of
+     * multiple transliterators
+     * @stable ICU 3.0
+     */
+    int32_t countElements() const;
+
+    /**
+     * Return an element that makes up this transliterator.  For
+     * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
+     * were created, the return value of this method would be one
+     * of the three transliterator objects that make up that
+     * transliterator: [NFD, Jamo-Latin, Latin-Greek].
+     *
+     * <p>If this transliterator is not composed of other
+     * transliterators, then this method will return a reference to
+     * this transliterator when given the index 0.
+     * @param index a value from 0..countElements()-1 indicating the
+     * transliterator to return
+     * @param ec input-output error code
+     * @return one of the transliterators that makes up this
+     * transliterator, if this transliterator is made up of multiple
+     * transliterators, otherwise a reference to this object if given
+     * an index of 0
+     * @stable ICU 3.0
+     */
+    const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
+
+    /**
+     * Returns the set of all characters that may be modified in the
+     * input text by this Transliterator.  This incorporates this
+     * object's current filter; if the filter is changed, the return
+     * value of this function will change.  The default implementation
+     * returns an empty set.  Some subclasses may override {@link
+     * #handleGetSourceSet } to return a more precise result.  The
+     * return result is approximate in any case and is intended for
+     * use by tests, tools, or utilities.
+     * @param result receives result set; previous contents lost
+     * @return a reference to result
+     * @see #getTargetSet
+     * @see #handleGetSourceSet
+     * @stable ICU 2.4
+     */
+    UnicodeSet& getSourceSet(UnicodeSet& result) const;
+
+    /**
+     * Framework method that returns the set of all characters that
+     * may be modified in the input text by this Transliterator,
+     * ignoring the effect of this object's filter.  The base class
+     * implementation returns the empty set.  Subclasses that wish to
+     * implement this should override this method.
+     * @return the set of characters that this transliterator may
+     * modify.  The set may be modified, so subclasses should return a
+     * newly-created object.
+     * @param result receives result set; previous contents lost
+     * @see #getSourceSet
+     * @see #getTargetSet
+     * @stable ICU 2.4
+     */
+    virtual void handleGetSourceSet(UnicodeSet& result) const;
+
+    /**
+     * Returns the set of all characters that may be generated as
+     * replacement text by this transliterator.  The default
+     * implementation returns the empty set.  Some subclasses may
+     * override this method to return a more precise result.  The
+     * return result is approximate in any case and is intended for
+     * use by tests, tools, or utilities requiring such
+     * meta-information.
+     * @param result receives result set; previous contents lost
+     * @return a reference to result
+     * @see #getTargetSet
+     * @stable ICU 2.4
+     */
+    virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
+
+public:
+
+    /**
+     * Registers a factory function that creates transliterators of
+     * a given ID.
+     * @param id the ID being registered
+     * @param factory a function pointer that will be copied and
+     * called later when the given ID is passed to createInstance()
+     * @param context a context pointer that will be stored and
+     * later passed to the factory function when an ID matching
+     * the registration ID is being instantiated with this factory.
+     * @stable ICU 2.0
+     */
+    static void U_EXPORT2 registerFactory(const UnicodeString& id,
+                                Factory factory,
+                                Token context);
+
+    /**
+     * Registers an instance <tt>obj</tt> of a subclass of
+     * <code>Transliterator</code> with the system.  When
+     * <tt>createInstance()</tt> is called with an ID string that is
+     * equal to <tt>obj->getID()</tt>, then <tt>obj->clone()</tt> is
+     * returned.
+     *
+     * After this call the Transliterator class owns the adoptedObj
+     * and will delete it.
+     *
+     * @param adoptedObj an instance of subclass of
+     * <code>Transliterator</code> that defines <tt>clone()</tt>
+     * @see #createInstance
+     * @see #registerFactory
+     * @see #unregister
+     * @stable ICU 2.0
+     */
+    static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
+
+    /**
+     * Registers an ID string as an alias of another ID string.
+     * That is, after calling this function, <tt>createInstance(aliasID)</tt>
+     * will return the same thing as <tt>createInstance(realID)</tt>.
+     * This is generally used to create shorter, more mnemonic aliases
+     * for long compound IDs.
+     *
+     * @param aliasID The new ID being registered.
+     * @param realID The ID that the new ID is to be an alias for.
+     * This can be a compound ID and can include filters and should
+     * refer to transliterators that have already been registered with
+     * the framework, although this isn't checked.
+     * @stable ICU 3.6
+     */
+     static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
+                                         const UnicodeString& realID);
+
+protected:
+
+    /**
+     * @internal
+     * @param id the ID being registered
+     * @param factory a function pointer that will be copied and
+     * called later when the given ID is passed to createInstance()
+     * @param context a context pointer that will be stored and
+     * later passed to the factory function when an ID matching
+     * the registration ID is being instantiated with this factory.
+     */
+    static void _registerFactory(const UnicodeString& id,
+                                 Factory factory,
+                                 Token context);
+
+    /**
+     * @internal
+     */
+    static void _registerInstance(Transliterator* adoptedObj);
+
+    /**
+     * @internal
+     */
+    static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
+
+    /**
+     * Register two targets as being inverses of one another.  For
+     * example, calling registerSpecialInverse("NFC", "NFD", true) causes
+     * Transliterator to form the following inverse relationships:
+     *
+     * <pre>NFC => NFD
+     * Any-NFC => Any-NFD
+     * NFD => NFC
+     * Any-NFD => Any-NFC</pre>
+     *
+     * (Without the special inverse registration, the inverse of NFC
+     * would be NFC-Any.)  Note that NFD is shorthand for Any-NFD, but
+     * that the presence or absence of "Any-" is preserved.
+     *
+     * <p>The relationship is symmetrical; registering (a, b) is
+     * equivalent to registering (b, a).
+     *
+     * <p>The relevant IDs must still be registered separately as
+     * factories or classes.
+     *
+     * <p>Only the targets are specified.  Special inverses always
+     * have the form Any-Target1 <=> Any-Target2.  The target should
+     * have canonical casing (the casing desired to be produced when
+     * an inverse is formed) and should contain no whitespace or other
+     * extraneous characters.
+     *
+     * @param target the target against which to register the inverse
+     * @param inverseTarget the inverse of target, that is
+     * Any-target.getInverse() => Any-inverseTarget
+     * @param bidirectional if true, register the reverse relation
+     * as well, that is, Any-inverseTarget.getInverse() => Any-target
+     * @internal
+     */
+    static void _registerSpecialInverse(const UnicodeString& target,
+                                        const UnicodeString& inverseTarget,
+                                        UBool bidirectional);
+
+public:
+
+    /**
+     * Unregisters a transliterator or class.  This may be either
+     * a system transliterator or a user transliterator or class.
+     * Any attempt to construct an unregistered transliterator based
+     * on its ID will fail.
+     *
+     * @param ID the ID of the transliterator or class
+     * @return the <code>Object</code> that was registered with
+     * <code>ID</code>, or <code>null</code> if none was
+     * @see #registerInstance
+     * @see #registerFactory
+     * @stable ICU 2.0
+     */
+    static void U_EXPORT2 unregister(const UnicodeString& ID);
+
+public:
+
+    /**
+     * Return a StringEnumeration over the IDs available at the time of the
+     * call, including user-registered IDs.
+     * @param ec input-output error code
+     * @return a newly-created StringEnumeration over the transliterators
+     * available at the time of the call. The caller should delete this object
+     * when done using it.
+     * @stable ICU 3.0
+     */
+    static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec);
+
+    /**
+     * Return the number of registered source specifiers.
+     * @return the number of registered source specifiers.
+     * @stable ICU 2.0
+     */
+    static int32_t U_EXPORT2 countAvailableSources(void);
+
+    /**
+     * Return a registered source specifier.
+     * @param index which specifier to return, from 0 to n-1, where
+     * n = countAvailableSources()
+     * @param result fill-in paramter to receive the source specifier.
+     * If index is out of range, result will be empty.
+     * @return reference to result
+     * @stable ICU 2.0
+     */
+    static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
+                                             UnicodeString& result);
+
+    /**
+     * Return the number of registered target specifiers for a given
+     * source specifier.
+     * @param source the given source specifier.
+     * @return the number of registered target specifiers for a given
+     *         source specifier.
+     * @stable ICU 2.0
+     */
+    static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
+
+    /**
+     * Return a registered target specifier for a given source.
+     * @param index which specifier to return, from 0 to n-1, where
+     * n = countAvailableTargets(source)
+     * @param source the source specifier
+     * @param result fill-in paramter to receive the target specifier.
+     * If source is invalid or if index is out of range, result will
+     * be empty.
+     * @return reference to result
+     * @stable ICU 2.0
+     */
+    static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
+                                             const UnicodeString& source,
+                                             UnicodeString& result);
+
+    /**
+     * Return the number of registered variant specifiers for a given
+     * source-target pair.
+     * @param source    the source specifiers.
+     * @param target    the target specifiers.
+     * @stable ICU 2.0
+     */
+    static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
+                                          const UnicodeString& target);
+
+    /**
+     * Return a registered variant specifier for a given source-target
+     * pair.
+     * @param index which specifier to return, from 0 to n-1, where
+     * n = countAvailableVariants(source, target)
+     * @param source the source specifier
+     * @param target the target specifier
+     * @param result fill-in paramter to receive the variant
+     * specifier.  If source is invalid or if target is invalid or if
+     * index is out of range, result will be empty.
+     * @return reference to result
+     * @stable ICU 2.0
+     */
+    static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
+                                              const UnicodeString& source,
+                                              const UnicodeString& target,
+                                              UnicodeString& result);
+
+protected:
+
+    /**
+     * Non-mutexed internal method
+     * @internal
+     */
+    static int32_t _countAvailableSources(void);
+
+    /**
+     * Non-mutexed internal method
+     * @internal
+     */
+    static UnicodeString& _getAvailableSource(int32_t index,
+                                              UnicodeString& result);
+
+    /**
+     * Non-mutexed internal method
+     * @internal
+     */
+    static int32_t _countAvailableTargets(const UnicodeString& source);
+
+    /**
+     * Non-mutexed internal method
+     * @internal
+     */
+    static UnicodeString& _getAvailableTarget(int32_t index,
+                                              const UnicodeString& source,
+                                              UnicodeString& result);
+
+    /**
+     * Non-mutexed internal method
+     * @internal
+     */
+    static int32_t _countAvailableVariants(const UnicodeString& source,
+                                           const UnicodeString& target);
+
+    /**
+     * Non-mutexed internal method
+     * @internal
+     */
+    static UnicodeString& _getAvailableVariant(int32_t index,
+                                               const UnicodeString& source,
+                                               const UnicodeString& target,
+                                               UnicodeString& result);
+
+protected:
+
+    /**
+     * Set the ID of this transliterators.  Subclasses shouldn't do
+     * this, unless the underlying script behavior has changed.
+     * @param id the new id t to be set.
+     * @stable ICU 2.4
+     */
+    void setID(const UnicodeString& id);
+
+public:
+
+    /**
+     * Return the class ID for this class.  This is useful only for
+     * comparing to a return value from getDynamicClassID().
+     * Note that Transliterator is an abstract base class, and therefor
+     * no fully constructed object will  have a dynamic
+     * UCLassID that equals the UClassID returned from
+     * TRansliterator::getStaticClassID().
+     * @return       The class ID for class Transliterator.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID <b>polymorphically</b>.  This method
+     * is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI.  Polymorphic operator==() and
+     * clone() methods call this method.
+     *
+     * <p>Concrete subclasses of Transliterator must use the
+     *    UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro from
+     *    uobject.h to provide the RTTI functions.
+     *
+     * @return The class ID for this object. All objects of a given
+     * class have the same class ID.  Objects of other classes have
+     * different class IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const = 0;
+
+private:
+    static UBool initializeRegistry(UErrorCode &status);
+
+public:
+    /**
+     * Return the number of IDs currently registered with the system.
+     * To retrieve the actual IDs, call getAvailableID(i) with
+     * i from 0 to countAvailableIDs() - 1.
+     * @return the number of IDs currently registered with the system.
+     * @obsolete ICU 3.4 use getAvailableIDs() instead
+     */
+    static int32_t U_EXPORT2 countAvailableIDs(void);
+
+    /**
+     * Return the index-th available ID.  index must be between 0
+     * and countAvailableIDs() - 1, inclusive.  If index is out of
+     * range, the result of getAvailableID(0) is returned.
+     * @param index the given ID index.
+     * @return      the index-th available ID.  index must be between 0
+     *              and countAvailableIDs() - 1, inclusive.  If index is out of
+     *              range, the result of getAvailableID(0) is returned.
+     * @obsolete ICU 3.4 use getAvailableIDs() instead; this function
+     * is not thread safe, since it returns a reference to storage that
+     * may become invalid if another thread calls unregister
+     */
+    static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
+};
+
+inline int32_t Transliterator::getMaximumContextLength(void) const {
+    return maximumContextLength;
+}
+
+inline void Transliterator::setID(const UnicodeString& id) {
+    ID = id;
+    // NUL-terminate the ID string, which is a non-aliased copy.
+    ID.append((UChar)0);
+    ID.truncate(ID.length()-1);
+}
+
+inline Transliterator::Token Transliterator::integerToken(int32_t i) {
+    Token t;
+    t.integer = i;
+    return t;
+}
+
+inline Transliterator::Token Transliterator::pointerToken(void* p) {
+    Token t;
+    t.pointer = p;
+    return t;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_TRANSLITERATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/tzrule.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/tzrule.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/tzrule.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,828 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 2007-2008, International Business Machines Corporation and    *
-* others. All Rights Reserved.                                                *
-*******************************************************************************
-*/
-#ifndef TZRULE_H
-#define TZRULE_H
-
-/**
- * \file 
- * \brief C++ API: Time zone rule classes
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-#include "unicode/dtrule.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * <code>TimeZoneRule</code> is a class representing a rule for time zone.
- * <code>TimeZoneRule</code> has a set of time zone attributes, such as zone name,
- * raw offset (UTC offset for standard time) and daylight saving time offset.
- * 
- * @stable ICU 4.0
- */
-class U_I18N_API TimeZoneRule : public UObject {
-public:
-    /**
-     * Destructor.
-     * @stable ICU 4.0
-     */
-    virtual ~TimeZoneRule();
-
-    /**
-     * Clone this TimeZoneRule object polymorphically. The caller owns the result and
-     * should delete it when done.
-     * @return  A copy of the object.
-     * @stable ICU 4.0
-     */
-    virtual TimeZoneRule* clone(void) const = 0;
-
-    /**
-     * Return true if the given <code>TimeZoneRule</code> objects are semantically equal. Objects
-     * of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given <code>TimeZoneRule</code> objects are semantically equal.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator==(const TimeZoneRule& that) const;
-
-    /**
-     * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
-     * of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given <code>TimeZoneRule</code> objects are semantically unequal.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator!=(const TimeZoneRule& that) const;
-
-    /**
-     * Fills in "name" with the name of this time zone.
-     * @param name  Receives the name of this time zone.
-     * @return  A reference to "name"
-     * @stable ICU 4.0
-     */
-    UnicodeString& getName(UnicodeString& name) const;
-
-    /**
-     * Gets the standard time offset.
-     * @return  The standard time offset from UTC in milliseconds.
-     * @stable ICU 4.0
-     */
-    int32_t getRawOffset(void) const;
-
-    /**
-     * Gets the amount of daylight saving delta time from the standard time.
-     * @return  The amount of daylight saving offset used by this rule
-     *          in milliseconds.
-     * @stable ICU 4.0
-     */
-    int32_t getDSTSavings(void) const;
-
-    /**
-     * Returns if this rule represents the same rule and offsets as another.
-     * When two <code>TimeZoneRule</code> objects differ only its names, this method
-     * returns true.
-     * @param other The <code>TimeZoneRule</code> object to be compared with.
-     * @return  true if the other <code>TimeZoneRule</code> is the same as this one.
-     * @stable ICU 4.0
-     */
-    virtual UBool isEquivalentTo(const TimeZoneRule& other) const;
-
-    /**
-     * Gets the very first time when this rule takes effect.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param result            Receives the very first time when this rule takes effect.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getFirstStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const = 0;
-
-    /**
-     * Gets the final time when this rule takes effect.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param result            Receives the final time when this rule takes effect.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getFinalStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const = 0;
-
-    /**
-     * Gets the first time when this rule takes effect after the specified time.
-     * @param base              The first start time after this base time will be returned.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param inclusive         Whether the base time is inclusive or not.
-     * @param result            Receives The first time when this rule takes effect after
-     *                          the specified base time.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getNextStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
-        UBool inclusive, UDate& result) const = 0;
-
-    /**
-     * Gets the most recent time when this rule takes effect before the specified time.
-     * @param base              The most recent time before this base time will be returned.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param inclusive         Whether the base time is inclusive or not.
-     * @param result            Receives The most recent time when this rule takes effect before
-     *                          the specified base time.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getPreviousStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
-        UBool inclusive, UDate& result) const = 0;
-
-protected:
-
-    /**
-     * Constructs a <code>TimeZoneRule</code> with the name, the GMT offset of its
-     * standard time and the amount of daylight saving offset adjustment.
-     * @param name          The time zone name.
-     * @param rawOffset     The UTC offset of its standard time in milliseconds.
-     * @param dstSavings    The amount of daylight saving offset adjustment in milliseconds.
-     *                      If this ia a rule for standard time, the value of this argument is 0.
-     * @stable ICU 4.0
-     */
-    TimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings);
-
-    /**
-     * Copy constructor.
-     * @param source    The TimeZoneRule object to be copied.
-     * @stable ICU 4.0
-     */
-    TimeZoneRule(const TimeZoneRule& source);
-
-    /**
-     * Assignment operator.
-     * @param right The object to be copied.
-     * @stable ICU 4.0
-     */
-    TimeZoneRule& operator=(const TimeZoneRule& right);
-
-private:
-    UnicodeString fName; // time name
-    int32_t fRawOffset;  // UTC offset of the standard time in milliseconds
-    int32_t fDSTSavings; // DST saving amount in milliseconds
-};
-
-/**
- * <code>InitialTimeZoneRule</code> represents a time zone rule
- * representing a time zone effective from the beginning and
- * has no actual start times.
- * @stable ICU 4.0
- */
-class U_I18N_API InitialTimeZoneRule : public TimeZoneRule {
-public:
-    /**
-     * Constructs an <code>InitialTimeZoneRule</code> with the name, the GMT offset of its
-     * standard time and the amount of daylight saving offset adjustment.
-     * @param name          The time zone name.
-     * @param rawOffset     The UTC offset of its standard time in milliseconds.
-     * @param dstSavings    The amount of daylight saving offset adjustment in milliseconds.
-     *                      If this ia a rule for standard time, the value of this argument is 0.
-     * @stable ICU 4.0
-     */
-    InitialTimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings);
-
-    /**
-     * Copy constructor.
-     * @param source    The InitialTimeZoneRule object to be copied.
-     * @stable ICU 4.0
-     */
-    InitialTimeZoneRule(const InitialTimeZoneRule& source);
-
-    /**
-     * Destructor.
-     * @stable ICU 4.0
-     */
-    virtual ~InitialTimeZoneRule();
-
-    /**
-     * Clone this InitialTimeZoneRule object polymorphically. The caller owns the result and
-     * should delete it when done.
-     * @return    A copy of the object.
-     * @stable ICU 4.0
-     */
-    virtual InitialTimeZoneRule* clone(void) const;
-
-    /**
-     * Assignment operator.
-     * @param right The object to be copied.
-     * @stable ICU 4.0
-     */
-    InitialTimeZoneRule& operator=(const InitialTimeZoneRule& right);
-
-    /**
-     * Return true if the given <code>TimeZoneRule</code> objects are semantically equal. Objects
-     * of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given <code>TimeZoneRule</code> objects are semantically equal.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator==(const TimeZoneRule& that) const;
-
-    /**
-     * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
-     * of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given <code>TimeZoneRule</code> objects are semantically unequal.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator!=(const TimeZoneRule& that) const;
-
-    /**
-     * Gets the time when this rule takes effect in the given year.
-     * @param year              The Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param result            Receives the start time in the year.
-     * @return  true if this rule takes effect in the year and the result is set to
-     *          "result".
-     * @stable ICU 4.0
-     */
-    UBool getStartInYear(int32_t year, int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
-
-    /**
-     * Returns if this rule represents the same rule and offsets as another.
-     * When two <code>TimeZoneRule</code> objects differ only its names, this method
-     * returns true.
-     * @param that  The <code>TimeZoneRule</code> object to be compared with.
-     * @return  true if the other <code>TimeZoneRule</code> is equivalent to this one.
-     * @stable ICU 4.0
-     */
-    virtual UBool isEquivalentTo(const TimeZoneRule& that) const;
-
-    /**
-     * Gets the very first time when this rule takes effect.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param result            Receives the very first time when this rule takes effect.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getFirstStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
-
-    /**
-     * Gets the final time when this rule takes effect.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param result            Receives the final time when this rule takes effect.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getFinalStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
-
-    /**
-     * Gets the first time when this rule takes effect after the specified time.
-     * @param base              The first start time after this base time will be returned.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param inclusive         Whether the base time is inclusive or not.
-     * @param result            Receives The first time when this rule takes effect after
-     *                          the specified base time.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getNextStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
-        UBool inclusive, UDate& result) const;
-
-    /**
-     * Gets the most recent time when this rule takes effect before the specified time.
-     * @param base              The most recent time before this base time will be returned.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param inclusive         Whether the base time is inclusive or not.
-     * @param result            Receives The most recent time when this rule takes effect before
-     *                          the specified base time.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getPreviousStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
-        UBool inclusive, UDate& result) const;
-
-public:
-    /**
-     * Return the class ID for this class. This is useful only for comparing to
-     * a return value from getDynamicClassID(). For example:
-     * <pre>
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       erived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 4.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
-     * method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone()
-     * methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 4.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-};
-
-/**
- * <code>AnnualTimeZoneRule</code> is a class used for representing a time zone
- * rule which takes effect annually.  The calenday system used for the rule is
- * is based on Gregorian calendar
- * 
- * @stable ICU 4.0
- */
-class U_I18N_API AnnualTimeZoneRule : public TimeZoneRule {
-public:
-    /**
-     * The constant representing the maximum year used for designating
-     * a rule is permanent.
-     */
-    static const int32_t MAX_YEAR;
-
-    /**
-     * Constructs a <code>AnnualTimeZoneRule</code> with the name, the GMT offset of its
-     * standard time, the amount of daylight saving offset adjustment, the annual start
-     * time rule and the start/until years.  The input DateTimeRule is copied by this
-     * constructor, so the caller remains responsible for deleting the object.
-     * @param name          The time zone name.
-     * @param rawOffset     The GMT offset of its standard time in milliseconds.
-     * @param dstSavings    The amount of daylight saving offset adjustment in
-     *                      milliseconds.  If this ia a rule for standard time,
-     *                      the value of this argument is 0.
-     * @param dateTimeRule  The start date/time rule repeated annually.
-     * @param startYear     The first year when this rule takes effect.
-     * @param endYear       The last year when this rule takes effect.  If this
-     *                      rule is effective forever in future, specify MAX_YEAR.
-     * @stable ICU 4.0
-     */
-    AnnualTimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings,
-            const DateTimeRule& dateTimeRule, int32_t startYear, int32_t endYear);
-
-    /**
-     * Constructs a <code>AnnualTimeZoneRule</code> with the name, the GMT offset of its
-     * standard time, the amount of daylight saving offset adjustment, the annual start
-     * time rule and the start/until years.  The input DateTimeRule object is adopted
-     * by this object, therefore, the caller must not delete the object.
-     * @param name          The time zone name.
-     * @param rawOffset     The GMT offset of its standard time in milliseconds.
-     * @param dstSavings    The amount of daylight saving offset adjustment in
-     *                      milliseconds.  If this ia a rule for standard time,
-     *                      the value of this argument is 0.
-     * @param dateTimeRule  The start date/time rule repeated annually.
-     * @param startYear     The first year when this rule takes effect.
-     * @param endYear       The last year when this rule takes effect.  If this
-     *                      rule is effective forever in future, specify MAX_YEAR.
-     * @stable ICU 4.0
-     */
-    AnnualTimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings,
-            DateTimeRule* dateTimeRule, int32_t startYear, int32_t endYear);
-
-    /**
-     * Copy constructor.
-     * @param source    The AnnualTimeZoneRule object to be copied.
-     * @stable ICU 4.0
-     */
-    AnnualTimeZoneRule(const AnnualTimeZoneRule& source);
-
-    /**
-     * Destructor.
-     * @stable ICU 4.0
-     */
-    virtual ~AnnualTimeZoneRule();
-
-    /**
-     * Clone this AnnualTimeZoneRule object polymorphically. The caller owns the result and
-     * should delete it when done.
-     * @return    A copy of the object.
-     * @stable ICU 4.0
-     */
-    virtual AnnualTimeZoneRule* clone(void) const;
-
-    /**
-     * Assignment operator.
-     * @param right The object to be copied.
-     * @stable ICU 4.0
-     */
-    AnnualTimeZoneRule& operator=(const AnnualTimeZoneRule& right);
-
-    /**
-     * Return true if the given <code>TimeZoneRule</code> objects are semantically equal. Objects
-     * of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given <code>TimeZoneRule</code> objects are semantically equal.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator==(const TimeZoneRule& that) const;
-
-    /**
-     * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
-     * of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given <code>TimeZoneRule</code> objects are semantically unequal.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator!=(const TimeZoneRule& that) const;
-
-    /**
-     * Gets the start date/time rule used by this rule.
-     * @return  The <code>AnnualDateTimeRule</code> which represents the start date/time
-     *          rule used by this time zone rule.
-     * @stable ICU 4.0
-     */
-    const DateTimeRule* getRule(void) const;
-
-    /**
-     * Gets the first year when this rule takes effect.
-     * @return  The start year of this rule.  The year is in Gregorian calendar
-     *          with 0 == 1 BCE, -1 == 2 BCE, etc.
-     * @stable ICU 4.0
-     */
-    int32_t getStartYear(void) const;
-
-    /**
-     * Gets the end year when this rule takes effect.
-     * @return  The end year of this rule (inclusive). The year is in Gregorian calendar
-     *          with 0 == 1 BCE, -1 == 2 BCE, etc.
-     * @stable ICU 4.0
-     */
-    int32_t getEndYear(void) const;
-
-    /**
-     * Gets the time when this rule takes effect in the given year.
-     * @param year              The Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param result            Receives the start time in the year.
-     * @return  true if this rule takes effect in the year and the result is set to
-     *          "result".
-     * @stable ICU 4.0
-     */
-    UBool getStartInYear(int32_t year, int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
-
-    /**
-     * Returns if this rule represents the same rule and offsets as another.
-     * When two <code>TimeZoneRule</code> objects differ only its names, this method
-     * returns true.
-     * @param that  The <code>TimeZoneRule</code> object to be compared with.
-     * @return  true if the other <code>TimeZoneRule</code> is equivalent to this one.
-     * @stable ICU 4.0
-     */
-    virtual UBool isEquivalentTo(const TimeZoneRule& that) const;
-
-    /**
-     * Gets the very first time when this rule takes effect.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param result            Receives the very first time when this rule takes effect.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getFirstStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
-
-    /**
-     * Gets the final time when this rule takes effect.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param result            Receives the final time when this rule takes effect.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getFinalStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
-
-    /**
-     * Gets the first time when this rule takes effect after the specified time.
-     * @param base              The first start time after this base time will be returned.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param inclusive         Whether the base time is inclusive or not.
-     * @param result            Receives The first time when this rule takes effect after
-     *                          the specified base time.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getNextStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
-        UBool inclusive, UDate& result) const;
-
-    /**
-     * Gets the most recent time when this rule takes effect before the specified time.
-     * @param base              The most recent time before this base time will be returned.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param inclusive         Whether the base time is inclusive or not.
-     * @param result            Receives The most recent time when this rule takes effect before
-     *                          the specified base time.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getPreviousStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
-        UBool inclusive, UDate& result) const;
-
-
-private:
-    DateTimeRule* fDateTimeRule;
-    int32_t fStartYear;
-    int32_t fEndYear;
-
-public:
-    /**
-     * Return the class ID for this class. This is useful only for comparing to
-     * a return value from getDynamicClassID(). For example:
-     * <pre>
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       erived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 4.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
-     * method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone()
-     * methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 4.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-};
-
-/**
- * <code>TimeArrayTimeZoneRule</code> represents a time zone rule whose start times are
- * defined by an array of milliseconds since the standard base time.
- * 
- * @stable ICU 4.0
- */
-class U_I18N_API TimeArrayTimeZoneRule : public TimeZoneRule {
-public:
-    /**
-     * Constructs a <code>TimeArrayTimeZoneRule</code> with the name, the GMT offset of its
-     * standard time, the amount of daylight saving offset adjustment and
-     * the array of times when this rule takes effect.
-     * @param name          The time zone name.
-     * @param rawOffset     The UTC offset of its standard time in milliseconds.
-     * @param dstSavings    The amount of daylight saving offset adjustment in
-     *                      milliseconds.  If this ia a rule for standard time,
-     *                      the value of this argument is 0.
-     * @param startTimes    The array start times in milliseconds since the base time
-     *                      (January 1, 1970, 00:00:00).
-     * @param numStartTimes The number of elements in the parameter "startTimes"
-     * @param timeRuleType  The time type of the start times, which is one of
-     *                      <code>DataTimeRule::WALL_TIME</code>, <code>STANDARD_TIME</code>
-     *                      and <code>UTC_TIME</code>.
-     * @stable ICU 4.0
-     */
-    TimeArrayTimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings,
-        const UDate* startTimes, int32_t numStartTimes, DateTimeRule::TimeRuleType timeRuleType);
-
-    /**
-     * Copy constructor.
-     * @param source    The TimeArrayTimeZoneRule object to be copied.
-     * @stable ICU 4.0
-     */
-    TimeArrayTimeZoneRule(const TimeArrayTimeZoneRule& source);
-
-    /**
-     * Destructor.
-     * @stable ICU 4.0
-     */
-    virtual ~TimeArrayTimeZoneRule();
-
-    /**
-     * Clone this TimeArrayTimeZoneRule object polymorphically. The caller owns the result and
-     * should delete it when done.
-     * @return    A copy of the object.
-     * @stable ICU 4.0
-     */
-    virtual TimeArrayTimeZoneRule* clone(void) const;
-
-    /**
-     * Assignment operator.
-     * @param right The object to be copied.
-     * @stable ICU 4.0
-     */
-    TimeArrayTimeZoneRule& operator=(const TimeArrayTimeZoneRule& right);
-
-    /**
-     * Return true if the given <code>TimeZoneRule</code> objects are semantically equal. Objects
-     * of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given <code>TimeZoneRule</code> objects are semantically equal.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator==(const TimeZoneRule& that) const;
-
-    /**
-     * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
-     * of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given <code>TimeZoneRule</code> objects are semantically unequal.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator!=(const TimeZoneRule& that) const;
-
-    /**
-     * Gets the time type of the start times used by this rule.  The return value
-     * is either <code>DateTimeRule::WALL_TIME</code> or <code>STANDARD_TIME</code>
-     * or <code>UTC_TIME</code>.
-     * 
-     * @return The time type used of the start times used by this rule.
-     * @stable ICU 4.0
-     */
-    DateTimeRule::TimeRuleType getTimeType(void) const;
-
-    /**
-     * Gets a start time at the index stored in this rule.
-     * @param index     The index of start times
-     * @param result    Receives the start time at the index
-     * @return  true if the index is within the valid range and
-     *          and the result is set.  When false, the output
-     *          parameger "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    UBool getStartTimeAt(int32_t index, UDate& result) const;
-
-    /**
-     * Returns the number of start times stored in this rule
-     * @return The number of start times.
-     * @stable ICU 4.0
-     */
-    int32_t countStartTimes(void) const;
-
-    /**
-     * Returns if this rule represents the same rule and offsets as another.
-     * When two <code>TimeZoneRule</code> objects differ only its names, this method
-     * returns true.
-     * @param that  The <code>TimeZoneRule</code> object to be compared with.
-     * @return  true if the other <code>TimeZoneRule</code> is equivalent to this one.
-     * @stable ICU 4.0
-     */
-    virtual UBool isEquivalentTo(const TimeZoneRule& that) const;
-
-    /**
-     * Gets the very first time when this rule takes effect.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param result            Receives the very first time when this rule takes effect.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getFirstStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
-
-    /**
-     * Gets the final time when this rule takes effect.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param result            Receives the final time when this rule takes effect.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getFinalStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
-
-    /**
-     * Gets the first time when this rule takes effect after the specified time.
-     * @param base              The first start time after this base time will be returned.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param inclusive         Whether the base time is inclusive or not.
-     * @param result            Receives The first time when this rule takes effect after
-     *                          the specified base time.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getNextStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
-        UBool inclusive, UDate& result) const;
-
-    /**
-     * Gets the most recent time when this rule takes effect before the specified time.
-     * @param base              The most recent time before this base time will be returned.
-     * @param prevRawOffset     The standard time offset from UTC before this rule
-     *                          takes effect in milliseconds.
-     * @param prevDSTSavings    The amount of daylight saving offset from the
-     *                          standard time.
-     * @param inclusive         Whether the base time is inclusive or not.
-     * @param result            Receives The most recent time when this rule takes effect before
-     *                          the specified base time.
-     * @return  true if the start time is available.  When false is returned, output parameter
-     *          "result" is unchanged.
-     * @stable ICU 4.0
-     */
-    virtual UBool getPreviousStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
-        UBool inclusive, UDate& result) const;
-
-
-private:
-    enum { TIMEARRAY_STACK_BUFFER_SIZE = 32 };
-    UBool initStartTimes(const UDate source[], int32_t size, UErrorCode& ec);
-    UDate getUTC(UDate time, int32_t raw, int32_t dst) const;
-
-    DateTimeRule::TimeRuleType  fTimeRuleType;
-    int32_t fNumStartTimes;
-    UDate*  fStartTimes;
-    UDate   fLocalStartTimes[TIMEARRAY_STACK_BUFFER_SIZE];
-
-public:
-    /**
-     * Return the class ID for this class. This is useful only for comparing to
-     * a return value from getDynamicClassID(). For example:
-     * <pre>
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       erived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 4.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
-     * method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone()
-     * methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 4.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-};
-
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // TZRULE_H
-
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/tzrule.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/tzrule.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/tzrule.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/tzrule.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,828 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2008, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*/
+#ifndef TZRULE_H
+#define TZRULE_H
+
+/**
+ * \file 
+ * \brief C++ API: Time zone rule classes
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/dtrule.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * <code>TimeZoneRule</code> is a class representing a rule for time zone.
+ * <code>TimeZoneRule</code> has a set of time zone attributes, such as zone name,
+ * raw offset (UTC offset for standard time) and daylight saving time offset.
+ * 
+ * @stable ICU 4.0
+ */
+class U_I18N_API TimeZoneRule : public UObject {
+public:
+    /**
+     * Destructor.
+     * @stable ICU 4.0
+     */
+    virtual ~TimeZoneRule();
+
+    /**
+     * Clone this TimeZoneRule object polymorphically. The caller owns the result and
+     * should delete it when done.
+     * @return  A copy of the object.
+     * @stable ICU 4.0
+     */
+    virtual TimeZoneRule* clone(void) const = 0;
+
+    /**
+     * Return true if the given <code>TimeZoneRule</code> objects are semantically equal. Objects
+     * of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given <code>TimeZoneRule</code> objects are semantically equal.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator==(const TimeZoneRule& that) const;
+
+    /**
+     * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
+     * of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given <code>TimeZoneRule</code> objects are semantically unequal.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator!=(const TimeZoneRule& that) const;
+
+    /**
+     * Fills in "name" with the name of this time zone.
+     * @param name  Receives the name of this time zone.
+     * @return  A reference to "name"
+     * @stable ICU 4.0
+     */
+    UnicodeString& getName(UnicodeString& name) const;
+
+    /**
+     * Gets the standard time offset.
+     * @return  The standard time offset from UTC in milliseconds.
+     * @stable ICU 4.0
+     */
+    int32_t getRawOffset(void) const;
+
+    /**
+     * Gets the amount of daylight saving delta time from the standard time.
+     * @return  The amount of daylight saving offset used by this rule
+     *          in milliseconds.
+     * @stable ICU 4.0
+     */
+    int32_t getDSTSavings(void) const;
+
+    /**
+     * Returns if this rule represents the same rule and offsets as another.
+     * When two <code>TimeZoneRule</code> objects differ only its names, this method
+     * returns true.
+     * @param other The <code>TimeZoneRule</code> object to be compared with.
+     * @return  true if the other <code>TimeZoneRule</code> is the same as this one.
+     * @stable ICU 4.0
+     */
+    virtual UBool isEquivalentTo(const TimeZoneRule& other) const;
+
+    /**
+     * Gets the very first time when this rule takes effect.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param result            Receives the very first time when this rule takes effect.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getFirstStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const = 0;
+
+    /**
+     * Gets the final time when this rule takes effect.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param result            Receives the final time when this rule takes effect.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getFinalStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const = 0;
+
+    /**
+     * Gets the first time when this rule takes effect after the specified time.
+     * @param base              The first start time after this base time will be returned.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param inclusive         Whether the base time is inclusive or not.
+     * @param result            Receives The first time when this rule takes effect after
+     *                          the specified base time.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getNextStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
+        UBool inclusive, UDate& result) const = 0;
+
+    /**
+     * Gets the most recent time when this rule takes effect before the specified time.
+     * @param base              The most recent time before this base time will be returned.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param inclusive         Whether the base time is inclusive or not.
+     * @param result            Receives The most recent time when this rule takes effect before
+     *                          the specified base time.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getPreviousStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
+        UBool inclusive, UDate& result) const = 0;
+
+protected:
+
+    /**
+     * Constructs a <code>TimeZoneRule</code> with the name, the GMT offset of its
+     * standard time and the amount of daylight saving offset adjustment.
+     * @param name          The time zone name.
+     * @param rawOffset     The UTC offset of its standard time in milliseconds.
+     * @param dstSavings    The amount of daylight saving offset adjustment in milliseconds.
+     *                      If this ia a rule for standard time, the value of this argument is 0.
+     * @stable ICU 4.0
+     */
+    TimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings);
+
+    /**
+     * Copy constructor.
+     * @param source    The TimeZoneRule object to be copied.
+     * @stable ICU 4.0
+     */
+    TimeZoneRule(const TimeZoneRule& source);
+
+    /**
+     * Assignment operator.
+     * @param right The object to be copied.
+     * @stable ICU 4.0
+     */
+    TimeZoneRule& operator=(const TimeZoneRule& right);
+
+private:
+    UnicodeString fName; // time name
+    int32_t fRawOffset;  // UTC offset of the standard time in milliseconds
+    int32_t fDSTSavings; // DST saving amount in milliseconds
+};
+
+/**
+ * <code>InitialTimeZoneRule</code> represents a time zone rule
+ * representing a time zone effective from the beginning and
+ * has no actual start times.
+ * @stable ICU 4.0
+ */
+class U_I18N_API InitialTimeZoneRule : public TimeZoneRule {
+public:
+    /**
+     * Constructs an <code>InitialTimeZoneRule</code> with the name, the GMT offset of its
+     * standard time and the amount of daylight saving offset adjustment.
+     * @param name          The time zone name.
+     * @param rawOffset     The UTC offset of its standard time in milliseconds.
+     * @param dstSavings    The amount of daylight saving offset adjustment in milliseconds.
+     *                      If this ia a rule for standard time, the value of this argument is 0.
+     * @stable ICU 4.0
+     */
+    InitialTimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings);
+
+    /**
+     * Copy constructor.
+     * @param source    The InitialTimeZoneRule object to be copied.
+     * @stable ICU 4.0
+     */
+    InitialTimeZoneRule(const InitialTimeZoneRule& source);
+
+    /**
+     * Destructor.
+     * @stable ICU 4.0
+     */
+    virtual ~InitialTimeZoneRule();
+
+    /**
+     * Clone this InitialTimeZoneRule object polymorphically. The caller owns the result and
+     * should delete it when done.
+     * @return    A copy of the object.
+     * @stable ICU 4.0
+     */
+    virtual InitialTimeZoneRule* clone(void) const;
+
+    /**
+     * Assignment operator.
+     * @param right The object to be copied.
+     * @stable ICU 4.0
+     */
+    InitialTimeZoneRule& operator=(const InitialTimeZoneRule& right);
+
+    /**
+     * Return true if the given <code>TimeZoneRule</code> objects are semantically equal. Objects
+     * of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given <code>TimeZoneRule</code> objects are semantically equal.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator==(const TimeZoneRule& that) const;
+
+    /**
+     * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
+     * of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given <code>TimeZoneRule</code> objects are semantically unequal.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator!=(const TimeZoneRule& that) const;
+
+    /**
+     * Gets the time when this rule takes effect in the given year.
+     * @param year              The Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param result            Receives the start time in the year.
+     * @return  true if this rule takes effect in the year and the result is set to
+     *          "result".
+     * @stable ICU 4.0
+     */
+    UBool getStartInYear(int32_t year, int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
+
+    /**
+     * Returns if this rule represents the same rule and offsets as another.
+     * When two <code>TimeZoneRule</code> objects differ only its names, this method
+     * returns true.
+     * @param that  The <code>TimeZoneRule</code> object to be compared with.
+     * @return  true if the other <code>TimeZoneRule</code> is equivalent to this one.
+     * @stable ICU 4.0
+     */
+    virtual UBool isEquivalentTo(const TimeZoneRule& that) const;
+
+    /**
+     * Gets the very first time when this rule takes effect.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param result            Receives the very first time when this rule takes effect.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getFirstStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
+
+    /**
+     * Gets the final time when this rule takes effect.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param result            Receives the final time when this rule takes effect.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getFinalStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
+
+    /**
+     * Gets the first time when this rule takes effect after the specified time.
+     * @param base              The first start time after this base time will be returned.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param inclusive         Whether the base time is inclusive or not.
+     * @param result            Receives The first time when this rule takes effect after
+     *                          the specified base time.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getNextStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
+        UBool inclusive, UDate& result) const;
+
+    /**
+     * Gets the most recent time when this rule takes effect before the specified time.
+     * @param base              The most recent time before this base time will be returned.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param inclusive         Whether the base time is inclusive or not.
+     * @param result            Receives The most recent time when this rule takes effect before
+     *                          the specified base time.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getPreviousStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
+        UBool inclusive, UDate& result) const;
+
+public:
+    /**
+     * Return the class ID for this class. This is useful only for comparing to
+     * a return value from getDynamicClassID(). For example:
+     * <pre>
+     * .   Base* polymorphic_pointer = createPolymorphicObject();
+     * .   if (polymorphic_pointer->getDynamicClassID() ==
+     * .       erived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 4.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+     * method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 4.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+};
+
+/**
+ * <code>AnnualTimeZoneRule</code> is a class used for representing a time zone
+ * rule which takes effect annually.  The calenday system used for the rule is
+ * is based on Gregorian calendar
+ * 
+ * @stable ICU 4.0
+ */
+class U_I18N_API AnnualTimeZoneRule : public TimeZoneRule {
+public:
+    /**
+     * The constant representing the maximum year used for designating
+     * a rule is permanent.
+     */
+    static const int32_t MAX_YEAR;
+
+    /**
+     * Constructs a <code>AnnualTimeZoneRule</code> with the name, the GMT offset of its
+     * standard time, the amount of daylight saving offset adjustment, the annual start
+     * time rule and the start/until years.  The input DateTimeRule is copied by this
+     * constructor, so the caller remains responsible for deleting the object.
+     * @param name          The time zone name.
+     * @param rawOffset     The GMT offset of its standard time in milliseconds.
+     * @param dstSavings    The amount of daylight saving offset adjustment in
+     *                      milliseconds.  If this ia a rule for standard time,
+     *                      the value of this argument is 0.
+     * @param dateTimeRule  The start date/time rule repeated annually.
+     * @param startYear     The first year when this rule takes effect.
+     * @param endYear       The last year when this rule takes effect.  If this
+     *                      rule is effective forever in future, specify MAX_YEAR.
+     * @stable ICU 4.0
+     */
+    AnnualTimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings,
+            const DateTimeRule& dateTimeRule, int32_t startYear, int32_t endYear);
+
+    /**
+     * Constructs a <code>AnnualTimeZoneRule</code> with the name, the GMT offset of its
+     * standard time, the amount of daylight saving offset adjustment, the annual start
+     * time rule and the start/until years.  The input DateTimeRule object is adopted
+     * by this object, therefore, the caller must not delete the object.
+     * @param name          The time zone name.
+     * @param rawOffset     The GMT offset of its standard time in milliseconds.
+     * @param dstSavings    The amount of daylight saving offset adjustment in
+     *                      milliseconds.  If this ia a rule for standard time,
+     *                      the value of this argument is 0.
+     * @param dateTimeRule  The start date/time rule repeated annually.
+     * @param startYear     The first year when this rule takes effect.
+     * @param endYear       The last year when this rule takes effect.  If this
+     *                      rule is effective forever in future, specify MAX_YEAR.
+     * @stable ICU 4.0
+     */
+    AnnualTimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings,
+            DateTimeRule* dateTimeRule, int32_t startYear, int32_t endYear);
+
+    /**
+     * Copy constructor.
+     * @param source    The AnnualTimeZoneRule object to be copied.
+     * @stable ICU 4.0
+     */
+    AnnualTimeZoneRule(const AnnualTimeZoneRule& source);
+
+    /**
+     * Destructor.
+     * @stable ICU 4.0
+     */
+    virtual ~AnnualTimeZoneRule();
+
+    /**
+     * Clone this AnnualTimeZoneRule object polymorphically. The caller owns the result and
+     * should delete it when done.
+     * @return    A copy of the object.
+     * @stable ICU 4.0
+     */
+    virtual AnnualTimeZoneRule* clone(void) const;
+
+    /**
+     * Assignment operator.
+     * @param right The object to be copied.
+     * @stable ICU 4.0
+     */
+    AnnualTimeZoneRule& operator=(const AnnualTimeZoneRule& right);
+
+    /**
+     * Return true if the given <code>TimeZoneRule</code> objects are semantically equal. Objects
+     * of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given <code>TimeZoneRule</code> objects are semantically equal.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator==(const TimeZoneRule& that) const;
+
+    /**
+     * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
+     * of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given <code>TimeZoneRule</code> objects are semantically unequal.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator!=(const TimeZoneRule& that) const;
+
+    /**
+     * Gets the start date/time rule used by this rule.
+     * @return  The <code>AnnualDateTimeRule</code> which represents the start date/time
+     *          rule used by this time zone rule.
+     * @stable ICU 4.0
+     */
+    const DateTimeRule* getRule(void) const;
+
+    /**
+     * Gets the first year when this rule takes effect.
+     * @return  The start year of this rule.  The year is in Gregorian calendar
+     *          with 0 == 1 BCE, -1 == 2 BCE, etc.
+     * @stable ICU 4.0
+     */
+    int32_t getStartYear(void) const;
+
+    /**
+     * Gets the end year when this rule takes effect.
+     * @return  The end year of this rule (inclusive). The year is in Gregorian calendar
+     *          with 0 == 1 BCE, -1 == 2 BCE, etc.
+     * @stable ICU 4.0
+     */
+    int32_t getEndYear(void) const;
+
+    /**
+     * Gets the time when this rule takes effect in the given year.
+     * @param year              The Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param result            Receives the start time in the year.
+     * @return  true if this rule takes effect in the year and the result is set to
+     *          "result".
+     * @stable ICU 4.0
+     */
+    UBool getStartInYear(int32_t year, int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
+
+    /**
+     * Returns if this rule represents the same rule and offsets as another.
+     * When two <code>TimeZoneRule</code> objects differ only its names, this method
+     * returns true.
+     * @param that  The <code>TimeZoneRule</code> object to be compared with.
+     * @return  true if the other <code>TimeZoneRule</code> is equivalent to this one.
+     * @stable ICU 4.0
+     */
+    virtual UBool isEquivalentTo(const TimeZoneRule& that) const;
+
+    /**
+     * Gets the very first time when this rule takes effect.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param result            Receives the very first time when this rule takes effect.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getFirstStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
+
+    /**
+     * Gets the final time when this rule takes effect.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param result            Receives the final time when this rule takes effect.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getFinalStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
+
+    /**
+     * Gets the first time when this rule takes effect after the specified time.
+     * @param base              The first start time after this base time will be returned.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param inclusive         Whether the base time is inclusive or not.
+     * @param result            Receives The first time when this rule takes effect after
+     *                          the specified base time.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getNextStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
+        UBool inclusive, UDate& result) const;
+
+    /**
+     * Gets the most recent time when this rule takes effect before the specified time.
+     * @param base              The most recent time before this base time will be returned.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param inclusive         Whether the base time is inclusive or not.
+     * @param result            Receives The most recent time when this rule takes effect before
+     *                          the specified base time.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getPreviousStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
+        UBool inclusive, UDate& result) const;
+
+
+private:
+    DateTimeRule* fDateTimeRule;
+    int32_t fStartYear;
+    int32_t fEndYear;
+
+public:
+    /**
+     * Return the class ID for this class. This is useful only for comparing to
+     * a return value from getDynamicClassID(). For example:
+     * <pre>
+     * .   Base* polymorphic_pointer = createPolymorphicObject();
+     * .   if (polymorphic_pointer->getDynamicClassID() ==
+     * .       erived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 4.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+     * method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 4.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+};
+
+/**
+ * <code>TimeArrayTimeZoneRule</code> represents a time zone rule whose start times are
+ * defined by an array of milliseconds since the standard base time.
+ * 
+ * @stable ICU 4.0
+ */
+class U_I18N_API TimeArrayTimeZoneRule : public TimeZoneRule {
+public:
+    /**
+     * Constructs a <code>TimeArrayTimeZoneRule</code> with the name, the GMT offset of its
+     * standard time, the amount of daylight saving offset adjustment and
+     * the array of times when this rule takes effect.
+     * @param name          The time zone name.
+     * @param rawOffset     The UTC offset of its standard time in milliseconds.
+     * @param dstSavings    The amount of daylight saving offset adjustment in
+     *                      milliseconds.  If this ia a rule for standard time,
+     *                      the value of this argument is 0.
+     * @param startTimes    The array start times in milliseconds since the base time
+     *                      (January 1, 1970, 00:00:00).
+     * @param numStartTimes The number of elements in the parameter "startTimes"
+     * @param timeRuleType  The time type of the start times, which is one of
+     *                      <code>DataTimeRule::WALL_TIME</code>, <code>STANDARD_TIME</code>
+     *                      and <code>UTC_TIME</code>.
+     * @stable ICU 4.0
+     */
+    TimeArrayTimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings,
+        const UDate* startTimes, int32_t numStartTimes, DateTimeRule::TimeRuleType timeRuleType);
+
+    /**
+     * Copy constructor.
+     * @param source    The TimeArrayTimeZoneRule object to be copied.
+     * @stable ICU 4.0
+     */
+    TimeArrayTimeZoneRule(const TimeArrayTimeZoneRule& source);
+
+    /**
+     * Destructor.
+     * @stable ICU 4.0
+     */
+    virtual ~TimeArrayTimeZoneRule();
+
+    /**
+     * Clone this TimeArrayTimeZoneRule object polymorphically. The caller owns the result and
+     * should delete it when done.
+     * @return    A copy of the object.
+     * @stable ICU 4.0
+     */
+    virtual TimeArrayTimeZoneRule* clone(void) const;
+
+    /**
+     * Assignment operator.
+     * @param right The object to be copied.
+     * @stable ICU 4.0
+     */
+    TimeArrayTimeZoneRule& operator=(const TimeArrayTimeZoneRule& right);
+
+    /**
+     * Return true if the given <code>TimeZoneRule</code> objects are semantically equal. Objects
+     * of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given <code>TimeZoneRule</code> objects are semantically equal.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator==(const TimeZoneRule& that) const;
+
+    /**
+     * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
+     * of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given <code>TimeZoneRule</code> objects are semantically unequal.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator!=(const TimeZoneRule& that) const;
+
+    /**
+     * Gets the time type of the start times used by this rule.  The return value
+     * is either <code>DateTimeRule::WALL_TIME</code> or <code>STANDARD_TIME</code>
+     * or <code>UTC_TIME</code>.
+     * 
+     * @return The time type used of the start times used by this rule.
+     * @stable ICU 4.0
+     */
+    DateTimeRule::TimeRuleType getTimeType(void) const;
+
+    /**
+     * Gets a start time at the index stored in this rule.
+     * @param index     The index of start times
+     * @param result    Receives the start time at the index
+     * @return  true if the index is within the valid range and
+     *          and the result is set.  When false, the output
+     *          parameger "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    UBool getStartTimeAt(int32_t index, UDate& result) const;
+
+    /**
+     * Returns the number of start times stored in this rule
+     * @return The number of start times.
+     * @stable ICU 4.0
+     */
+    int32_t countStartTimes(void) const;
+
+    /**
+     * Returns if this rule represents the same rule and offsets as another.
+     * When two <code>TimeZoneRule</code> objects differ only its names, this method
+     * returns true.
+     * @param that  The <code>TimeZoneRule</code> object to be compared with.
+     * @return  true if the other <code>TimeZoneRule</code> is equivalent to this one.
+     * @stable ICU 4.0
+     */
+    virtual UBool isEquivalentTo(const TimeZoneRule& that) const;
+
+    /**
+     * Gets the very first time when this rule takes effect.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param result            Receives the very first time when this rule takes effect.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getFirstStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
+
+    /**
+     * Gets the final time when this rule takes effect.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param result            Receives the final time when this rule takes effect.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getFinalStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const;
+
+    /**
+     * Gets the first time when this rule takes effect after the specified time.
+     * @param base              The first start time after this base time will be returned.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param inclusive         Whether the base time is inclusive or not.
+     * @param result            Receives The first time when this rule takes effect after
+     *                          the specified base time.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getNextStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
+        UBool inclusive, UDate& result) const;
+
+    /**
+     * Gets the most recent time when this rule takes effect before the specified time.
+     * @param base              The most recent time before this base time will be returned.
+     * @param prevRawOffset     The standard time offset from UTC before this rule
+     *                          takes effect in milliseconds.
+     * @param prevDSTSavings    The amount of daylight saving offset from the
+     *                          standard time.
+     * @param inclusive         Whether the base time is inclusive or not.
+     * @param result            Receives The most recent time when this rule takes effect before
+     *                          the specified base time.
+     * @return  true if the start time is available.  When false is returned, output parameter
+     *          "result" is unchanged.
+     * @stable ICU 4.0
+     */
+    virtual UBool getPreviousStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings,
+        UBool inclusive, UDate& result) const;
+
+
+private:
+    enum { TIMEARRAY_STACK_BUFFER_SIZE = 32 };
+    UBool initStartTimes(const UDate source[], int32_t size, UErrorCode& ec);
+    UDate getUTC(UDate time, int32_t raw, int32_t dst) const;
+
+    DateTimeRule::TimeRuleType  fTimeRuleType;
+    int32_t fNumStartTimes;
+    UDate*  fStartTimes;
+    UDate   fLocalStartTimes[TIMEARRAY_STACK_BUFFER_SIZE];
+
+public:
+    /**
+     * Return the class ID for this class. This is useful only for comparing to
+     * a return value from getDynamicClassID(). For example:
+     * <pre>
+     * .   Base* polymorphic_pointer = createPolymorphicObject();
+     * .   if (polymorphic_pointer->getDynamicClassID() ==
+     * .       erived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 4.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+     * method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 4.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+};
+
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // TZRULE_H
+
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/tztrans.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/tztrans.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/tztrans.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,195 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 2007-2008, International Business Machines Corporation and         *
-* others. All Rights Reserved.                                                *
-*******************************************************************************
-*/
-#ifndef TZTRANS_H
-#define TZTRANS_H
-
-/**
- * \file 
- * \brief C++ API: Time zone transition
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/uobject.h"
-
-U_NAMESPACE_BEGIN
-
-// Forward declaration
-class TimeZoneRule;
-
-/**
- * <code>TimeZoneTransition</code> is a class representing a time zone transition.
- * An instance has a time of transition and rules for both before and after the transition.
- * @stable ICU 4.0
- */
-class U_I18N_API TimeZoneTransition : public UObject {
-public:
-    /**
-     * Constructs a <code>TimeZoneTransition</code> with the time and the rules before/after
-     * the transition.
-     * 
-     * @param time  The time of transition in milliseconds since the base time.
-     * @param from  The time zone rule used before the transition.
-     * @param to    The time zone rule used after the transition.
-     * @stable ICU 4.0
-     */
-    TimeZoneTransition(UDate time, const TimeZoneRule& from, const TimeZoneRule& to);
-
-    /**
-     * Constructs an empty <code>TimeZoneTransition</code>
-     * @stable ICU 4.0
-     */
-    TimeZoneTransition();
-
-    /**
-     * Copy constructor.
-     * @param source    The TimeZoneTransition object to be copied.
-     * @stable ICU 4.0
-     */
-    TimeZoneTransition(const TimeZoneTransition& source);
-
-    /**
-     * Destructor.
-     * @stable ICU 4.0
-     */
-    ~TimeZoneTransition();
-
-    /**
-     * Clone this TimeZoneTransition object polymorphically. The caller owns the result and
-     * should delete it when done.
-     * @return  A copy of the object.
-     * @stable ICU 4.0
-     */
-    TimeZoneTransition* clone(void) const;
-
-    /**
-     * Assignment operator.
-     * @param right The object to be copied.
-     * @stable ICU 4.0
-     */
-    TimeZoneTransition& operator=(const TimeZoneTransition& right);
-
-    /**
-     * Return true if the given TimeZoneTransition objects are semantically equal. Objects
-     * of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given TimeZoneTransition objects are semantically equal.
-     * @stable ICU 4.0
-     */
-    UBool operator==(const TimeZoneTransition& that) const;
-
-    /**
-     * Return true if the given TimeZoneTransition objects are semantically unequal. Objects
-     * of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given TimeZoneTransition objects are semantically unequal.
-     * @stable ICU 4.0
-     */
-    UBool operator!=(const TimeZoneTransition& that) const;
-
-    /**
-     * Returns the time of transition in milliseconds.
-     * @return The time of the transition in milliseconds since the 1970 Jan 1 epoch time.
-     * @stable ICU 4.0
-     */
-    UDate getTime(void) const;
-
-    /**
-     * Sets the time of transition in milliseconds.
-     * @param time The time of the transition in milliseconds since the 1970 Jan 1 epoch time.
-     * @stable ICU 4.0
-     */
-    void setTime(UDate time);
-
-    /**
-     * Returns the rule used before the transition.
-     * @return The time zone rule used after the transition.
-     * @stable ICU 4.0
-     */
-    const TimeZoneRule* getFrom(void) const;
-
-    /**
-     * Sets the rule used before the transition.  The caller remains
-     * responsible for deleting the <code>TimeZoneRule</code> object.
-     * @param from The time zone rule used before the transition.
-     * @stable ICU 4.0
-     */
-    void setFrom(const TimeZoneRule& from);
-
-    /**
-     * Adopts the rule used before the transition.  The caller must
-     * not delete the <code>TimeZoneRule</code> object passed in.
-     * @param from The time zone rule used before the transition.
-     * @stable ICU 4.0
-     */
-    void adoptFrom(TimeZoneRule* from);
-
-    /**
-     * Sets the rule used after the transition.  The caller remains
-     * responsible for deleting the <code>TimeZoneRule</code> object.
-     * @param to The time zone rule used after the transition.
-     * @stable ICU 4.0
-     */
-    void setTo(const TimeZoneRule& to);
-
-    /**
-     * Adopts the rule used after the transition.  The caller must
-     * not delete the <code>TimeZoneRule</code> object passed in.
-     * @param to The time zone rule used after the transition.
-     * @stable ICU 4.0
-     */
-    void adoptTo(TimeZoneRule* to);
-
-    /**
-     * Returns the rule used after the transition.
-     * @return The time zone rule used after the transition.
-     * @stable ICU 4.0
-     */
-    const TimeZoneRule* getTo(void) const;
-
-private:
-    UDate   fTime;
-    TimeZoneRule*   fFrom;
-    TimeZoneRule*   fTo;
-
-public:
-    /**
-     * Return the class ID for this class. This is useful only for comparing to
-     * a return value from getDynamicClassID(). For example:
-     * <pre>
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       erived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 4.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
-     * method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone()
-     * methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 4.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // TZTRANS_H
-
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/tztrans.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/tztrans.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/tztrans.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/tztrans.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,195 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2008, International Business Machines Corporation and         *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*/
+#ifndef TZTRANS_H
+#define TZTRANS_H
+
+/**
+ * \file 
+ * \brief C++ API: Time zone transition
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+
+// Forward declaration
+class TimeZoneRule;
+
+/**
+ * <code>TimeZoneTransition</code> is a class representing a time zone transition.
+ * An instance has a time of transition and rules for both before and after the transition.
+ * @stable ICU 4.0
+ */
+class U_I18N_API TimeZoneTransition : public UObject {
+public:
+    /**
+     * Constructs a <code>TimeZoneTransition</code> with the time and the rules before/after
+     * the transition.
+     * 
+     * @param time  The time of transition in milliseconds since the base time.
+     * @param from  The time zone rule used before the transition.
+     * @param to    The time zone rule used after the transition.
+     * @stable ICU 4.0
+     */
+    TimeZoneTransition(UDate time, const TimeZoneRule& from, const TimeZoneRule& to);
+
+    /**
+     * Constructs an empty <code>TimeZoneTransition</code>
+     * @stable ICU 4.0
+     */
+    TimeZoneTransition();
+
+    /**
+     * Copy constructor.
+     * @param source    The TimeZoneTransition object to be copied.
+     * @stable ICU 4.0
+     */
+    TimeZoneTransition(const TimeZoneTransition& source);
+
+    /**
+     * Destructor.
+     * @stable ICU 4.0
+     */
+    ~TimeZoneTransition();
+
+    /**
+     * Clone this TimeZoneTransition object polymorphically. The caller owns the result and
+     * should delete it when done.
+     * @return  A copy of the object.
+     * @stable ICU 4.0
+     */
+    TimeZoneTransition* clone(void) const;
+
+    /**
+     * Assignment operator.
+     * @param right The object to be copied.
+     * @stable ICU 4.0
+     */
+    TimeZoneTransition& operator=(const TimeZoneTransition& right);
+
+    /**
+     * Return true if the given TimeZoneTransition objects are semantically equal. Objects
+     * of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given TimeZoneTransition objects are semantically equal.
+     * @stable ICU 4.0
+     */
+    UBool operator==(const TimeZoneTransition& that) const;
+
+    /**
+     * Return true if the given TimeZoneTransition objects are semantically unequal. Objects
+     * of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given TimeZoneTransition objects are semantically unequal.
+     * @stable ICU 4.0
+     */
+    UBool operator!=(const TimeZoneTransition& that) const;
+
+    /**
+     * Returns the time of transition in milliseconds.
+     * @return The time of the transition in milliseconds since the 1970 Jan 1 epoch time.
+     * @stable ICU 4.0
+     */
+    UDate getTime(void) const;
+
+    /**
+     * Sets the time of transition in milliseconds.
+     * @param time The time of the transition in milliseconds since the 1970 Jan 1 epoch time.
+     * @stable ICU 4.0
+     */
+    void setTime(UDate time);
+
+    /**
+     * Returns the rule used before the transition.
+     * @return The time zone rule used after the transition.
+     * @stable ICU 4.0
+     */
+    const TimeZoneRule* getFrom(void) const;
+
+    /**
+     * Sets the rule used before the transition.  The caller remains
+     * responsible for deleting the <code>TimeZoneRule</code> object.
+     * @param from The time zone rule used before the transition.
+     * @stable ICU 4.0
+     */
+    void setFrom(const TimeZoneRule& from);
+
+    /**
+     * Adopts the rule used before the transition.  The caller must
+     * not delete the <code>TimeZoneRule</code> object passed in.
+     * @param from The time zone rule used before the transition.
+     * @stable ICU 4.0
+     */
+    void adoptFrom(TimeZoneRule* from);
+
+    /**
+     * Sets the rule used after the transition.  The caller remains
+     * responsible for deleting the <code>TimeZoneRule</code> object.
+     * @param to The time zone rule used after the transition.
+     * @stable ICU 4.0
+     */
+    void setTo(const TimeZoneRule& to);
+
+    /**
+     * Adopts the rule used after the transition.  The caller must
+     * not delete the <code>TimeZoneRule</code> object passed in.
+     * @param to The time zone rule used after the transition.
+     * @stable ICU 4.0
+     */
+    void adoptTo(TimeZoneRule* to);
+
+    /**
+     * Returns the rule used after the transition.
+     * @return The time zone rule used after the transition.
+     * @stable ICU 4.0
+     */
+    const TimeZoneRule* getTo(void) const;
+
+private:
+    UDate   fTime;
+    TimeZoneRule*   fFrom;
+    TimeZoneRule*   fTo;
+
+public:
+    /**
+     * Return the class ID for this class. This is useful only for comparing to
+     * a return value from getDynamicClassID(). For example:
+     * <pre>
+     * .   Base* polymorphic_pointer = createPolymorphicObject();
+     * .   if (polymorphic_pointer->getDynamicClassID() ==
+     * .       erived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 4.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+     * method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 4.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // TZTRANS_H
+
+//eof

Deleted: MacRuby/trunk/icu-1060/unicode/ubidi.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ubidi.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ubidi.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,2013 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 1999-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-******************************************************************************
-*   file name:  ubidi.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 1999jul27
-*   created by: Markus W. Scherer, updated by Matitiahu Allouche
-*/
-
-#ifndef UBIDI_H
-#define UBIDI_H
-
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-
-/**
- *\file
- * \brief C API: Bidi algorithm
- *
- * <h2>Bidi algorithm for ICU</h2>
- *
- * This is an implementation of the Unicode Bidirectional algorithm.
- * The algorithm is defined in the
- * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
- * version 13, also described in The Unicode Standard, Version 4.0 .<p>
- *
- * Note: Libraries that perform a bidirectional algorithm and
- * reorder strings accordingly are sometimes called "Storage Layout Engines".
- * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such
- * "Storage Layout Engines".
- *
- * <h3>General remarks about the API:</h3>
- *
- * In functions with an error code parameter,
- * the <code>pErrorCode</code> pointer must be valid
- * and the value that it points to must not indicate a failure before
- * the function call. Otherwise, the function returns immediately.
- * After the function call, the value indicates success or failure.<p>
- *
- * The &quot;limit&quot; of a sequence of characters is the position just after their
- * last character, i.e., one more than that position.<p>
- *
- * Some of the API functions provide access to &quot;runs&quot;.
- * Such a &quot;run&quot; is defined as a sequence of characters
- * that are at the same embedding level
- * after performing the Bidi algorithm.<p>
- *
- * @author Markus W. Scherer
- * @version 1.0
- *
- *
- * <h4> Sample code for the ICU Bidi API </h4>
- *
- * <h5>Rendering a paragraph with the ICU Bidi API</h5>
- *
- * This is (hypothetical) sample code that illustrates
- * how the ICU Bidi API could be used to render a paragraph of text.
- * Rendering code depends highly on the graphics system,
- * therefore this sample code must make a lot of assumptions,
- * which may or may not match any existing graphics system's properties.
- *
- * <p>The basic assumptions are:</p>
- * <ul>
- * <li>Rendering is done from left to right on a horizontal line.</li>
- * <li>A run of single-style, unidirectional text can be rendered at once.</li>
- * <li>Such a run of text is passed to the graphics system with
- *     characters (code units) in logical order.</li>
- * <li>The line-breaking algorithm is very complicated
- *     and Locale-dependent -
- *     and therefore its implementation omitted from this sample code.</li>
- * </ul>
- *
- * <pre>
- * \code
- *#include "unicode/ubidi.h"
- *
- *typedef enum {
- *     styleNormal=0, styleSelected=1,
- *     styleBold=2, styleItalics=4,
- *     styleSuper=8, styleSub=16
- *} Style;
- *
- *typedef struct { int32_t limit; Style style; } StyleRun;
- *
- *int getTextWidth(const UChar *text, int32_t start, int32_t limit,
- *                  const StyleRun *styleRuns, int styleRunCount);
- *
- * // set *pLimit and *pStyleRunLimit for a line
- * // from text[start] and from styleRuns[styleRunStart]
- * // using ubidi_getLogicalRun(para, ...)
- *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
- *                  UBiDi *para,
- *                  const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
- *                  int *pLineWidth);
- *
- * // render runs on a line sequentially, always from left to right
- *
- * // prepare rendering a new line
- * void startLine(UBiDiDirection textDirection, int lineWidth);
- *
- * // render a run of text and advance to the right by the run width
- * // the text[start..limit-1] is always in logical order
- * void renderRun(const UChar *text, int32_t start, int32_t limit,
- *               UBiDiDirection textDirection, Style style);
- *
- * // We could compute a cross-product
- * // from the style runs with the directional runs
- * // and then reorder it.
- * // Instead, here we iterate over each run type
- * // and render the intersections -
- * // with shortcuts in simple (and common) cases.
- * // renderParagraph() is the main function.
- *
- * // render a directional run with
- * // (possibly) multiple style runs intersecting with it
- * void renderDirectionalRun(const UChar *text,
- *                           int32_t start, int32_t limit,
- *                           UBiDiDirection direction,
- *                           const StyleRun *styleRuns, int styleRunCount) {
- *     int i;
- *
- *     // iterate over style runs
- *     if(direction==UBIDI_LTR) {
- *         int styleLimit;
- *
- *         for(i=0; i<styleRunCount; ++i) {
- *             styleLimit=styleRun[i].limit;
- *             if(start<styleLimit) {
- *                 if(styleLimit>limit) { styleLimit=limit; }
- *                 renderRun(text, start, styleLimit,
- *                           direction, styleRun[i].style);
- *                 if(styleLimit==limit) { break; }
- *                 start=styleLimit;
- *             }
- *         }
- *     } else {
- *         int styleStart;
- *
- *         for(i=styleRunCount-1; i>=0; --i) {
- *             if(i>0) {
- *                 styleStart=styleRun[i-1].limit;
- *             } else {
- *                 styleStart=0;
- *             }
- *             if(limit>=styleStart) {
- *                 if(styleStart<start) { styleStart=start; }
- *                 renderRun(text, styleStart, limit,
- *                           direction, styleRun[i].style);
- *                 if(styleStart==start) { break; }
- *                 limit=styleStart;
- *             }
- *         }
- *     }
- * }
- *
- * // the line object represents text[start..limit-1]
- * void renderLine(UBiDi *line, const UChar *text,
- *                 int32_t start, int32_t limit,
- *                 const StyleRun *styleRuns, int styleRunCount) {
- *     UBiDiDirection direction=ubidi_getDirection(line);
- *     if(direction!=UBIDI_MIXED) {
- *         // unidirectional
- *         if(styleRunCount<=1) {
- *             renderRun(text, start, limit, direction, styleRuns[0].style);
- *         } else {
- *             renderDirectionalRun(text, start, limit,
- *                                  direction, styleRuns, styleRunCount);
- *         }
- *     } else {
- *         // mixed-directional
- *         int32_t count, i, length;
- *         UBiDiLevel level;
- *
- *         count=ubidi_countRuns(para, pErrorCode);
- *         if(U_SUCCESS(*pErrorCode)) {
- *             if(styleRunCount<=1) {
- *                 Style style=styleRuns[0].style;
- *
- *                 // iterate over directional runs
- *                for(i=0; i<count; ++i) {
- *                    direction=ubidi_getVisualRun(para, i, &start, &length);
- *                     renderRun(text, start, start+length, direction, style);
- *                }
- *             } else {
- *                 int32_t j;
- *
- *                 // iterate over both directional and style runs
- *                 for(i=0; i<count; ++i) {
- *                     direction=ubidi_getVisualRun(line, i, &start, &length);
- *                     renderDirectionalRun(text, start, start+length,
- *                                          direction, styleRuns, styleRunCount);
- *                 }
- *             }
- *         }
- *     }
- * }
- *
- *void renderParagraph(const UChar *text, int32_t length,
- *                     UBiDiDirection textDirection,
- *                      const StyleRun *styleRuns, int styleRunCount,
- *                      int lineWidth,
- *                      UErrorCode *pErrorCode) {
- *     UBiDi *para;
- *
- *     if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
- *         return;
- *     }
- *
- *     para=ubidi_openSized(length, 0, pErrorCode);
- *     if(para==NULL) { return; }
- *
- *     ubidi_setPara(para, text, length,
- *                   textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
- *                   NULL, pErrorCode);
- *     if(U_SUCCESS(*pErrorCode)) {
- *         UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
- *         StyleRun styleRun={ length, styleNormal };
- *         int width;
- *
- *         if(styleRuns==NULL || styleRunCount<=0) {
- *            styleRunCount=1;
- *             styleRuns=&styleRun;
- *         }
- *
- *        // assume styleRuns[styleRunCount-1].limit>=length
- *
- *         width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
- *         if(width<=lineWidth) {
- *             // everything fits onto one line
- *
- *            // prepare rendering a new line from either left or right
- *             startLine(paraLevel, width);
- *
- *             renderLine(para, text, 0, length,
- *                        styleRuns, styleRunCount);
- *         } else {
- *             UBiDi *line;
- *
- *             // we need to render several lines
- *             line=ubidi_openSized(length, 0, pErrorCode);
- *             if(line!=NULL) {
- *                 int32_t start=0, limit;
- *                 int styleRunStart=0, styleRunLimit;
- *
- *                 for(;;) {
- *                     limit=length;
- *                     styleRunLimit=styleRunCount;
- *                     getLineBreak(text, start, &limit, para,
- *                                  styleRuns, styleRunStart, &styleRunLimit,
- *                                 &width);
- *                     ubidi_setLine(para, start, limit, line, pErrorCode);
- *                     if(U_SUCCESS(*pErrorCode)) {
- *                         // prepare rendering a new line
- *                         // from either left or right
- *                         startLine(paraLevel, width);
- *
- *                         renderLine(line, text, start, limit,
- *                                    styleRuns+styleRunStart,
- *                                    styleRunLimit-styleRunStart);
- *                     }
- *                     if(limit==length) { break; }
- *                     start=limit;
- *                     styleRunStart=styleRunLimit-1;
- *                     if(start>=styleRuns[styleRunStart].limit) {
- *                         ++styleRunStart;
- *                     }
- *                 }
- *
- *                 ubidi_close(line);
- *             }
- *        }
- *    }
- *
- *     ubidi_close(para);
- *}
- *\endcode
- * </pre>
- */
-
-/*DOCXX_TAG*/
-/*@{*/
-
-/**
- * UBiDiLevel is the type of the level values in this
- * Bidi implementation.
- * It holds an embedding level and indicates the visual direction
- * by its bit&nbsp;0 (even/odd value).<p>
- *
- * It can also hold non-level values for the
- * <code>paraLevel</code> and <code>embeddingLevels</code>
- * arguments of <code>ubidi_setPara()</code>; there:
- * <ul>
- * <li>bit&nbsp;7 of an <code>embeddingLevels[]</code>
- * value indicates whether the using application is
- * specifying the level of a character to <i>override</i> whatever the
- * Bidi implementation would resolve it to.</li>
- * <li><code>paraLevel</code> can be set to the
- * pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
- * and <code>UBIDI_DEFAULT_RTL</code>.</li>
- * </ul>
- *
- * @see ubidi_setPara
- *
- * <p>The related constants are not real, valid level values.
- * <code>UBIDI_DEFAULT_XXX</code> can be used to specify
- * a default for the paragraph level for
- * when the <code>ubidi_setPara()</code> function
- * shall determine it but there is no
- * strongly typed character in the input.<p>
- *
- * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
- * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
- * just like with normal LTR and RTL level values -
- * these special values are designed that way. Also, the implementation
- * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
- *
- * @see UBIDI_DEFAULT_LTR
- * @see UBIDI_DEFAULT_RTL
- * @see UBIDI_LEVEL_OVERRIDE
- * @see UBIDI_MAX_EXPLICIT_LEVEL
- * @stable ICU 2.0
- */
-typedef uint8_t UBiDiLevel;
-
-/** Paragraph level setting.<p>
- *
- * Constant indicating that the base direction depends on the first strong
- * directional character in the text according to the Unicode Bidirectional
- * Algorithm. If no strong directional character is present,
- * then set the paragraph level to 0 (left-to-right).<p>
- *
- * If this value is used in conjunction with reordering modes
- * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
- * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
- * is assumed to be visual LTR, and the text after reordering is required
- * to be the corresponding logical string with appropriate contextual
- * direction. The direction of the result string will be RTL if either
- * the righmost or leftmost strong character of the source text is RTL
- * or Arabic Letter, the direction will be LTR otherwise.<p>
- *
- * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
- * be added at the beginning of the result string to ensure round trip
- * (that the result string, when reordered back to visual, will produce
- * the original source text).
- * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
- * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
- * @stable ICU 2.0
- */
-#define UBIDI_DEFAULT_LTR 0xfe
-
-/** Paragraph level setting.<p>
- *
- * Constant indicating that the base direction depends on the first strong
- * directional character in the text according to the Unicode Bidirectional
- * Algorithm. If no strong directional character is present,
- * then set the paragraph level to 1 (right-to-left).<p>
- *
- * If this value is used in conjunction with reordering modes
- * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
- * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
- * is assumed to be visual LTR, and the text after reordering is required
- * to be the corresponding logical string with appropriate contextual
- * direction. The direction of the result string will be RTL if either
- * the righmost or leftmost strong character of the source text is RTL
- * or Arabic Letter, or if the text contains no strong character;
- * the direction will be LTR otherwise.<p>
- *
- * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
- * be added at the beginning of the result string to ensure round trip
- * (that the result string, when reordered back to visual, will produce
- * the original source text).
- * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
- * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
- * @stable ICU 2.0
- */
-#define UBIDI_DEFAULT_RTL 0xff
-
-/**
- * Maximum explicit embedding level.
- * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
- * @stable ICU 2.0
- */
-#define UBIDI_MAX_EXPLICIT_LEVEL 61
-
-/** Bit flag for level input.
- *  Overrides directional properties.
- * @stable ICU 2.0
- */
-#define UBIDI_LEVEL_OVERRIDE 0x80
-
-/**
- * Special value which can be returned by the mapping functions when a logical
- * index has no corresponding visual index or vice-versa. This may happen
- * for the logical-to-visual mapping of a Bidi control when option
- * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen
- * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted
- * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
- * @see ubidi_getVisualIndex
- * @see ubidi_getVisualMap
- * @see ubidi_getLogicalIndex
- * @see ubidi_getLogicalMap
- * @stable ICU 3.6
- */
-#define UBIDI_MAP_NOWHERE   (-1)
-
-/**
- * <code>UBiDiDirection</code> values indicate the text direction.
- * @stable ICU 2.0
- */
-enum UBiDiDirection {
-    /** All left-to-right text. This is a 0 value. @stable ICU 2.0 */
-    UBIDI_LTR,
-    /** All right-to-left text. This is a 1 value. @stable ICU 2.0 */
-    UBIDI_RTL,
-    /** Mixed-directional text. @stable ICU 2.0 */
-    UBIDI_MIXED
-};
-
-/** @stable ICU 2.0 */
-typedef enum UBiDiDirection UBiDiDirection;
-
-/**
- * Forward declaration of the <code>UBiDi</code> structure for the declaration of
- * the API functions. Its fields are implementation-specific.<p>
- * This structure holds information about a paragraph (or multiple paragraphs)
- * of text with Bidi-algorithm-related details, or about one line of
- * such a paragraph.<p>
- * Reordering can be done on a line, or on one or more paragraphs which are
- * then interpreted each as one single line.
- * @stable ICU 2.0
- */
-struct UBiDi;
-
-/** @stable ICU 2.0 */
-typedef struct UBiDi UBiDi;
-
-/**
- * Allocate a <code>UBiDi</code> structure.
- * Such an object is initially empty. It is assigned
- * the Bidi properties of a piece of text containing one or more paragraphs
- * by <code>ubidi_setPara()</code>
- * or the Bidi properties of a line within a paragraph by
- * <code>ubidi_setLine()</code>.<p>
- * This object can be reused for as long as it is not deallocated
- * by calling <code>ubidi_close()</code>.<p>
- * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate
- * additional memory for internal structures as necessary.
- *
- * @return An empty <code>UBiDi</code> object.
- * @stable ICU 2.0
- */
-U_STABLE UBiDi * U_EXPORT2
-ubidi_open(void);
-
-/**
- * Allocate a <code>UBiDi</code> structure with preallocated memory
- * for internal structures.
- * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
- * with no arguments, but it also preallocates memory for internal structures
- * according to the sizings supplied by the caller.<p>
- * Subsequent functions will not allocate any more memory, and are thus
- * guaranteed not to fail because of lack of memory.<p>
- * The preallocation can be limited to some of the internal memory
- * by setting some values to 0 here. That means that if, e.g.,
- * <code>maxRunCount</code> cannot be reasonably predetermined and should not
- * be set to <code>maxLength</code> (the only failproof value) to avoid
- * wasting memory, then <code>maxRunCount</code> could be set to 0 here
- * and the internal structures that are associated with it will be allocated
- * on demand, just like with <code>ubidi_open()</code>.
- *
- * @param maxLength is the maximum text or line length that internal memory
- *        will be preallocated for. An attempt to associate this object with a
- *        longer text will fail, unless this value is 0, which leaves the allocation
- *        up to the implementation.
- *
- * @param maxRunCount is the maximum anticipated number of same-level runs
- *        that internal memory will be preallocated for. An attempt to access
- *        visual runs on an object that was not preallocated for as many runs
- *        as the text was actually resolved to will fail,
- *        unless this value is 0, which leaves the allocation up to the implementation.<br><br>
- *        The number of runs depends on the actual text and maybe anywhere between
- *        1 and <code>maxLength</code>. It is typically small.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @return An empty <code>UBiDi</code> object with preallocated memory.
- * @stable ICU 2.0
- */
-U_STABLE UBiDi * U_EXPORT2
-ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);
-
-/**
- * <code>ubidi_close()</code> must be called to free the memory
- * associated with a UBiDi object.<p>
- *
- * <strong>Important: </strong>
- * A parent <code>UBiDi</code> object must not be destroyed or reused if
- * it still has children.
- * If a <code>UBiDi</code> object has become the <i>child</i>
- * of another one (its <i>parent</i>) by calling
- * <code>ubidi_setLine()</code>, then the child object must
- * be destroyed (closed) or reused (by calling
- * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
- * before the parent object.
- *
- * @param pBiDi is a <code>UBiDi</code> object.
- *
- * @see ubidi_setPara
- * @see ubidi_setLine
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ubidi_close(UBiDi *pBiDi);
-
-/**
- * Modify the operation of the Bidi algorithm such that it
- * approximates an "inverse Bidi" algorithm. This function
- * must be called before <code>ubidi_setPara()</code>.
- *
- * <p>The normal operation of the Bidi algorithm as described
- * in the Unicode Technical Report is to take text stored in logical
- * (keyboard, typing) order and to determine the reordering of it for visual
- * rendering.
- * Some legacy systems store text in visual order, and for operations
- * with standard, Unicode-based algorithms, the text needs to be transformed
- * to logical order. This is effectively the inverse algorithm of the
- * described Bidi algorithm. Note that there is no standard algorithm for
- * this "inverse Bidi" and that the current implementation provides only an
- * approximation of "inverse Bidi".</p>
- *
- * <p>With <code>isInverse</code> set to <code>TRUE</code>,
- * this function changes the behavior of some of the subsequent functions
- * in a way that they can be used for the inverse Bidi algorithm.
- * Specifically, runs of text with numeric characters will be treated in a
- * special way and may need to be surrounded with LRM characters when they are
- * written in reordered sequence.</p>
- *
- * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>.
- * Since the actual input for "inverse Bidi" is visually ordered text and
- * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually
- * the runs of the logically ordered output.</p>
- *
- * <p>Calling this function with argument <code>isInverse</code> set to
- * <code>TRUE</code> is equivalent to calling
- * <code>ubidi_setReorderingMode</code> with argument
- * <code>reorderingMode</code>
- * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
- * Calling this function with argument <code>isInverse</code> set to
- * <code>FALSE</code> is equivalent to calling
- * <code>ubidi_setReorderingMode</code> with argument
- * <code>reorderingMode</code>
- * set to <code>#UBIDI_REORDER_DEFAULT</code>.
- *
- * @param pBiDi is a <code>UBiDi</code> object.
- *
- * @param isInverse specifies "forward" or "inverse" Bidi operation.
- *
- * @see ubidi_setPara
- * @see ubidi_writeReordered
- * @see ubidi_setReorderingMode
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);
-
-/**
- * Is this Bidi object set to perform the inverse Bidi algorithm?
- * <p>Note: calling this function after setting the reordering mode with
- * <code>ubidi_setReorderingMode</code> will return <code>TRUE</code> if the
- * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>,
- * <code>FALSE</code> for all other values.</p>
- *
- * @param pBiDi is a <code>UBiDi</code> object.
- * @return TRUE if the Bidi object is set to perform the inverse Bidi algorithm
- * by handling numbers as L.
- *
- * @see ubidi_setInverse
- * @see ubidi_setReorderingMode
- * @stable ICU 2.0
- */
-
-U_STABLE UBool U_EXPORT2
-ubidi_isInverse(UBiDi *pBiDi);
-
-/**
- * Specify whether block separators must be allocated level zero,
- * so that successive paragraphs will progress from left to right.
- * This function must be called before <code>ubidi_setPara()</code>.
- * Paragraph separators (B) may appear in the text.  Setting them to level zero
- * means that all paragraph separators (including one possibly appearing
- * in the last text position) are kept in the reordered text after the text
- * that they follow in the source text.
- * When this feature is not enabled, a paragraph separator at the last
- * position of the text before reordering will go to the first position
- * of the reordered text when the paragraph level is odd.
- *
- * @param pBiDi is a <code>UBiDi</code> object.
- *
- * @param orderParagraphsLTR specifies whether paragraph separators (B) must
- * receive level 0, so that successive paragraphs progress from left to right.
- *
- * @see ubidi_setPara
- * @stable ICU 3.4
- */
-U_STABLE void U_EXPORT2
-ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);
-
-/**
- * Is this Bidi object set to allocate level 0 to block separators so that
- * successive paragraphs progress from left to right?
- *
- * @param pBiDi is a <code>UBiDi</code> object.
- * @return TRUE if the Bidi object is set to allocate level 0 to block
- *         separators.
- *
- * @see ubidi_orderParagraphsLTR
- * @stable ICU 3.4
- */
-U_STABLE UBool U_EXPORT2
-ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
-
-/**
- * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi
- * algorithm to use.
- *
- * @see ubidi_setReorderingMode
- * @stable ICU 3.6
- */
-typedef enum UBiDiReorderingMode {
-    /** Regular Logical to Visual Bidi algorithm according to Unicode.
-      * This is a 0 value.
-      * @stable ICU 3.6 */
-    UBIDI_REORDER_DEFAULT = 0,
-    /** Logical to Visual algorithm which handles numbers in a way which
-      * mimicks the behavior of Windows XP.
-      * @stable ICU 3.6 */
-    UBIDI_REORDER_NUMBERS_SPECIAL,
-    /** Logical to Visual algorithm grouping numbers with adjacent R characters
-      * (reversible algorithm).
-      * @stable ICU 3.6 */
-    UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
-    /** Reorder runs only to transform a Logical LTR string to the Logical RTL
-      * string with the same display, or vice-versa.<br>
-      * If this mode is set together with option
-      * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source
-      * text may be removed and other controls may be added to produce the
-      * minimum combination which has the required display.
-      * @stable ICU 3.6 */
-    UBIDI_REORDER_RUNS_ONLY,
-    /** Visual to Logical algorithm which handles numbers like L
-      * (same algorithm as selected by <code>ubidi_setInverse(TRUE)</code>.
-      * @see ubidi_setInverse
-      * @stable ICU 3.6 */
-    UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
-    /** Visual to Logical algorithm equivalent to the regular Logical to Visual
-      * algorithm.
-      * @stable ICU 3.6 */
-    UBIDI_REORDER_INVERSE_LIKE_DIRECT,
-    /** Inverse Bidi (Visual to Logical) algorithm for the
-      * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
-      * @stable ICU 3.6 */
-    UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL,
-    /** Number of values for reordering mode.
-      * @stable ICU 3.6 */
-    UBIDI_REORDER_COUNT
-} UBiDiReorderingMode;
-
-/**
- * Modify the operation of the Bidi algorithm such that it implements some
- * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
- * algorithm, depending on different values of the "reordering mode".
- * This function must be called before <code>ubidi_setPara()</code>, and stays
- * in effect until called again with a different argument.
- *
- * <p>The normal operation of the Bidi algorithm as described
- * in the Unicode Standard Annex #9 is to take text stored in logical
- * (keyboard, typing) order and to determine how to reorder it for visual
- * rendering.</p>
- *
- * <p>With the reordering mode set to a value other than
- * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of
- * some of the subsequent functions in a way such that they implement an
- * inverse Bidi algorithm or some other algorithm variants.</p>
- *
- * <p>Some legacy systems store text in visual order, and for operations
- * with standard, Unicode-based algorithms, the text needs to be transformed
- * into logical order. This is effectively the inverse algorithm of the
- * described Bidi algorithm. Note that there is no standard algorithm for
- * this "inverse Bidi", so a number of variants are implemented here.</p>
- *
- * <p>In other cases, it may be desirable to emulate some variant of the
- * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
- * Logical to Logical transformation.</p>
- *
- * <ul>
- * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>,
- * the standard Bidi Logical to Visual algorithm is applied.</li>
- *
- * <li>When the reordering mode is set to
- * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>,
- * the algorithm used to perform Bidi transformations when calling
- * <code>ubidi_setPara</code> should approximate the algorithm used in
- * Microsoft Windows XP rather than strictly conform to the Unicode Bidi
- * algorithm.
- * <br>
- * The differences between the basic algorithm and the algorithm addressed
- * by this option are as follows:
- * <ul>
- *   <li>Within text at an even embedding level, the sequence "123AB"
- *   (where AB represent R or AL letters) is transformed to "123BA" by the
- *   Unicode algorithm and to "BA123" by the Windows algorithm.</li>
- *   <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just
- *   like regular numbers (EN).</li>
- * </ul></li>
- *
- * <li>When the reordering mode is set to
- * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>,
- * numbers located between LTR text and RTL text are associated with the RTL
- * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
- * upper case letters represent RTL characters) will be transformed to
- * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
- * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
- * This makes the algorithm reversible and makes it useful when round trip
- * (from visual to logical and back to visual) must be achieved without
- * adding LRM characters. However, this is a variation from the standard
- * Unicode Bidi algorithm.<br>
- * The source text should not contain Bidi control characters other than LRM
- * or RLM.</li>
- *
- * <li>When the reordering mode is set to
- * <code>#UBIDI_REORDER_RUNS_ONLY</code>,
- * a "Logical to Logical" transformation must be performed:
- * <ul>
- * <li>If the default text level of the source text (argument <code>paraLevel</code>
- * in <code>ubidi_setPara</code>) is even, the source text will be handled as
- * LTR logical text and will be transformed to the RTL logical text which has
- * the same LTR visual display.</li>
- * <li>If the default level of the source text is odd, the source text
- * will be handled as RTL logical text and will be transformed to the
- * LTR logical text which has the same LTR visual display.</li>
- * </ul>
- * This mode may be needed when logical text which is basically Arabic or
- * Hebrew, with possible included numbers or phrases in English, has to be
- * displayed as if it had an even embedding level (this can happen if the
- * displaying application treats all text as if it was basically LTR).
- * <br>
- * This mode may also be needed in the reverse case, when logical text which is
- * basically English, with possible included phrases in Arabic or Hebrew, has to
- * be displayed as if it had an odd embedding level.
- * <br>
- * Both cases could be handled by adding LRE or RLE at the head of the text,
- * if the display subsystem supports these formatting controls. If it does not,
- * the problem may be handled by transforming the source text in this mode
- * before displaying it, so that it will be displayed properly.<br>
- * The source text should not contain Bidi control characters other than LRM
- * or RLM.</li>
- *
- * <li>When the reordering mode is set to
- * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm
- * is applied.
- * Runs of text with numeric characters will be treated like LTR letters and
- * may need to be surrounded with LRM characters when they are written in
- * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can
- * be used with function <code>ubidi_writeReordered</code> to this end. This
- * mode is equivalent to calling <code>ubidi_setInverse()</code> with
- * argument <code>isInverse</code> set to <code>TRUE</code>.</li>
- *
- * <li>When the reordering mode is set to
- * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual
- * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
- * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>
- * but is closer to the regular Bidi algorithm.
- * <br>
- * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
- * upper case represents RTL characters) will be transformed to
- * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
- * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
- * When used in conjunction with option
- * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally
- * adds Bidi marks to the output significantly more sparingly than mode
- * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option
- * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to
- * <code>ubidi_writeReordered</code>.</li>
- *
- * <li>When the reordering mode is set to
- * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
- * Bidi algorithm used in Windows XP is used as an approximation of an
- * "inverse Bidi" algorithm.
- * <br>
- * For example, an LTR paragraph with the content "abc FED123" (where
- * upper case represents RTL characters) will be transformed to
- * "abc 123DEF.</li>
- * </ul>
- *
- * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
- * (i.e. those with a name starting with <code>UBIDI_REORDER_INVERSE</code>),
- * output runs should be retrieved using
- * <code>ubidi_getVisualRun()</code>, and the output text with
- * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in
- * "inverse Bidi" modes the input is actually visually ordered text and
- * reordered output returned by <code>ubidi_getVisualRun()</code> or
- * <code>ubidi_writeReordered()</code> are actually runs or character string
- * of logically ordered output.<br>
- * For all the "inverse Bidi" modes, the source text should not contain
- * Bidi control characters other than LRM or RLM.</p>
- *
- * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of
- * <code>ubidi_writeReordered</code> has no useful meaning and should not be
- * used in conjunction with any value of the reordering mode specifying
- * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>.
- *
- * @param pBiDi is a <code>UBiDi</code> object.
- * @param reorderingMode specifies the required variant of the Bidi algorithm.
- *
- * @see UBiDiReorderingMode
- * @see ubidi_setInverse
- * @see ubidi_setPara
- * @see ubidi_writeReordered
- * @stable ICU 3.6
- */
-U_STABLE void U_EXPORT2
-ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode);
-
-/**
- * What is the requested reordering mode for a given Bidi object?
- *
- * @param pBiDi is a <code>UBiDi</code> object.
- * @return the current reordering mode of the Bidi object
- * @see ubidi_setReorderingMode
- * @stable ICU 3.6
- */
-U_STABLE UBiDiReorderingMode U_EXPORT2
-ubidi_getReorderingMode(UBiDi *pBiDi);
-
-/**
- * <code>UBiDiReorderingOption</code> values indicate which options are
- * specified to affect the Bidi algorithm.
- *
- * @see ubidi_setReorderingOptions
- * @stable ICU 3.6
- */
-typedef enum UBiDiReorderingOption {
-    /**
-     * option value for <code>ubidi_setReorderingOptions</code>:
-     * disable all the options which can be set with this function
-     * @see ubidi_setReorderingOptions
-     * @stable ICU 3.6
-     */
-    UBIDI_OPTION_DEFAULT = 0,
-
-    /**
-     * option bit for <code>ubidi_setReorderingOptions</code>:
-     * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
-     * a reordering to a Logical order
-     *
-     * <p>This option must be set or reset before calling
-     * <code>ubidi_setPara</code>.</p>
-     *
-     * <p>This option is significant only with reordering modes which generate
-     * a result with Logical order, specifically:</p>
-     * <ul>
-     *   <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li>
-     *   <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li>
-     *   <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li>
-     *   <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
-     * </ul>
-     *
-     * <p>If this option is set in conjunction with reordering mode
-     * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
-     * <code>ubidi_setInverse(TRUE)</code>, it implies
-     * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>
-     * in calls to function <code>ubidi_writeReordered()</code>.</p>
-     *
-     * <p>For other reordering modes, a minimum number of LRM or RLM characters
-     * will be added to the source text after reordering it so as to ensure
-     * round trip, i.e. when applying the inverse reordering mode on the
-     * resulting logical text with removal of Bidi marks
-     * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling
-     * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
-     * in <code>ubidi_writeReordered</code>), the result will be identical to the
-     * source text in the first transformation.
-     *
-     * <p>This option will be ignored if specified together with option
-     * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. It inhibits option
-     * <code>UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function
-     * <code>ubidi_writeReordered()</code> and it implies option
-     * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function
-     * <code>ubidi_writeReordered()</code> if the reordering mode is
-     * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
-     *
-     * @see ubidi_setReorderingMode
-     * @see ubidi_setReorderingOptions
-     * @stable ICU 3.6
-     */
-    UBIDI_OPTION_INSERT_MARKS = 1,
-
-    /**
-     * option bit for <code>ubidi_setReorderingOptions</code>:
-     * remove Bidi control characters
-     *
-     * <p>This option must be set or reset before calling
-     * <code>ubidi_setPara</code>.</p>
-     *
-     * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
-     * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls
-     * to function <code>ubidi_writeReordered()</code> and it implies option
-     * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p>
-     *
-     * @see ubidi_setReorderingMode
-     * @see ubidi_setReorderingOptions
-     * @stable ICU 3.6
-     */
-    UBIDI_OPTION_REMOVE_CONTROLS = 2,
-
-    /**
-     * option bit for <code>ubidi_setReorderingOptions</code>:
-     * process the output as part of a stream to be continued
-     *
-     * <p>This option must be set or reset before calling
-     * <code>ubidi_setPara</code>.</p>
-     *
-     * <p>This option specifies that the caller is interested in processing large
-     * text object in parts.
-     * The results of the successive calls are expected to be concatenated by the
-     * caller. Only the call for the last part will have this option bit off.</p>
-     *
-     * <p>When this option bit is on, <code>ubidi_setPara()</code> may process
-     * less than the full source text in order to truncate the text at a meaningful
-     * boundary. The caller should call <code>ubidi_getProcessedLength()</code>
-     * immediately after calling <code>ubidi_setPara()</code> in order to
-     * determine how much of the source text has been processed.
-     * Source text beyond that length should be resubmitted in following calls to
-     * <code>ubidi_setPara</code>. The processed length may be less than
-     * the length of the source text if a character preceding the last character of
-     * the source text constitutes a reasonable boundary (like a block separator)
-     * for text to be continued.<br>
-     * If the last character of the source text constitutes a reasonable
-     * boundary, the whole text will be processed at once.<br>
-     * If nowhere in the source text there exists
-     * such a reasonable boundary, the processed length will be zero.<br>
-     * The caller should check for such an occurrence and do one of the following:
-     * <ul><li>submit a larger amount of text with a better chance to include
-     *         a reasonable boundary.</li>
-     *     <li>resubmit the same text after turning off option
-     *         <code>UBIDI_OPTION_STREAMING</code>.</li></ul>
-     * In all cases, this option should be turned off before processing the last
-     * part of the text.</p>
-     *
-     * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used,
-     * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with
-     * argument <code>orderParagraphsLTR</code> set to <code>TRUE</code> before
-     * calling <code>ubidi_setPara</code> so that later paragraphs may be
-     * concatenated to previous paragraphs on the right.</p>
-     *
-     * @see ubidi_setReorderingMode
-     * @see ubidi_setReorderingOptions
-     * @see ubidi_getProcessedLength
-     * @see ubidi_orderParagraphsLTR
-     * @stable ICU 3.6
-     */
-    UBIDI_OPTION_STREAMING = 4
-} UBiDiReorderingOption;
-
-/**
- * Specify which of the reordering options
- * should be applied during Bidi transformations.
- *
- * @param pBiDi is a <code>UBiDi</code> object.
- * @param reorderingOptions is a combination of zero or more of the following
- * options:
- * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>,
- * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>.
- *
- * @see ubidi_getReorderingOptions
- * @stable ICU 3.6
- */
-U_STABLE void U_EXPORT2
-ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions);
-
-/**
- * What are the reordering options applied to a given Bidi object?
- *
- * @param pBiDi is a <code>UBiDi</code> object.
- * @return the current reordering options of the Bidi object
- * @see ubidi_setReorderingOptions
- * @stable ICU 3.6
- */
-U_STABLE uint32_t U_EXPORT2
-ubidi_getReorderingOptions(UBiDi *pBiDi);
-
-/**
- * Perform the Unicode Bidi algorithm. It is defined in the
- * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Anned #9</a>,
- * version 13,
- * also described in The Unicode Standard, Version 4.0 .<p>
- *
- * This function takes a piece of plain text containing one or more paragraphs,
- * with or without externally specified embedding levels from <i>styled</i>
- * text and computes the left-right-directionality of each character.<p>
- *
- * If the entire text is all of the same directionality, then
- * the function may not perform all the steps described by the algorithm,
- * i.e., some levels may not be the same as if all steps were performed.
- * This is not relevant for unidirectional text.<br>
- * For example, in pure LTR text with numbers the numbers would get
- * a resolved level of 2 higher than the surrounding text according to
- * the algorithm. This implementation may set all resolved levels to
- * the same value in such a case.<p>
- *
- * The text can be composed of multiple paragraphs. Occurrence of a block
- * separator in the text terminates a paragraph, and whatever comes next starts
- * a new paragraph. The exception to this rule is when a Carriage Return (CR)
- * is followed by a Line Feed (LF). Both CR and LF are block separators, but
- * in that case, the pair of characters is considered as terminating the
- * preceding paragraph, and a new paragraph will be started by a character
- * coming after the LF.
- *
- * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
- *        which will be set to contain the reordering information,
- *        especially the resolved levels for all the characters in <code>text</code>.
- *
- * @param text is a pointer to the text that the Bidi algorithm will be performed on.
- *        This pointer is stored in the UBiDi object and can be retrieved
- *        with <code>ubidi_getText()</code>.<br>
- *        <strong>Note:</strong> the text must be (at least) <code>length</code> long.
- *
- * @param length is the length of the text; if <code>length==-1</code> then
- *        the text must be zero-terminated.
- *
- * @param paraLevel specifies the default level for the text;
- *        it is typically 0 (LTR) or 1 (RTL).
- *        If the function shall determine the paragraph level from the text,
- *        then <code>paraLevel</code> can be set to
- *        either <code>#UBIDI_DEFAULT_LTR</code>
- *        or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple
- *        paragraphs, the paragraph level shall be determined separately for
- *        each paragraph; if a paragraph does not include any strongly typed
- *        character, then the desired default is used (0 for LTR or 1 for RTL).
- *        Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code>
- *        is also valid, with odd levels indicating RTL.
- *
- * @param embeddingLevels (in) may be used to preset the embedding and override levels,
- *        ignoring characters like LRE and PDF in the text.
- *        A level overrides the directional property of its corresponding
- *        (same index) character if the level has the
- *        <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br>
- *        Except for that bit, it must be
- *        <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>,
- *        with one exception: a level of zero may be specified for a paragraph
- *        separator even if <code>paraLevel>0</code> when multiple paragraphs
- *        are submitted in the same call to <code>ubidi_setPara()</code>.<br><br>
- *        <strong>Caution: </strong>A copy of this pointer, not of the levels,
- *        will be stored in the <code>UBiDi</code> object;
- *        the <code>embeddingLevels</code> array must not be
- *        deallocated before the <code>UBiDi</code> structure is destroyed or reused,
- *        and the <code>embeddingLevels</code>
- *        should not be modified to avoid unexpected results on subsequent Bidi operations.
- *        However, the <code>ubidi_setPara()</code> and
- *        <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br>
- *        After the <code>UBiDi</code> object is reused or destroyed, the caller
- *        must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br>
- *        <strong>Note:</strong> the <code>embeddingLevels</code> array must be
- *        at least <code>length</code> long.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
-              UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
-              UErrorCode *pErrorCode);
-
-/**
- * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
- * contain the reordering information, especially the resolved levels,
- * for all the characters in a line of text. This line of text is
- * specified by referring to a <code>UBiDi</code> object representing
- * this information for a piece of text containing one or more paragraphs,
- * and by specifying a range of indexes in this text.<p>
- * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
- *
- * This is used after calling <code>ubidi_setPara()</code>
- * for a piece of text, and after line-breaking on that text.
- * It is not necessary if each paragraph is treated as a single line.<p>
- *
- * After line-breaking, rules (L1) and (L2) for the treatment of
- * trailing WS and for reordering are performed on
- * a <code>UBiDi</code> object that represents a line.<p>
- *
- * <strong>Important: </strong><code>pLineBiDi</code> shares data with
- * <code>pParaBiDi</code>.
- * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
- * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
- * before the object for its parent paragraph.<p>
- *
- * The text pointer that was stored in <code>pParaBiDi</code> is also copied,
- * and <code>start</code> is added to it so that it points to the beginning of the
- * line for this object.
- *
- * @param pParaBiDi is the parent paragraph object. It must have been set
- * by a successful call to ubidi_setPara.
- *
- * @param start is the line's first index into the text.
- *
- * @param limit is just behind the line's last index into the text
- *        (its last index +1).<br>
- *        It must be <code>0<=start<limit<=</code>containing paragraph limit.
- *        If the specified line crosses a paragraph boundary, the function
- *        will terminate with error code U_ILLEGAL_ARGUMENT_ERROR.
- *
- * @param pLineBiDi is the object that will now represent a line of the text.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @see ubidi_setPara
- * @see ubidi_getProcessedLength
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ubidi_setLine(const UBiDi *pParaBiDi,
-              int32_t start, int32_t limit,
-              UBiDi *pLineBiDi,
-              UErrorCode *pErrorCode);
-
-/**
- * Get the directionality of the text.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>
- *         or <code>UBIDI_MIXED</code>
- *         that indicates if the entire text
- *         represented by this object is unidirectional,
- *         and which direction, or if it is mixed-directional.
- *
- * @see UBiDiDirection
- * @stable ICU 2.0
- */
-U_STABLE UBiDiDirection U_EXPORT2
-ubidi_getDirection(const UBiDi *pBiDi);
-
-/**
- * Get the pointer to the text.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @return The pointer to the text that the UBiDi object was created for.
- *
- * @see ubidi_setPara
- * @see ubidi_setLine
- * @stable ICU 2.0
- */
-U_STABLE const UChar * U_EXPORT2
-ubidi_getText(const UBiDi *pBiDi);
-
-/**
- * Get the length of the text.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @return The length of the text that the UBiDi object was created for.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubidi_getLength(const UBiDi *pBiDi);
-
-/**
- * Get the paragraph level of the text.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @return The paragraph level. If there are multiple paragraphs, their
- *         level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or
- *         UBIDI_DEFAULT_RTL.  In that case, the level of the first paragraph
- *         is returned.
- *
- * @see UBiDiLevel
- * @see ubidi_getParagraph
- * @see ubidi_getParagraphByIndex
- * @stable ICU 2.0
- */
-U_STABLE UBiDiLevel U_EXPORT2
-ubidi_getParaLevel(const UBiDi *pBiDi);
-
-/**
- * Get the number of paragraphs.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @return The number of paragraphs.
- * @stable ICU 3.4
- */
-U_STABLE int32_t U_EXPORT2
-ubidi_countParagraphs(UBiDi *pBiDi);
-
-/**
- * Get a paragraph, given a position within the text.
- * This function returns information about a paragraph.<br>
- * Note: if the paragraph index is known, it is more efficient to
- * retrieve the paragraph information using ubidi_getParagraphByIndex().<p>
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @param charIndex is the index of a character within the text, in the
- *        range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>.
- *
- * @param pParaStart will receive the index of the first character of the
- *        paragraph in the text.
- *        This pointer can be <code>NULL</code> if this
- *        value is not necessary.
- *
- * @param pParaLimit will receive the limit of the paragraph.
- *        The l-value that you point to here may be the
- *        same expression (variable) as the one for
- *        <code>charIndex</code>.
- *        This pointer can be <code>NULL</code> if this
- *        value is not necessary.
- *
- * @param pParaLevel will receive the level of the paragraph.
- *        This pointer can be <code>NULL</code> if this
- *        value is not necessary.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @return The index of the paragraph containing the specified position.
- *
- * @see ubidi_getProcessedLength
- * @stable ICU 3.4
- */
-U_STABLE int32_t U_EXPORT2
-ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
-                   int32_t *pParaLimit, UBiDiLevel *pParaLevel,
-                   UErrorCode *pErrorCode);
-
-/**
- * Get a paragraph, given the index of this paragraph.
- *
- * This function returns information about a paragraph.<p>
- *
- * @param pBiDi is the paragraph <code>UBiDi</code> object.
- *
- * @param paraIndex is the number of the paragraph, in the
- *        range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>.
- *
- * @param pParaStart will receive the index of the first character of the
- *        paragraph in the text.
- *        This pointer can be <code>NULL</code> if this
- *        value is not necessary.
- *
- * @param pParaLimit will receive the limit of the paragraph.
- *        This pointer can be <code>NULL</code> if this
- *        value is not necessary.
- *
- * @param pParaLevel will receive the level of the paragraph.
- *        This pointer can be <code>NULL</code> if this
- *        value is not necessary.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @stable ICU 3.4
- */
-U_STABLE void U_EXPORT2
-ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
-                          int32_t *pParaStart, int32_t *pParaLimit,
-                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode);
-
-/**
- * Get the level for one character.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @param charIndex the index of a character. It must be in the range
- *         [0..ubidi_getProcessedLength(pBiDi)].
- *
- * @return The level for the character at charIndex (0 if charIndex is not
- *         in the valid range).
- *
- * @see UBiDiLevel
- * @see ubidi_getProcessedLength
- * @stable ICU 2.0
- */
-U_STABLE UBiDiLevel U_EXPORT2
-ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex);
-
-/**
- * Get an array of levels for each character.<p>
- *
- * Note that this function may allocate memory under some
- * circumstances, unlike <code>ubidi_getLevelAt()</code>.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose
- *        text length must be strictly positive.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @return The levels array for the text,
- *         or <code>NULL</code> if an error occurs.
- *
- * @see UBiDiLevel
- * @see ubidi_getProcessedLength
- * @stable ICU 2.0
- */
-U_STABLE const UBiDiLevel * U_EXPORT2
-ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
-
-/**
- * Get a logical run.
- * This function returns information about a run and is used
- * to retrieve runs in logical order.<p>
- * This is especially useful for line-breaking on a paragraph.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @param logicalPosition is a logical position within the source text.
- *
- * @param pLogicalLimit will receive the limit of the corresponding run.
- *        The l-value that you point to here may be the
- *        same expression (variable) as the one for
- *        <code>logicalPosition</code>.
- *        This pointer can be <code>NULL</code> if this
- *        value is not necessary.
- *
- * @param pLevel will receive the level of the corresponding run.
- *        This pointer can be <code>NULL</code> if this
- *        value is not necessary.
- *
- * @see ubidi_getProcessedLength
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
-                    int32_t *pLogicalLimit, UBiDiLevel *pLevel);
-
-/**
- * Get the number of runs.
- * This function may invoke the actual reordering on the
- * <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
- * may have resolved only the levels of the text. Therefore,
- * <code>ubidi_countRuns()</code> may have to allocate memory,
- * and may fail doing so.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @return The number of runs.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
-
-/**
- * Get one run's logical start, length, and directionality,
- * which can be 0 for LTR or 1 for RTL.
- * In an RTL run, the character at the logical start is
- * visually on the right of the displayed run.
- * The length is the number of characters in the run.<p>
- * <code>ubidi_countRuns()</code> should be called
- * before the runs are retrieved.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @param runIndex is the number of the run in visual order, in the
- *        range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
- *
- * @param pLogicalStart is the first logical character index in the text.
- *        The pointer may be <code>NULL</code> if this index is not needed.
- *
- * @param pLength is the number of characters (at least one) in the run.
- *        The pointer may be <code>NULL</code> if this is not needed.
- *
- * @return the directionality of the run,
- *         <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
- *         never <code>UBIDI_MIXED</code>.
- *
- * @see ubidi_countRuns
- *
- * Example:
- * <pre>
- * \code
- * int32_t i, count=ubidi_countRuns(pBiDi),
- *         logicalStart, visualIndex=0, length;
- * for(i=0; i<count; ++i) {
- *    if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
- *         do { // LTR
- *             show_char(text[logicalStart++], visualIndex++);
- *         } while(--length>0);
- *     } else {
- *         logicalStart+=length;  // logicalLimit
- *         do { // RTL
- *             show_char(text[--logicalStart], visualIndex++);
- *         } while(--length>0);
- *     }
- * }
- *\endcode
- * </pre>
- *
- * Note that in right-to-left runs, code like this places
- * second surrogates before first ones (which is generally a bad idea)
- * and combining characters before base characters.
- * <p>
- * Use of <code>ubidi_writeReordered()</code>, optionally with the
- * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order
- * to avoid these issues.
- * @stable ICU 2.0
- */
-U_STABLE UBiDiDirection U_EXPORT2
-ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
-                   int32_t *pLogicalStart, int32_t *pLength);
-
-/**
- * Get the visual position from a logical text position.
- * If such a mapping is used many times on the same
- * <code>UBiDi</code> object, then calling
- * <code>ubidi_getLogicalMap()</code> is more efficient.<p>
- *
- * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
- * visual position because the corresponding text character is a Bidi control
- * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
- * <p>
- * When the visual output is altered by using options of
- * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
- * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
- * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not
- * be correct. It is advised to use, when possible, reordering options
- * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
- * <p>
- * Note that in right-to-left runs, this mapping places
- * second surrogates before first ones (which is generally a bad idea)
- * and combining characters before base characters.
- * Use of <code>ubidi_writeReordered()</code>, optionally with the
- * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
- * of using the mapping, in order to avoid these issues.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @param logicalIndex is the index of a character in the text.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @return The visual position of this character.
- *
- * @see ubidi_getLogicalMap
- * @see ubidi_getLogicalIndex
- * @see ubidi_getProcessedLength
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode);
-
-/**
- * Get the logical text position from a visual position.
- * If such a mapping is used many times on the same
- * <code>UBiDi</code> object, then calling
- * <code>ubidi_getVisualMap()</code> is more efficient.<p>
- *
- * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
- * logical position because the corresponding text character is a Bidi mark
- * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
- * <p>
- * This is the inverse function to <code>ubidi_getVisualIndex()</code>.
- * <p>
- * When the visual output is altered by using options of
- * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
- * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
- * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not
- * be correct. It is advised to use, when possible, reordering options
- * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @param visualIndex is the visual position of a character.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @return The index of this character in the text.
- *
- * @see ubidi_getVisualMap
- * @see ubidi_getVisualIndex
- * @see ubidi_getResultLength
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode);
-
-/**
- * Get a logical-to-visual index map (array) for the characters in the UBiDi
- * (paragraph or line) object.
- * <p>
- * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
- * corresponding text characters are Bidi controls removed from the visual
- * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
- * <p>
- * When the visual output is altered by using options of
- * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
- * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
- * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not
- * be correct. It is advised to use, when possible, reordering options
- * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
- * <p>
- * Note that in right-to-left runs, this mapping places
- * second surrogates before first ones (which is generally a bad idea)
- * and combining characters before base characters.
- * Use of <code>ubidi_writeReordered()</code>, optionally with the
- * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
- * of using the mapping, in order to avoid these issues.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code>
- *        indexes which will reflect the reordering of the characters.
- *        If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number
- *        of elements allocated in <code>indexMap</code> must be no less than
- *        <code>ubidi_getResultLength()</code>.
- *        The array does not need to be initialized.<br><br>
- *        The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @see ubidi_getVisualMap
- * @see ubidi_getVisualIndex
- * @see ubidi_getProcessedLength
- * @see ubidi_getResultLength
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
-
-/**
- * Get a visual-to-logical index map (array) for the characters in the UBiDi
- * (paragraph or line) object.
- * <p>
- * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
- * corresponding text characters are Bidi marks inserted in the visual output
- * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
- * <p>
- * When the visual output is altered by using options of
- * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
- * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
- * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
- * be correct. It is advised to use, when possible, reordering options
- * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
- *
- * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
- *
- * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code>
- *        indexes which will reflect the reordering of the characters.
- *        If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number
- *        of elements allocated in <code>indexMap</code> must be no less than
- *        <code>ubidi_getProcessedLength()</code>.
- *        The array does not need to be initialized.<br><br>
- *        The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @see ubidi_getLogicalMap
- * @see ubidi_getLogicalIndex
- * @see ubidi_getProcessedLength
- * @see ubidi_getResultLength
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
-
-/**
- * This is a convenience function that does not use a UBiDi object.
- * It is intended to be used for when an application has determined the levels
- * of objects (character sequences) and just needs to have them reordered (L2).
- * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a
- * <code>UBiDi</code> object.
- *
- * @param levels is an array with <code>length</code> levels that have been determined by
- *        the application.
- *
- * @param length is the number of levels in the array, or, semantically,
- *        the number of objects to be reordered.
- *        It must be <code>length>0</code>.
- *
- * @param indexMap is a pointer to an array of <code>length</code>
- *        indexes which will reflect the reordering of the characters.
- *        The array does not need to be initialized.<p>
- *        The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
-
-/**
- * This is a convenience function that does not use a UBiDi object.
- * It is intended to be used for when an application has determined the levels
- * of objects (character sequences) and just needs to have them reordered (L2).
- * This is equivalent to using <code>ubidi_getVisualMap()</code> on a
- * <code>UBiDi</code> object.
- *
- * @param levels is an array with <code>length</code> levels that have been determined by
- *        the application.
- *
- * @param length is the number of levels in the array, or, semantically,
- *        the number of objects to be reordered.
- *        It must be <code>length>0</code>.
- *
- * @param indexMap is a pointer to an array of <code>length</code>
- *        indexes which will reflect the reordering of the characters.
- *        The array does not need to be initialized.<p>
- *        The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
-
-/**
- * Invert an index map.
- * The index mapping of the first map is inverted and written to
- * the second one.
- *
- * @param srcMap is an array with <code>length</code> elements
- *        which defines the original mapping from a source array containing
- *        <code>length</code> elements to a destination array.
- *        Some elements of the source array may have no mapping in the
- *        destination array. In that case, their value will be
- *        the special value <code>UBIDI_MAP_NOWHERE</code>.
- *        All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>.
- *        Some elements may have a value >= <code>length</code>, if the
- *        destination array has more elements than the source array.
- *        There must be no duplicate indexes (two or more elements with the
- *        same value except <code>UBIDI_MAP_NOWHERE</code>).
- *
- * @param destMap is an array with a number of elements equal to 1 + the highest
- *        value in <code>srcMap</code>.
- *        <code>destMap</code> will be filled with the inverse mapping.
- *        If element with index i in <code>srcMap</code> has a value k different
- *        from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of
- *        the source array maps to element k in the destination array.
- *        The inverse map will have value i in its k-th element.
- *        For all elements of the destination array which do not map to
- *        an element in the source array, the corresponding element in the
- *        inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>.
- *
- * @param length is the length of each array.
- * @see UBIDI_MAP_NOWHERE
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);
-
-/** option flags for ubidi_writeReordered() */
-
-/**
- * option bit for ubidi_writeReordered():
- * keep combining characters after their base characters in RTL runs
- *
- * @see ubidi_writeReordered
- * @stable ICU 2.0
- */
-#define UBIDI_KEEP_BASE_COMBINING       1
-
-/**
- * option bit for ubidi_writeReordered():
- * replace characters with the "mirrored" property in RTL runs
- * by their mirror-image mappings
- *
- * @see ubidi_writeReordered
- * @stable ICU 2.0
- */
-#define UBIDI_DO_MIRRORING              2
-
-/**
- * option bit for ubidi_writeReordered():
- * surround the run with LRMs if necessary;
- * this is part of the approximate "inverse Bidi" algorithm
- *
- * <p>This option does not imply corresponding adjustment of the index
- * mappings.</p>
- *
- * @see ubidi_setInverse
- * @see ubidi_writeReordered
- * @stable ICU 2.0
- */
-#define UBIDI_INSERT_LRM_FOR_NUMERIC    4
-
-/**
- * option bit for ubidi_writeReordered():
- * remove Bidi control characters
- * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC)
- *
- * <p>This option does not imply corresponding adjustment of the index
- * mappings.</p>
- *
- * @see ubidi_writeReordered
- * @stable ICU 2.0
- */
-#define UBIDI_REMOVE_BIDI_CONTROLS      8
-
-/**
- * option bit for ubidi_writeReordered():
- * write the output in reverse order
- *
- * <p>This has the same effect as calling <code>ubidi_writeReordered()</code>
- * first without this option, and then calling
- * <code>ubidi_writeReverse()</code> without mirroring.
- * Doing this in the same step is faster and avoids a temporary buffer.
- * An example for using this option is output to a character terminal that
- * is designed for RTL scripts and stores text in reverse order.</p>
- *
- * @see ubidi_writeReordered
- * @stable ICU 2.0
- */
-#define UBIDI_OUTPUT_REVERSE            16
-
-/**
- * Get the length of the source text processed by the last call to
- * <code>ubidi_setPara()</code>. This length may be different from the length
- * of the source text if option <code>#UBIDI_OPTION_STREAMING</code>
- * has been set.
- * <br>
- * Note that whenever the length of the text affects the execution or the
- * result of a function, it is the processed length which must be considered,
- * except for <code>ubidi_setPara</code> (which receives unprocessed source
- * text) and <code>ubidi_getLength</code> (which returns the original length
- * of the source text).<br>
- * In particular, the processed length is the one to consider in the following
- * cases:
- * <ul>
- * <li>maximum value of the <code>limit</code> argument of
- * <code>ubidi_setLine</code></li>
- * <li>maximum value of the <code>charIndex</code> argument of
- * <code>ubidi_getParagraph</code></li>
- * <li>maximum value of the <code>charIndex</code> argument of
- * <code>ubidi_getLevelAt</code></li>
- * <li>number of elements in the array returned by <code>ubidi_getLevels</code></li>
- * <li>maximum value of the <code>logicalStart</code> argument of
- * <code>ubidi_getLogicalRun</code></li>
- * <li>maximum value of the <code>logicalIndex</code> argument of
- * <code>ubidi_getVisualIndex</code></li>
- * <li>number of elements filled in the <code>*indexMap</code> argument of
- * <code>ubidi_getLogicalMap</code></li>
- * <li>length of text processed by <code>ubidi_writeReordered</code></li>
- * </ul>
- *
- * @param pBiDi is the paragraph <code>UBiDi</code> object.
- *
- * @return The length of the part of the source text processed by
- *         the last call to <code>ubidi_setPara</code>.
- * @see ubidi_setPara
- * @see UBIDI_OPTION_STREAMING
- * @stable ICU 3.6
- */
-U_STABLE int32_t U_EXPORT2
-ubidi_getProcessedLength(const UBiDi *pBiDi);
-
-/**
- * Get the length of the reordered text resulting from the last call to
- * <code>ubidi_setPara()</code>. This length may be different from the length
- * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code>
- * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set.
- * <br>
- * This resulting length is the one to consider in the following cases:
- * <ul>
- * <li>maximum value of the <code>visualIndex</code> argument of
- * <code>ubidi_getLogicalIndex</code></li>
- * <li>number of elements of the <code>*indexMap</code> argument of
- * <code>ubidi_getVisualMap</code></li>
- * </ul>
- * Note that this length stays identical to the source text length if
- * Bidi marks are inserted or removed using option bits of
- * <code>ubidi_writeReordered</code>, or if option
- * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set.
- *
- * @param pBiDi is the paragraph <code>UBiDi</code> object.
- *
- * @return The length of the reordered text resulting from
- *         the last call to <code>ubidi_setPara</code>.
- * @see ubidi_setPara
- * @see UBIDI_OPTION_INSERT_MARKS
- * @see UBIDI_OPTION_REMOVE_CONTROLS
- * @stable ICU 3.6
- */
-U_STABLE int32_t U_EXPORT2
-ubidi_getResultLength(const UBiDi *pBiDi);
-
-U_CDECL_BEGIN
-/**
- * value returned by <code>UBiDiClassCallback</code> callbacks when
- * there is no need to override the standard Bidi class for a given code point.
- * @see UBiDiClassCallback
- * @stable ICU 3.6
- */
-#define U_BIDI_CLASS_DEFAULT  U_CHAR_DIRECTION_COUNT
-
-/**
- * Callback type declaration for overriding default Bidi class values with
- * custom ones.
- * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code>
- * object by calling the <code>ubidi_setClassCallback()</code> function;
- * then the callback will be invoked by the UBA implementation any time the
- * class of a character is to be determined.</p>
- *
- * @param context is a pointer to the callback private data.
- *
- * @param c       is the code point to get a Bidi class for.
- *
- * @return The directional property / Bidi class for the given code point
- *         <code>c</code> if the default class has been overridden, or
- *         <code>#U_BIDI_CLASS_DEFAULT</code> if the standard Bidi class value
- *         for <code>c</code> is to be used.
- * @see ubidi_setClassCallback
- * @see ubidi_getClassCallback
- * @stable ICU 3.6
- */
-typedef UCharDirection U_CALLCONV
-UBiDiClassCallback(const void *context, UChar32 c);
-
-U_CDECL_END
-
-/**
- * Retrieve the Bidi class for a given code point.
- * <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a
- * value other than <code>#U_BIDI_CLASS_DEFAULT</code>, that value is used;
- * otherwise the default class determination mechanism is invoked.</p>
- *
- * @param pBiDi is the paragraph <code>UBiDi</code> object.
- *
- * @param c     is the code point whose Bidi class must be retrieved.
- *
- * @return The Bidi class for character <code>c</code> based
- *         on the given <code>pBiDi</code> instance.
- * @see UBiDiClassCallback
- * @stable ICU 3.6
- */
-U_STABLE UCharDirection U_EXPORT2
-ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c);
-
-/**
- * Set the callback function and callback data used by the UBA
- * implementation for Bidi class determination.
- * <p>This may be useful for assigning Bidi classes to PUA characters, or
- * for special application needs. For instance, an application may want to
- * handle all spaces like L or R characters (according to the base direction)
- * when creating the visual ordering of logical lines which are part of a report
- * organized in columns: there should not be interaction between adjacent
- * cells.<p>
- *
- * @param pBiDi is the paragraph <code>UBiDi</code> object.
- *
- * @param newFn is the new callback function pointer.
- *
- * @param newContext is the new callback context pointer. This can be NULL.
- *
- * @param oldFn fillin: Returns the old callback function pointer. This can be
- *                      NULL.
- *
- * @param oldContext fillin: Returns the old callback's context. This can be
- *                           NULL.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @see ubidi_getClassCallback
- * @stable ICU 3.6
- */
-U_STABLE void U_EXPORT2
-ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
-                       const void *newContext, UBiDiClassCallback **oldFn,
-                       const void **oldContext, UErrorCode *pErrorCode);
-
-/**
- * Get the current callback function used for Bidi class determination.
- *
- * @param pBiDi is the paragraph <code>UBiDi</code> object.
- *
- * @param fn fillin: Returns the callback function pointer.
- *
- * @param context fillin: Returns the callback's private context.
- *
- * @see ubidi_setClassCallback
- * @stable ICU 3.6
- */
-U_STABLE void U_EXPORT2
-ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context);
-
-/**
- * Take a <code>UBiDi</code> object containing the reordering
- * information for a piece of text (one or more paragraphs) set by
- * <code>ubidi_setPara()</code> or for a line of text set by
- * <code>ubidi_setLine()</code> and write a reordered string to the
- * destination buffer.
- *
- * This function preserves the integrity of characters with multiple
- * code units and (optionally) combining characters.
- * Characters in RTL runs can be replaced by mirror-image characters
- * in the destination buffer. Note that "real" mirroring has
- * to be done in a rendering engine by glyph selection
- * and that for many "mirrored" characters there are no
- * Unicode characters as mirror-image equivalents.
- * There are also options to insert or remove Bidi control
- * characters; see the description of the <code>destSize</code>
- * and <code>options</code> parameters and of the option bit flags.
- *
- * @param pBiDi A pointer to a <code>UBiDi</code> object that
- *              is set by <code>ubidi_setPara()</code> or
- *              <code>ubidi_setLine()</code> and contains the reordering
- *              information for the text that it was defined for,
- *              as well as a pointer to that text.<br><br>
- *              The text was aliased (only the pointer was stored
- *              without copying the contents) and must not have been modified
- *              since the <code>ubidi_setPara()</code> call.
- *
- * @param dest A pointer to where the reordered text is to be copied.
- *             The source text and <code>dest[destSize]</code>
- *             must not overlap.
- *
- * @param destSize The size of the <code>dest</code> buffer,
- *                 in number of UChars.
- *                 If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>
- *                 option is set, then the destination length could be
- *                 as large as
- *                 <code>ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>.
- *                 If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
- *                 is set, then the destination length may be less than
- *                 <code>ubidi_getLength(pBiDi)</code>.
- *                 If none of these options is set, then the destination length
- *                 will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>.
- *
- * @param options A bit set of options for the reordering that control
- *                how the reordered text is written.
- *                The options include mirroring the characters on a code
- *                point basis and inserting LRM characters, which is used
- *                especially for transforming visually stored text
- *                to logically stored text (although this is still an
- *                imperfect implementation of an "inverse Bidi" algorithm
- *                because it uses the "forward Bidi" algorithm at its core).
- *                The available options are:
- *                <code>#UBIDI_DO_MIRRORING</code>,
- *                <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
- *                <code>#UBIDI_KEEP_BASE_COMBINING</code>,
- *                <code>#UBIDI_OUTPUT_REVERSE</code>,
- *                <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @return The length of the output string.
- *
- * @see ubidi_getProcessedLength
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubidi_writeReordered(UBiDi *pBiDi,
-                     UChar *dest, int32_t destSize,
-                     uint16_t options,
-                     UErrorCode *pErrorCode);
-
-/**
- * Reverse a Right-To-Left run of Unicode text.
- *
- * This function preserves the integrity of characters with multiple
- * code units and (optionally) combining characters.
- * Characters can be replaced by mirror-image characters
- * in the destination buffer. Note that "real" mirroring has
- * to be done in a rendering engine by glyph selection
- * and that for many "mirrored" characters there are no
- * Unicode characters as mirror-image equivalents.
- * There are also options to insert or remove Bidi control
- * characters.
- *
- * This function is the implementation for reversing RTL runs as part
- * of <code>ubidi_writeReordered()</code>. For detailed descriptions
- * of the parameters, see there.
- * Since no Bidi controls are inserted here, the output string length
- * will never exceed <code>srcLength</code>.
- *
- * @see ubidi_writeReordered
- *
- * @param src A pointer to the RTL run text.
- *
- * @param srcLength The length of the RTL run.
- *
- * @param dest A pointer to where the reordered text is to be copied.
- *             <code>src[srcLength]</code> and <code>dest[destSize]</code>
- *             must not overlap.
- *
- * @param destSize The size of the <code>dest</code> buffer,
- *                 in number of UChars.
- *                 If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
- *                 is set, then the destination length may be less than
- *                 <code>srcLength</code>.
- *                 If this option is not set, then the destination length
- *                 will be exactly <code>srcLength</code>.
- *
- * @param options A bit set of options for the reordering that control
- *                how the reordered text is written.
- *                See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>.
- *
- * @param pErrorCode must be a valid pointer to an error code value.
- *
- * @return The length of the output string.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubidi_writeReverse(const UChar *src, int32_t srcLength,
-                   UChar *dest, int32_t destSize,
-                   uint16_t options,
-                   UErrorCode *pErrorCode);
-
-/*#define BIDI_SAMPLE_CODE*/
-/*@}*/
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/ubidi.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ubidi.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ubidi.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ubidi.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,2013 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1999-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  ubidi.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999jul27
+*   created by: Markus W. Scherer, updated by Matitiahu Allouche
+*/
+
+#ifndef UBIDI_H
+#define UBIDI_H
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+
+/**
+ *\file
+ * \brief C API: Bidi algorithm
+ *
+ * <h2>Bidi algorithm for ICU</h2>
+ *
+ * This is an implementation of the Unicode Bidirectional algorithm.
+ * The algorithm is defined in the
+ * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
+ * version 13, also described in The Unicode Standard, Version 4.0 .<p>
+ *
+ * Note: Libraries that perform a bidirectional algorithm and
+ * reorder strings accordingly are sometimes called "Storage Layout Engines".
+ * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such
+ * "Storage Layout Engines".
+ *
+ * <h3>General remarks about the API:</h3>
+ *
+ * In functions with an error code parameter,
+ * the <code>pErrorCode</code> pointer must be valid
+ * and the value that it points to must not indicate a failure before
+ * the function call. Otherwise, the function returns immediately.
+ * After the function call, the value indicates success or failure.<p>
+ *
+ * The &quot;limit&quot; of a sequence of characters is the position just after their
+ * last character, i.e., one more than that position.<p>
+ *
+ * Some of the API functions provide access to &quot;runs&quot;.
+ * Such a &quot;run&quot; is defined as a sequence of characters
+ * that are at the same embedding level
+ * after performing the Bidi algorithm.<p>
+ *
+ * @author Markus W. Scherer
+ * @version 1.0
+ *
+ *
+ * <h4> Sample code for the ICU Bidi API </h4>
+ *
+ * <h5>Rendering a paragraph with the ICU Bidi API</h5>
+ *
+ * This is (hypothetical) sample code that illustrates
+ * how the ICU Bidi API could be used to render a paragraph of text.
+ * Rendering code depends highly on the graphics system,
+ * therefore this sample code must make a lot of assumptions,
+ * which may or may not match any existing graphics system's properties.
+ *
+ * <p>The basic assumptions are:</p>
+ * <ul>
+ * <li>Rendering is done from left to right on a horizontal line.</li>
+ * <li>A run of single-style, unidirectional text can be rendered at once.</li>
+ * <li>Such a run of text is passed to the graphics system with
+ *     characters (code units) in logical order.</li>
+ * <li>The line-breaking algorithm is very complicated
+ *     and Locale-dependent -
+ *     and therefore its implementation omitted from this sample code.</li>
+ * </ul>
+ *
+ * <pre>
+ * \code
+ *#include "unicode/ubidi.h"
+ *
+ *typedef enum {
+ *     styleNormal=0, styleSelected=1,
+ *     styleBold=2, styleItalics=4,
+ *     styleSuper=8, styleSub=16
+ *} Style;
+ *
+ *typedef struct { int32_t limit; Style style; } StyleRun;
+ *
+ *int getTextWidth(const UChar *text, int32_t start, int32_t limit,
+ *                  const StyleRun *styleRuns, int styleRunCount);
+ *
+ * // set *pLimit and *pStyleRunLimit for a line
+ * // from text[start] and from styleRuns[styleRunStart]
+ * // using ubidi_getLogicalRun(para, ...)
+ *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
+ *                  UBiDi *para,
+ *                  const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
+ *                  int *pLineWidth);
+ *
+ * // render runs on a line sequentially, always from left to right
+ *
+ * // prepare rendering a new line
+ * void startLine(UBiDiDirection textDirection, int lineWidth);
+ *
+ * // render a run of text and advance to the right by the run width
+ * // the text[start..limit-1] is always in logical order
+ * void renderRun(const UChar *text, int32_t start, int32_t limit,
+ *               UBiDiDirection textDirection, Style style);
+ *
+ * // We could compute a cross-product
+ * // from the style runs with the directional runs
+ * // and then reorder it.
+ * // Instead, here we iterate over each run type
+ * // and render the intersections -
+ * // with shortcuts in simple (and common) cases.
+ * // renderParagraph() is the main function.
+ *
+ * // render a directional run with
+ * // (possibly) multiple style runs intersecting with it
+ * void renderDirectionalRun(const UChar *text,
+ *                           int32_t start, int32_t limit,
+ *                           UBiDiDirection direction,
+ *                           const StyleRun *styleRuns, int styleRunCount) {
+ *     int i;
+ *
+ *     // iterate over style runs
+ *     if(direction==UBIDI_LTR) {
+ *         int styleLimit;
+ *
+ *         for(i=0; i<styleRunCount; ++i) {
+ *             styleLimit=styleRun[i].limit;
+ *             if(start<styleLimit) {
+ *                 if(styleLimit>limit) { styleLimit=limit; }
+ *                 renderRun(text, start, styleLimit,
+ *                           direction, styleRun[i].style);
+ *                 if(styleLimit==limit) { break; }
+ *                 start=styleLimit;
+ *             }
+ *         }
+ *     } else {
+ *         int styleStart;
+ *
+ *         for(i=styleRunCount-1; i>=0; --i) {
+ *             if(i>0) {
+ *                 styleStart=styleRun[i-1].limit;
+ *             } else {
+ *                 styleStart=0;
+ *             }
+ *             if(limit>=styleStart) {
+ *                 if(styleStart<start) { styleStart=start; }
+ *                 renderRun(text, styleStart, limit,
+ *                           direction, styleRun[i].style);
+ *                 if(styleStart==start) { break; }
+ *                 limit=styleStart;
+ *             }
+ *         }
+ *     }
+ * }
+ *
+ * // the line object represents text[start..limit-1]
+ * void renderLine(UBiDi *line, const UChar *text,
+ *                 int32_t start, int32_t limit,
+ *                 const StyleRun *styleRuns, int styleRunCount) {
+ *     UBiDiDirection direction=ubidi_getDirection(line);
+ *     if(direction!=UBIDI_MIXED) {
+ *         // unidirectional
+ *         if(styleRunCount<=1) {
+ *             renderRun(text, start, limit, direction, styleRuns[0].style);
+ *         } else {
+ *             renderDirectionalRun(text, start, limit,
+ *                                  direction, styleRuns, styleRunCount);
+ *         }
+ *     } else {
+ *         // mixed-directional
+ *         int32_t count, i, length;
+ *         UBiDiLevel level;
+ *
+ *         count=ubidi_countRuns(para, pErrorCode);
+ *         if(U_SUCCESS(*pErrorCode)) {
+ *             if(styleRunCount<=1) {
+ *                 Style style=styleRuns[0].style;
+ *
+ *                 // iterate over directional runs
+ *                for(i=0; i<count; ++i) {
+ *                    direction=ubidi_getVisualRun(para, i, &start, &length);
+ *                     renderRun(text, start, start+length, direction, style);
+ *                }
+ *             } else {
+ *                 int32_t j;
+ *
+ *                 // iterate over both directional and style runs
+ *                 for(i=0; i<count; ++i) {
+ *                     direction=ubidi_getVisualRun(line, i, &start, &length);
+ *                     renderDirectionalRun(text, start, start+length,
+ *                                          direction, styleRuns, styleRunCount);
+ *                 }
+ *             }
+ *         }
+ *     }
+ * }
+ *
+ *void renderParagraph(const UChar *text, int32_t length,
+ *                     UBiDiDirection textDirection,
+ *                      const StyleRun *styleRuns, int styleRunCount,
+ *                      int lineWidth,
+ *                      UErrorCode *pErrorCode) {
+ *     UBiDi *para;
+ *
+ *     if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
+ *         return;
+ *     }
+ *
+ *     para=ubidi_openSized(length, 0, pErrorCode);
+ *     if(para==NULL) { return; }
+ *
+ *     ubidi_setPara(para, text, length,
+ *                   textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
+ *                   NULL, pErrorCode);
+ *     if(U_SUCCESS(*pErrorCode)) {
+ *         UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
+ *         StyleRun styleRun={ length, styleNormal };
+ *         int width;
+ *
+ *         if(styleRuns==NULL || styleRunCount<=0) {
+ *            styleRunCount=1;
+ *             styleRuns=&styleRun;
+ *         }
+ *
+ *        // assume styleRuns[styleRunCount-1].limit>=length
+ *
+ *         width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
+ *         if(width<=lineWidth) {
+ *             // everything fits onto one line
+ *
+ *            // prepare rendering a new line from either left or right
+ *             startLine(paraLevel, width);
+ *
+ *             renderLine(para, text, 0, length,
+ *                        styleRuns, styleRunCount);
+ *         } else {
+ *             UBiDi *line;
+ *
+ *             // we need to render several lines
+ *             line=ubidi_openSized(length, 0, pErrorCode);
+ *             if(line!=NULL) {
+ *                 int32_t start=0, limit;
+ *                 int styleRunStart=0, styleRunLimit;
+ *
+ *                 for(;;) {
+ *                     limit=length;
+ *                     styleRunLimit=styleRunCount;
+ *                     getLineBreak(text, start, &limit, para,
+ *                                  styleRuns, styleRunStart, &styleRunLimit,
+ *                                 &width);
+ *                     ubidi_setLine(para, start, limit, line, pErrorCode);
+ *                     if(U_SUCCESS(*pErrorCode)) {
+ *                         // prepare rendering a new line
+ *                         // from either left or right
+ *                         startLine(paraLevel, width);
+ *
+ *                         renderLine(line, text, start, limit,
+ *                                    styleRuns+styleRunStart,
+ *                                    styleRunLimit-styleRunStart);
+ *                     }
+ *                     if(limit==length) { break; }
+ *                     start=limit;
+ *                     styleRunStart=styleRunLimit-1;
+ *                     if(start>=styleRuns[styleRunStart].limit) {
+ *                         ++styleRunStart;
+ *                     }
+ *                 }
+ *
+ *                 ubidi_close(line);
+ *             }
+ *        }
+ *    }
+ *
+ *     ubidi_close(para);
+ *}
+ *\endcode
+ * </pre>
+ */
+
+/*DOCXX_TAG*/
+/*@{*/
+
+/**
+ * UBiDiLevel is the type of the level values in this
+ * Bidi implementation.
+ * It holds an embedding level and indicates the visual direction
+ * by its bit&nbsp;0 (even/odd value).<p>
+ *
+ * It can also hold non-level values for the
+ * <code>paraLevel</code> and <code>embeddingLevels</code>
+ * arguments of <code>ubidi_setPara()</code>; there:
+ * <ul>
+ * <li>bit&nbsp;7 of an <code>embeddingLevels[]</code>
+ * value indicates whether the using application is
+ * specifying the level of a character to <i>override</i> whatever the
+ * Bidi implementation would resolve it to.</li>
+ * <li><code>paraLevel</code> can be set to the
+ * pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
+ * and <code>UBIDI_DEFAULT_RTL</code>.</li>
+ * </ul>
+ *
+ * @see ubidi_setPara
+ *
+ * <p>The related constants are not real, valid level values.
+ * <code>UBIDI_DEFAULT_XXX</code> can be used to specify
+ * a default for the paragraph level for
+ * when the <code>ubidi_setPara()</code> function
+ * shall determine it but there is no
+ * strongly typed character in the input.<p>
+ *
+ * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
+ * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
+ * just like with normal LTR and RTL level values -
+ * these special values are designed that way. Also, the implementation
+ * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
+ *
+ * @see UBIDI_DEFAULT_LTR
+ * @see UBIDI_DEFAULT_RTL
+ * @see UBIDI_LEVEL_OVERRIDE
+ * @see UBIDI_MAX_EXPLICIT_LEVEL
+ * @stable ICU 2.0
+ */
+typedef uint8_t UBiDiLevel;
+
+/** Paragraph level setting.<p>
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 0 (left-to-right).<p>
+ *
+ * If this value is used in conjunction with reordering modes
+ * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
+ * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
+ * is assumed to be visual LTR, and the text after reordering is required
+ * to be the corresponding logical string with appropriate contextual
+ * direction. The direction of the result string will be RTL if either
+ * the righmost or leftmost strong character of the source text is RTL
+ * or Arabic Letter, the direction will be LTR otherwise.<p>
+ *
+ * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
+ * be added at the beginning of the result string to ensure round trip
+ * (that the result string, when reordered back to visual, will produce
+ * the original source text).
+ * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 2.0
+ */
+#define UBIDI_DEFAULT_LTR 0xfe
+
+/** Paragraph level setting.<p>
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 1 (right-to-left).<p>
+ *
+ * If this value is used in conjunction with reordering modes
+ * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
+ * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
+ * is assumed to be visual LTR, and the text after reordering is required
+ * to be the corresponding logical string with appropriate contextual
+ * direction. The direction of the result string will be RTL if either
+ * the righmost or leftmost strong character of the source text is RTL
+ * or Arabic Letter, or if the text contains no strong character;
+ * the direction will be LTR otherwise.<p>
+ *
+ * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
+ * be added at the beginning of the result string to ensure round trip
+ * (that the result string, when reordered back to visual, will produce
+ * the original source text).
+ * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 2.0
+ */
+#define UBIDI_DEFAULT_RTL 0xff
+
+/**
+ * Maximum explicit embedding level.
+ * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
+ * @stable ICU 2.0
+ */
+#define UBIDI_MAX_EXPLICIT_LEVEL 61
+
+/** Bit flag for level input.
+ *  Overrides directional properties.
+ * @stable ICU 2.0
+ */
+#define UBIDI_LEVEL_OVERRIDE 0x80
+
+/**
+ * Special value which can be returned by the mapping functions when a logical
+ * index has no corresponding visual index or vice-versa. This may happen
+ * for the logical-to-visual mapping of a Bidi control when option
+ * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen
+ * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted
+ * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getVisualMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getLogicalMap
+ * @stable ICU 3.6
+ */
+#define UBIDI_MAP_NOWHERE   (-1)
+
+/**
+ * <code>UBiDiDirection</code> values indicate the text direction.
+ * @stable ICU 2.0
+ */
+enum UBiDiDirection {
+    /** All left-to-right text. This is a 0 value. @stable ICU 2.0 */
+    UBIDI_LTR,
+    /** All right-to-left text. This is a 1 value. @stable ICU 2.0 */
+    UBIDI_RTL,
+    /** Mixed-directional text. @stable ICU 2.0 */
+    UBIDI_MIXED
+};
+
+/** @stable ICU 2.0 */
+typedef enum UBiDiDirection UBiDiDirection;
+
+/**
+ * Forward declaration of the <code>UBiDi</code> structure for the declaration of
+ * the API functions. Its fields are implementation-specific.<p>
+ * This structure holds information about a paragraph (or multiple paragraphs)
+ * of text with Bidi-algorithm-related details, or about one line of
+ * such a paragraph.<p>
+ * Reordering can be done on a line, or on one or more paragraphs which are
+ * then interpreted each as one single line.
+ * @stable ICU 2.0
+ */
+struct UBiDi;
+
+/** @stable ICU 2.0 */
+typedef struct UBiDi UBiDi;
+
+/**
+ * Allocate a <code>UBiDi</code> structure.
+ * Such an object is initially empty. It is assigned
+ * the Bidi properties of a piece of text containing one or more paragraphs
+ * by <code>ubidi_setPara()</code>
+ * or the Bidi properties of a line within a paragraph by
+ * <code>ubidi_setLine()</code>.<p>
+ * This object can be reused for as long as it is not deallocated
+ * by calling <code>ubidi_close()</code>.<p>
+ * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate
+ * additional memory for internal structures as necessary.
+ *
+ * @return An empty <code>UBiDi</code> object.
+ * @stable ICU 2.0
+ */
+U_STABLE UBiDi * U_EXPORT2
+ubidi_open(void);
+
+/**
+ * Allocate a <code>UBiDi</code> structure with preallocated memory
+ * for internal structures.
+ * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
+ * with no arguments, but it also preallocates memory for internal structures
+ * according to the sizings supplied by the caller.<p>
+ * Subsequent functions will not allocate any more memory, and are thus
+ * guaranteed not to fail because of lack of memory.<p>
+ * The preallocation can be limited to some of the internal memory
+ * by setting some values to 0 here. That means that if, e.g.,
+ * <code>maxRunCount</code> cannot be reasonably predetermined and should not
+ * be set to <code>maxLength</code> (the only failproof value) to avoid
+ * wasting memory, then <code>maxRunCount</code> could be set to 0 here
+ * and the internal structures that are associated with it will be allocated
+ * on demand, just like with <code>ubidi_open()</code>.
+ *
+ * @param maxLength is the maximum text or line length that internal memory
+ *        will be preallocated for. An attempt to associate this object with a
+ *        longer text will fail, unless this value is 0, which leaves the allocation
+ *        up to the implementation.
+ *
+ * @param maxRunCount is the maximum anticipated number of same-level runs
+ *        that internal memory will be preallocated for. An attempt to access
+ *        visual runs on an object that was not preallocated for as many runs
+ *        as the text was actually resolved to will fail,
+ *        unless this value is 0, which leaves the allocation up to the implementation.<br><br>
+ *        The number of runs depends on the actual text and maybe anywhere between
+ *        1 and <code>maxLength</code>. It is typically small.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return An empty <code>UBiDi</code> object with preallocated memory.
+ * @stable ICU 2.0
+ */
+U_STABLE UBiDi * U_EXPORT2
+ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);
+
+/**
+ * <code>ubidi_close()</code> must be called to free the memory
+ * associated with a UBiDi object.<p>
+ *
+ * <strong>Important: </strong>
+ * A parent <code>UBiDi</code> object must not be destroyed or reused if
+ * it still has children.
+ * If a <code>UBiDi</code> object has become the <i>child</i>
+ * of another one (its <i>parent</i>) by calling
+ * <code>ubidi_setLine()</code>, then the child object must
+ * be destroyed (closed) or reused (by calling
+ * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
+ * before the parent object.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_setLine
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_close(UBiDi *pBiDi);
+
+/**
+ * Modify the operation of the Bidi algorithm such that it
+ * approximates an "inverse Bidi" algorithm. This function
+ * must be called before <code>ubidi_setPara()</code>.
+ *
+ * <p>The normal operation of the Bidi algorithm as described
+ * in the Unicode Technical Report is to take text stored in logical
+ * (keyboard, typing) order and to determine the reordering of it for visual
+ * rendering.
+ * Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * to logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi" and that the current implementation provides only an
+ * approximation of "inverse Bidi".</p>
+ *
+ * <p>With <code>isInverse</code> set to <code>TRUE</code>,
+ * this function changes the behavior of some of the subsequent functions
+ * in a way that they can be used for the inverse Bidi algorithm.
+ * Specifically, runs of text with numeric characters will be treated in a
+ * special way and may need to be surrounded with LRM characters when they are
+ * written in reordered sequence.</p>
+ *
+ * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>.
+ * Since the actual input for "inverse Bidi" is visually ordered text and
+ * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually
+ * the runs of the logically ordered output.</p>
+ *
+ * <p>Calling this function with argument <code>isInverse</code> set to
+ * <code>TRUE</code> is equivalent to calling
+ * <code>ubidi_setReorderingMode</code> with argument
+ * <code>reorderingMode</code>
+ * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
+ * Calling this function with argument <code>isInverse</code> set to
+ * <code>FALSE</code> is equivalent to calling
+ * <code>ubidi_setReorderingMode</code> with argument
+ * <code>reorderingMode</code>
+ * set to <code>#UBIDI_REORDER_DEFAULT</code>.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ *
+ * @param isInverse specifies "forward" or "inverse" Bidi operation.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_writeReordered
+ * @see ubidi_setReorderingMode
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);
+
+/**
+ * Is this Bidi object set to perform the inverse Bidi algorithm?
+ * <p>Note: calling this function after setting the reordering mode with
+ * <code>ubidi_setReorderingMode</code> will return <code>TRUE</code> if the
+ * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>,
+ * <code>FALSE</code> for all other values.</p>
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @return TRUE if the Bidi object is set to perform the inverse Bidi algorithm
+ * by handling numbers as L.
+ *
+ * @see ubidi_setInverse
+ * @see ubidi_setReorderingMode
+ * @stable ICU 2.0
+ */
+
+U_STABLE UBool U_EXPORT2
+ubidi_isInverse(UBiDi *pBiDi);
+
+/**
+ * Specify whether block separators must be allocated level zero,
+ * so that successive paragraphs will progress from left to right.
+ * This function must be called before <code>ubidi_setPara()</code>.
+ * Paragraph separators (B) may appear in the text.  Setting them to level zero
+ * means that all paragraph separators (including one possibly appearing
+ * in the last text position) are kept in the reordered text after the text
+ * that they follow in the source text.
+ * When this feature is not enabled, a paragraph separator at the last
+ * position of the text before reordering will go to the first position
+ * of the reordered text when the paragraph level is odd.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ *
+ * @param orderParagraphsLTR specifies whether paragraph separators (B) must
+ * receive level 0, so that successive paragraphs progress from left to right.
+ *
+ * @see ubidi_setPara
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);
+
+/**
+ * Is this Bidi object set to allocate level 0 to block separators so that
+ * successive paragraphs progress from left to right?
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @return TRUE if the Bidi object is set to allocate level 0 to block
+ *         separators.
+ *
+ * @see ubidi_orderParagraphsLTR
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
+
+/**
+ * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi
+ * algorithm to use.
+ *
+ * @see ubidi_setReorderingMode
+ * @stable ICU 3.6
+ */
+typedef enum UBiDiReorderingMode {
+    /** Regular Logical to Visual Bidi algorithm according to Unicode.
+      * This is a 0 value.
+      * @stable ICU 3.6 */
+    UBIDI_REORDER_DEFAULT = 0,
+    /** Logical to Visual algorithm which handles numbers in a way which
+      * mimicks the behavior of Windows XP.
+      * @stable ICU 3.6 */
+    UBIDI_REORDER_NUMBERS_SPECIAL,
+    /** Logical to Visual algorithm grouping numbers with adjacent R characters
+      * (reversible algorithm).
+      * @stable ICU 3.6 */
+    UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
+    /** Reorder runs only to transform a Logical LTR string to the Logical RTL
+      * string with the same display, or vice-versa.<br>
+      * If this mode is set together with option
+      * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source
+      * text may be removed and other controls may be added to produce the
+      * minimum combination which has the required display.
+      * @stable ICU 3.6 */
+    UBIDI_REORDER_RUNS_ONLY,
+    /** Visual to Logical algorithm which handles numbers like L
+      * (same algorithm as selected by <code>ubidi_setInverse(TRUE)</code>.
+      * @see ubidi_setInverse
+      * @stable ICU 3.6 */
+    UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
+    /** Visual to Logical algorithm equivalent to the regular Logical to Visual
+      * algorithm.
+      * @stable ICU 3.6 */
+    UBIDI_REORDER_INVERSE_LIKE_DIRECT,
+    /** Inverse Bidi (Visual to Logical) algorithm for the
+      * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
+      * @stable ICU 3.6 */
+    UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL,
+    /** Number of values for reordering mode.
+      * @stable ICU 3.6 */
+    UBIDI_REORDER_COUNT
+} UBiDiReorderingMode;
+
+/**
+ * Modify the operation of the Bidi algorithm such that it implements some
+ * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
+ * algorithm, depending on different values of the "reordering mode".
+ * This function must be called before <code>ubidi_setPara()</code>, and stays
+ * in effect until called again with a different argument.
+ *
+ * <p>The normal operation of the Bidi algorithm as described
+ * in the Unicode Standard Annex #9 is to take text stored in logical
+ * (keyboard, typing) order and to determine how to reorder it for visual
+ * rendering.</p>
+ *
+ * <p>With the reordering mode set to a value other than
+ * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of
+ * some of the subsequent functions in a way such that they implement an
+ * inverse Bidi algorithm or some other algorithm variants.</p>
+ *
+ * <p>Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * into logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi", so a number of variants are implemented here.</p>
+ *
+ * <p>In other cases, it may be desirable to emulate some variant of the
+ * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
+ * Logical to Logical transformation.</p>
+ *
+ * <ul>
+ * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>,
+ * the standard Bidi Logical to Visual algorithm is applied.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>,
+ * the algorithm used to perform Bidi transformations when calling
+ * <code>ubidi_setPara</code> should approximate the algorithm used in
+ * Microsoft Windows XP rather than strictly conform to the Unicode Bidi
+ * algorithm.
+ * <br>
+ * The differences between the basic algorithm and the algorithm addressed
+ * by this option are as follows:
+ * <ul>
+ *   <li>Within text at an even embedding level, the sequence "123AB"
+ *   (where AB represent R or AL letters) is transformed to "123BA" by the
+ *   Unicode algorithm and to "BA123" by the Windows algorithm.</li>
+ *   <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just
+ *   like regular numbers (EN).</li>
+ * </ul></li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>,
+ * numbers located between LTR text and RTL text are associated with the RTL
+ * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
+ * upper case letters represent RTL characters) will be transformed to
+ * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
+ * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
+ * This makes the algorithm reversible and makes it useful when round trip
+ * (from visual to logical and back to visual) must be achieved without
+ * adding LRM characters. However, this is a variation from the standard
+ * Unicode Bidi algorithm.<br>
+ * The source text should not contain Bidi control characters other than LRM
+ * or RLM.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_RUNS_ONLY</code>,
+ * a "Logical to Logical" transformation must be performed:
+ * <ul>
+ * <li>If the default text level of the source text (argument <code>paraLevel</code>
+ * in <code>ubidi_setPara</code>) is even, the source text will be handled as
+ * LTR logical text and will be transformed to the RTL logical text which has
+ * the same LTR visual display.</li>
+ * <li>If the default level of the source text is odd, the source text
+ * will be handled as RTL logical text and will be transformed to the
+ * LTR logical text which has the same LTR visual display.</li>
+ * </ul>
+ * This mode may be needed when logical text which is basically Arabic or
+ * Hebrew, with possible included numbers or phrases in English, has to be
+ * displayed as if it had an even embedding level (this can happen if the
+ * displaying application treats all text as if it was basically LTR).
+ * <br>
+ * This mode may also be needed in the reverse case, when logical text which is
+ * basically English, with possible included phrases in Arabic or Hebrew, has to
+ * be displayed as if it had an odd embedding level.
+ * <br>
+ * Both cases could be handled by adding LRE or RLE at the head of the text,
+ * if the display subsystem supports these formatting controls. If it does not,
+ * the problem may be handled by transforming the source text in this mode
+ * before displaying it, so that it will be displayed properly.<br>
+ * The source text should not contain Bidi control characters other than LRM
+ * or RLM.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm
+ * is applied.
+ * Runs of text with numeric characters will be treated like LTR letters and
+ * may need to be surrounded with LRM characters when they are written in
+ * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can
+ * be used with function <code>ubidi_writeReordered</code> to this end. This
+ * mode is equivalent to calling <code>ubidi_setInverse()</code> with
+ * argument <code>isInverse</code> set to <code>TRUE</code>.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual
+ * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
+ * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>
+ * but is closer to the regular Bidi algorithm.
+ * <br>
+ * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
+ * upper case represents RTL characters) will be transformed to
+ * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
+ * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
+ * When used in conjunction with option
+ * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally
+ * adds Bidi marks to the output significantly more sparingly than mode
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option
+ * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to
+ * <code>ubidi_writeReordered</code>.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
+ * Bidi algorithm used in Windows XP is used as an approximation of an
+ * "inverse Bidi" algorithm.
+ * <br>
+ * For example, an LTR paragraph with the content "abc FED123" (where
+ * upper case represents RTL characters) will be transformed to
+ * "abc 123DEF.</li>
+ * </ul>
+ *
+ * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
+ * (i.e. those with a name starting with <code>UBIDI_REORDER_INVERSE</code>),
+ * output runs should be retrieved using
+ * <code>ubidi_getVisualRun()</code>, and the output text with
+ * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in
+ * "inverse Bidi" modes the input is actually visually ordered text and
+ * reordered output returned by <code>ubidi_getVisualRun()</code> or
+ * <code>ubidi_writeReordered()</code> are actually runs or character string
+ * of logically ordered output.<br>
+ * For all the "inverse Bidi" modes, the source text should not contain
+ * Bidi control characters other than LRM or RLM.</p>
+ *
+ * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of
+ * <code>ubidi_writeReordered</code> has no useful meaning and should not be
+ * used in conjunction with any value of the reordering mode specifying
+ * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @param reorderingMode specifies the required variant of the Bidi algorithm.
+ *
+ * @see UBiDiReorderingMode
+ * @see ubidi_setInverse
+ * @see ubidi_setPara
+ * @see ubidi_writeReordered
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode);
+
+/**
+ * What is the requested reordering mode for a given Bidi object?
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @return the current reordering mode of the Bidi object
+ * @see ubidi_setReorderingMode
+ * @stable ICU 3.6
+ */
+U_STABLE UBiDiReorderingMode U_EXPORT2
+ubidi_getReorderingMode(UBiDi *pBiDi);
+
+/**
+ * <code>UBiDiReorderingOption</code> values indicate which options are
+ * specified to affect the Bidi algorithm.
+ *
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+typedef enum UBiDiReorderingOption {
+    /**
+     * option value for <code>ubidi_setReorderingOptions</code>:
+     * disable all the options which can be set with this function
+     * @see ubidi_setReorderingOptions
+     * @stable ICU 3.6
+     */
+    UBIDI_OPTION_DEFAULT = 0,
+
+    /**
+     * option bit for <code>ubidi_setReorderingOptions</code>:
+     * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
+     * a reordering to a Logical order
+     *
+     * <p>This option must be set or reset before calling
+     * <code>ubidi_setPara</code>.</p>
+     *
+     * <p>This option is significant only with reordering modes which generate
+     * a result with Logical order, specifically:</p>
+     * <ul>
+     *   <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li>
+     *   <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li>
+     *   <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li>
+     *   <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
+     * </ul>
+     *
+     * <p>If this option is set in conjunction with reordering mode
+     * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
+     * <code>ubidi_setInverse(TRUE)</code>, it implies
+     * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>
+     * in calls to function <code>ubidi_writeReordered()</code>.</p>
+     *
+     * <p>For other reordering modes, a minimum number of LRM or RLM characters
+     * will be added to the source text after reordering it so as to ensure
+     * round trip, i.e. when applying the inverse reordering mode on the
+     * resulting logical text with removal of Bidi marks
+     * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling
+     * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
+     * in <code>ubidi_writeReordered</code>), the result will be identical to the
+     * source text in the first transformation.
+     *
+     * <p>This option will be ignored if specified together with option
+     * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. It inhibits option
+     * <code>UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function
+     * <code>ubidi_writeReordered()</code> and it implies option
+     * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function
+     * <code>ubidi_writeReordered()</code> if the reordering mode is
+     * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
+     *
+     * @see ubidi_setReorderingMode
+     * @see ubidi_setReorderingOptions
+     * @stable ICU 3.6
+     */
+    UBIDI_OPTION_INSERT_MARKS = 1,
+
+    /**
+     * option bit for <code>ubidi_setReorderingOptions</code>:
+     * remove Bidi control characters
+     *
+     * <p>This option must be set or reset before calling
+     * <code>ubidi_setPara</code>.</p>
+     *
+     * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+     * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls
+     * to function <code>ubidi_writeReordered()</code> and it implies option
+     * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p>
+     *
+     * @see ubidi_setReorderingMode
+     * @see ubidi_setReorderingOptions
+     * @stable ICU 3.6
+     */
+    UBIDI_OPTION_REMOVE_CONTROLS = 2,
+
+    /**
+     * option bit for <code>ubidi_setReorderingOptions</code>:
+     * process the output as part of a stream to be continued
+     *
+     * <p>This option must be set or reset before calling
+     * <code>ubidi_setPara</code>.</p>
+     *
+     * <p>This option specifies that the caller is interested in processing large
+     * text object in parts.
+     * The results of the successive calls are expected to be concatenated by the
+     * caller. Only the call for the last part will have this option bit off.</p>
+     *
+     * <p>When this option bit is on, <code>ubidi_setPara()</code> may process
+     * less than the full source text in order to truncate the text at a meaningful
+     * boundary. The caller should call <code>ubidi_getProcessedLength()</code>
+     * immediately after calling <code>ubidi_setPara()</code> in order to
+     * determine how much of the source text has been processed.
+     * Source text beyond that length should be resubmitted in following calls to
+     * <code>ubidi_setPara</code>. The processed length may be less than
+     * the length of the source text if a character preceding the last character of
+     * the source text constitutes a reasonable boundary (like a block separator)
+     * for text to be continued.<br>
+     * If the last character of the source text constitutes a reasonable
+     * boundary, the whole text will be processed at once.<br>
+     * If nowhere in the source text there exists
+     * such a reasonable boundary, the processed length will be zero.<br>
+     * The caller should check for such an occurrence and do one of the following:
+     * <ul><li>submit a larger amount of text with a better chance to include
+     *         a reasonable boundary.</li>
+     *     <li>resubmit the same text after turning off option
+     *         <code>UBIDI_OPTION_STREAMING</code>.</li></ul>
+     * In all cases, this option should be turned off before processing the last
+     * part of the text.</p>
+     *
+     * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used,
+     * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with
+     * argument <code>orderParagraphsLTR</code> set to <code>TRUE</code> before
+     * calling <code>ubidi_setPara</code> so that later paragraphs may be
+     * concatenated to previous paragraphs on the right.</p>
+     *
+     * @see ubidi_setReorderingMode
+     * @see ubidi_setReorderingOptions
+     * @see ubidi_getProcessedLength
+     * @see ubidi_orderParagraphsLTR
+     * @stable ICU 3.6
+     */
+    UBIDI_OPTION_STREAMING = 4
+} UBiDiReorderingOption;
+
+/**
+ * Specify which of the reordering options
+ * should be applied during Bidi transformations.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @param reorderingOptions is a combination of zero or more of the following
+ * options:
+ * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>,
+ * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>.
+ *
+ * @see ubidi_getReorderingOptions
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions);
+
+/**
+ * What are the reordering options applied to a given Bidi object?
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @return the current reordering options of the Bidi object
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+U_STABLE uint32_t U_EXPORT2
+ubidi_getReorderingOptions(UBiDi *pBiDi);
+
+/**
+ * Perform the Unicode Bidi algorithm. It is defined in the
+ * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Anned #9</a>,
+ * version 13,
+ * also described in The Unicode Standard, Version 4.0 .<p>
+ *
+ * This function takes a piece of plain text containing one or more paragraphs,
+ * with or without externally specified embedding levels from <i>styled</i>
+ * text and computes the left-right-directionality of each character.<p>
+ *
+ * If the entire text is all of the same directionality, then
+ * the function may not perform all the steps described by the algorithm,
+ * i.e., some levels may not be the same as if all steps were performed.
+ * This is not relevant for unidirectional text.<br>
+ * For example, in pure LTR text with numbers the numbers would get
+ * a resolved level of 2 higher than the surrounding text according to
+ * the algorithm. This implementation may set all resolved levels to
+ * the same value in such a case.<p>
+ *
+ * The text can be composed of multiple paragraphs. Occurrence of a block
+ * separator in the text terminates a paragraph, and whatever comes next starts
+ * a new paragraph. The exception to this rule is when a Carriage Return (CR)
+ * is followed by a Line Feed (LF). Both CR and LF are block separators, but
+ * in that case, the pair of characters is considered as terminating the
+ * preceding paragraph, and a new paragraph will be started by a character
+ * coming after the LF.
+ *
+ * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
+ *        which will be set to contain the reordering information,
+ *        especially the resolved levels for all the characters in <code>text</code>.
+ *
+ * @param text is a pointer to the text that the Bidi algorithm will be performed on.
+ *        This pointer is stored in the UBiDi object and can be retrieved
+ *        with <code>ubidi_getText()</code>.<br>
+ *        <strong>Note:</strong> the text must be (at least) <code>length</code> long.
+ *
+ * @param length is the length of the text; if <code>length==-1</code> then
+ *        the text must be zero-terminated.
+ *
+ * @param paraLevel specifies the default level for the text;
+ *        it is typically 0 (LTR) or 1 (RTL).
+ *        If the function shall determine the paragraph level from the text,
+ *        then <code>paraLevel</code> can be set to
+ *        either <code>#UBIDI_DEFAULT_LTR</code>
+ *        or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple
+ *        paragraphs, the paragraph level shall be determined separately for
+ *        each paragraph; if a paragraph does not include any strongly typed
+ *        character, then the desired default is used (0 for LTR or 1 for RTL).
+ *        Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code>
+ *        is also valid, with odd levels indicating RTL.
+ *
+ * @param embeddingLevels (in) may be used to preset the embedding and override levels,
+ *        ignoring characters like LRE and PDF in the text.
+ *        A level overrides the directional property of its corresponding
+ *        (same index) character if the level has the
+ *        <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br>
+ *        Except for that bit, it must be
+ *        <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>,
+ *        with one exception: a level of zero may be specified for a paragraph
+ *        separator even if <code>paraLevel>0</code> when multiple paragraphs
+ *        are submitted in the same call to <code>ubidi_setPara()</code>.<br><br>
+ *        <strong>Caution: </strong>A copy of this pointer, not of the levels,
+ *        will be stored in the <code>UBiDi</code> object;
+ *        the <code>embeddingLevels</code> array must not be
+ *        deallocated before the <code>UBiDi</code> structure is destroyed or reused,
+ *        and the <code>embeddingLevels</code>
+ *        should not be modified to avoid unexpected results on subsequent Bidi operations.
+ *        However, the <code>ubidi_setPara()</code> and
+ *        <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br>
+ *        After the <code>UBiDi</code> object is reused or destroyed, the caller
+ *        must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br>
+ *        <strong>Note:</strong> the <code>embeddingLevels</code> array must be
+ *        at least <code>length</code> long.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
+              UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
+              UErrorCode *pErrorCode);
+
+/**
+ * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
+ * contain the reordering information, especially the resolved levels,
+ * for all the characters in a line of text. This line of text is
+ * specified by referring to a <code>UBiDi</code> object representing
+ * this information for a piece of text containing one or more paragraphs,
+ * and by specifying a range of indexes in this text.<p>
+ * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
+ *
+ * This is used after calling <code>ubidi_setPara()</code>
+ * for a piece of text, and after line-breaking on that text.
+ * It is not necessary if each paragraph is treated as a single line.<p>
+ *
+ * After line-breaking, rules (L1) and (L2) for the treatment of
+ * trailing WS and for reordering are performed on
+ * a <code>UBiDi</code> object that represents a line.<p>
+ *
+ * <strong>Important: </strong><code>pLineBiDi</code> shares data with
+ * <code>pParaBiDi</code>.
+ * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
+ * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
+ * before the object for its parent paragraph.<p>
+ *
+ * The text pointer that was stored in <code>pParaBiDi</code> is also copied,
+ * and <code>start</code> is added to it so that it points to the beginning of the
+ * line for this object.
+ *
+ * @param pParaBiDi is the parent paragraph object. It must have been set
+ * by a successful call to ubidi_setPara.
+ *
+ * @param start is the line's first index into the text.
+ *
+ * @param limit is just behind the line's last index into the text
+ *        (its last index +1).<br>
+ *        It must be <code>0<=start<limit<=</code>containing paragraph limit.
+ *        If the specified line crosses a paragraph boundary, the function
+ *        will terminate with error code U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * @param pLineBiDi is the object that will now represent a line of the text.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_setLine(const UBiDi *pParaBiDi,
+              int32_t start, int32_t limit,
+              UBiDi *pLineBiDi,
+              UErrorCode *pErrorCode);
+
+/**
+ * Get the directionality of the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>
+ *         or <code>UBIDI_MIXED</code>
+ *         that indicates if the entire text
+ *         represented by this object is unidirectional,
+ *         and which direction, or if it is mixed-directional.
+ *
+ * @see UBiDiDirection
+ * @stable ICU 2.0
+ */
+U_STABLE UBiDiDirection U_EXPORT2
+ubidi_getDirection(const UBiDi *pBiDi);
+
+/**
+ * Get the pointer to the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The pointer to the text that the UBiDi object was created for.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_setLine
+ * @stable ICU 2.0
+ */
+U_STABLE const UChar * U_EXPORT2
+ubidi_getText(const UBiDi *pBiDi);
+
+/**
+ * Get the length of the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The length of the text that the UBiDi object was created for.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_getLength(const UBiDi *pBiDi);
+
+/**
+ * Get the paragraph level of the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The paragraph level. If there are multiple paragraphs, their
+ *         level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or
+ *         UBIDI_DEFAULT_RTL.  In that case, the level of the first paragraph
+ *         is returned.
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getParagraph
+ * @see ubidi_getParagraphByIndex
+ * @stable ICU 2.0
+ */
+U_STABLE UBiDiLevel U_EXPORT2
+ubidi_getParaLevel(const UBiDi *pBiDi);
+
+/**
+ * Get the number of paragraphs.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The number of paragraphs.
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_countParagraphs(UBiDi *pBiDi);
+
+/**
+ * Get a paragraph, given a position within the text.
+ * This function returns information about a paragraph.<br>
+ * Note: if the paragraph index is known, it is more efficient to
+ * retrieve the paragraph information using ubidi_getParagraphByIndex().<p>
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param charIndex is the index of a character within the text, in the
+ *        range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>.
+ *
+ * @param pParaStart will receive the index of the first character of the
+ *        paragraph in the text.
+ *        This pointer can be <code>NULL</code> if this
+ *        value is not necessary.
+ *
+ * @param pParaLimit will receive the limit of the paragraph.
+ *        The l-value that you point to here may be the
+ *        same expression (variable) as the one for
+ *        <code>charIndex</code>.
+ *        This pointer can be <code>NULL</code> if this
+ *        value is not necessary.
+ *
+ * @param pParaLevel will receive the level of the paragraph.
+ *        This pointer can be <code>NULL</code> if this
+ *        value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The index of the paragraph containing the specified position.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
+                   int32_t *pParaLimit, UBiDiLevel *pParaLevel,
+                   UErrorCode *pErrorCode);
+
+/**
+ * Get a paragraph, given the index of this paragraph.
+ *
+ * This function returns information about a paragraph.<p>
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @param paraIndex is the number of the paragraph, in the
+ *        range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>.
+ *
+ * @param pParaStart will receive the index of the first character of the
+ *        paragraph in the text.
+ *        This pointer can be <code>NULL</code> if this
+ *        value is not necessary.
+ *
+ * @param pParaLimit will receive the limit of the paragraph.
+ *        This pointer can be <code>NULL</code> if this
+ *        value is not necessary.
+ *
+ * @param pParaLevel will receive the level of the paragraph.
+ *        This pointer can be <code>NULL</code> if this
+ *        value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
+                          int32_t *pParaStart, int32_t *pParaLimit,
+                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode);
+
+/**
+ * Get the level for one character.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param charIndex the index of a character. It must be in the range
+ *         [0..ubidi_getProcessedLength(pBiDi)].
+ *
+ * @return The level for the character at charIndex (0 if charIndex is not
+ *         in the valid range).
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_STABLE UBiDiLevel U_EXPORT2
+ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex);
+
+/**
+ * Get an array of levels for each character.<p>
+ *
+ * Note that this function may allocate memory under some
+ * circumstances, unlike <code>ubidi_getLevelAt()</code>.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose
+ *        text length must be strictly positive.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The levels array for the text,
+ *         or <code>NULL</code> if an error occurs.
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_STABLE const UBiDiLevel * U_EXPORT2
+ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
+
+/**
+ * Get a logical run.
+ * This function returns information about a run and is used
+ * to retrieve runs in logical order.<p>
+ * This is especially useful for line-breaking on a paragraph.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param logicalPosition is a logical position within the source text.
+ *
+ * @param pLogicalLimit will receive the limit of the corresponding run.
+ *        The l-value that you point to here may be the
+ *        same expression (variable) as the one for
+ *        <code>logicalPosition</code>.
+ *        This pointer can be <code>NULL</code> if this
+ *        value is not necessary.
+ *
+ * @param pLevel will receive the level of the corresponding run.
+ *        This pointer can be <code>NULL</code> if this
+ *        value is not necessary.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
+                    int32_t *pLogicalLimit, UBiDiLevel *pLevel);
+
+/**
+ * Get the number of runs.
+ * This function may invoke the actual reordering on the
+ * <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
+ * may have resolved only the levels of the text. Therefore,
+ * <code>ubidi_countRuns()</code> may have to allocate memory,
+ * and may fail doing so.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The number of runs.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
+
+/**
+ * Get one run's logical start, length, and directionality,
+ * which can be 0 for LTR or 1 for RTL.
+ * In an RTL run, the character at the logical start is
+ * visually on the right of the displayed run.
+ * The length is the number of characters in the run.<p>
+ * <code>ubidi_countRuns()</code> should be called
+ * before the runs are retrieved.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param runIndex is the number of the run in visual order, in the
+ *        range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
+ *
+ * @param pLogicalStart is the first logical character index in the text.
+ *        The pointer may be <code>NULL</code> if this index is not needed.
+ *
+ * @param pLength is the number of characters (at least one) in the run.
+ *        The pointer may be <code>NULL</code> if this is not needed.
+ *
+ * @return the directionality of the run,
+ *         <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
+ *         never <code>UBIDI_MIXED</code>.
+ *
+ * @see ubidi_countRuns
+ *
+ * Example:
+ * <pre>
+ * \code
+ * int32_t i, count=ubidi_countRuns(pBiDi),
+ *         logicalStart, visualIndex=0, length;
+ * for(i=0; i<count; ++i) {
+ *    if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
+ *         do { // LTR
+ *             show_char(text[logicalStart++], visualIndex++);
+ *         } while(--length>0);
+ *     } else {
+ *         logicalStart+=length;  // logicalLimit
+ *         do { // RTL
+ *             show_char(text[--logicalStart], visualIndex++);
+ *         } while(--length>0);
+ *     }
+ * }
+ *\endcode
+ * </pre>
+ *
+ * Note that in right-to-left runs, code like this places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * <p>
+ * Use of <code>ubidi_writeReordered()</code>, optionally with the
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order
+ * to avoid these issues.
+ * @stable ICU 2.0
+ */
+U_STABLE UBiDiDirection U_EXPORT2
+ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
+                   int32_t *pLogicalStart, int32_t *pLength);
+
+/**
+ * Get the visual position from a logical text position.
+ * If such a mapping is used many times on the same
+ * <code>UBiDi</code> object, then calling
+ * <code>ubidi_getLogicalMap()</code> is more efficient.<p>
+ *
+ * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
+ * visual position because the corresponding text character is a Bidi control
+ * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ * <p>
+ * When the visual output is altered by using options of
+ * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ * <p>
+ * Note that in right-to-left runs, this mapping places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * Use of <code>ubidi_writeReordered()</code>, optionally with the
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
+ * of using the mapping, in order to avoid these issues.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param logicalIndex is the index of a character in the text.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The visual position of this character.
+ *
+ * @see ubidi_getLogicalMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode);
+
+/**
+ * Get the logical text position from a visual position.
+ * If such a mapping is used many times on the same
+ * <code>UBiDi</code> object, then calling
+ * <code>ubidi_getVisualMap()</code> is more efficient.<p>
+ *
+ * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
+ * logical position because the corresponding text character is a Bidi mark
+ * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+ * <p>
+ * This is the inverse function to <code>ubidi_getVisualIndex()</code>.
+ * <p>
+ * When the visual output is altered by using options of
+ * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param visualIndex is the visual position of a character.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The index of this character in the text.
+ *
+ * @see ubidi_getVisualMap
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode);
+
+/**
+ * Get a logical-to-visual index map (array) for the characters in the UBiDi
+ * (paragraph or line) object.
+ * <p>
+ * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
+ * corresponding text characters are Bidi controls removed from the visual
+ * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ * <p>
+ * When the visual output is altered by using options of
+ * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ * <p>
+ * Note that in right-to-left runs, this mapping places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * Use of <code>ubidi_writeReordered()</code>, optionally with the
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
+ * of using the mapping, in order to avoid these issues.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code>
+ *        indexes which will reflect the reordering of the characters.
+ *        If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number
+ *        of elements allocated in <code>indexMap</code> must be no less than
+ *        <code>ubidi_getResultLength()</code>.
+ *        The array does not need to be initialized.<br><br>
+ *        The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getVisualMap
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getProcessedLength
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
+
+/**
+ * Get a visual-to-logical index map (array) for the characters in the UBiDi
+ * (paragraph or line) object.
+ * <p>
+ * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
+ * corresponding text characters are Bidi marks inserted in the visual output
+ * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+ * <p>
+ * When the visual output is altered by using options of
+ * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code>
+ *        indexes which will reflect the reordering of the characters.
+ *        If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number
+ *        of elements allocated in <code>indexMap</code> must be no less than
+ *        <code>ubidi_getProcessedLength()</code>.
+ *        The array does not need to be initialized.<br><br>
+ *        The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getLogicalMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getProcessedLength
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
+
+/**
+ * This is a convenience function that does not use a UBiDi object.
+ * It is intended to be used for when an application has determined the levels
+ * of objects (character sequences) and just needs to have them reordered (L2).
+ * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a
+ * <code>UBiDi</code> object.
+ *
+ * @param levels is an array with <code>length</code> levels that have been determined by
+ *        the application.
+ *
+ * @param length is the number of levels in the array, or, semantically,
+ *        the number of objects to be reordered.
+ *        It must be <code>length>0</code>.
+ *
+ * @param indexMap is a pointer to an array of <code>length</code>
+ *        indexes which will reflect the reordering of the characters.
+ *        The array does not need to be initialized.<p>
+ *        The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
+
+/**
+ * This is a convenience function that does not use a UBiDi object.
+ * It is intended to be used for when an application has determined the levels
+ * of objects (character sequences) and just needs to have them reordered (L2).
+ * This is equivalent to using <code>ubidi_getVisualMap()</code> on a
+ * <code>UBiDi</code> object.
+ *
+ * @param levels is an array with <code>length</code> levels that have been determined by
+ *        the application.
+ *
+ * @param length is the number of levels in the array, or, semantically,
+ *        the number of objects to be reordered.
+ *        It must be <code>length>0</code>.
+ *
+ * @param indexMap is a pointer to an array of <code>length</code>
+ *        indexes which will reflect the reordering of the characters.
+ *        The array does not need to be initialized.<p>
+ *        The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
+
+/**
+ * Invert an index map.
+ * The index mapping of the first map is inverted and written to
+ * the second one.
+ *
+ * @param srcMap is an array with <code>length</code> elements
+ *        which defines the original mapping from a source array containing
+ *        <code>length</code> elements to a destination array.
+ *        Some elements of the source array may have no mapping in the
+ *        destination array. In that case, their value will be
+ *        the special value <code>UBIDI_MAP_NOWHERE</code>.
+ *        All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>.
+ *        Some elements may have a value >= <code>length</code>, if the
+ *        destination array has more elements than the source array.
+ *        There must be no duplicate indexes (two or more elements with the
+ *        same value except <code>UBIDI_MAP_NOWHERE</code>).
+ *
+ * @param destMap is an array with a number of elements equal to 1 + the highest
+ *        value in <code>srcMap</code>.
+ *        <code>destMap</code> will be filled with the inverse mapping.
+ *        If element with index i in <code>srcMap</code> has a value k different
+ *        from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of
+ *        the source array maps to element k in the destination array.
+ *        The inverse map will have value i in its k-th element.
+ *        For all elements of the destination array which do not map to
+ *        an element in the source array, the corresponding element in the
+ *        inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>.
+ *
+ * @param length is the length of each array.
+ * @see UBIDI_MAP_NOWHERE
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);
+
+/** option flags for ubidi_writeReordered() */
+
+/**
+ * option bit for ubidi_writeReordered():
+ * keep combining characters after their base characters in RTL runs
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_KEEP_BASE_COMBINING       1
+
+/**
+ * option bit for ubidi_writeReordered():
+ * replace characters with the "mirrored" property in RTL runs
+ * by their mirror-image mappings
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_DO_MIRRORING              2
+
+/**
+ * option bit for ubidi_writeReordered():
+ * surround the run with LRMs if necessary;
+ * this is part of the approximate "inverse Bidi" algorithm
+ *
+ * <p>This option does not imply corresponding adjustment of the index
+ * mappings.</p>
+ *
+ * @see ubidi_setInverse
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_INSERT_LRM_FOR_NUMERIC    4
+
+/**
+ * option bit for ubidi_writeReordered():
+ * remove Bidi control characters
+ * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC)
+ *
+ * <p>This option does not imply corresponding adjustment of the index
+ * mappings.</p>
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_REMOVE_BIDI_CONTROLS      8
+
+/**
+ * option bit for ubidi_writeReordered():
+ * write the output in reverse order
+ *
+ * <p>This has the same effect as calling <code>ubidi_writeReordered()</code>
+ * first without this option, and then calling
+ * <code>ubidi_writeReverse()</code> without mirroring.
+ * Doing this in the same step is faster and avoids a temporary buffer.
+ * An example for using this option is output to a character terminal that
+ * is designed for RTL scripts and stores text in reverse order.</p>
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_OUTPUT_REVERSE            16
+
+/**
+ * Get the length of the source text processed by the last call to
+ * <code>ubidi_setPara()</code>. This length may be different from the length
+ * of the source text if option <code>#UBIDI_OPTION_STREAMING</code>
+ * has been set.
+ * <br>
+ * Note that whenever the length of the text affects the execution or the
+ * result of a function, it is the processed length which must be considered,
+ * except for <code>ubidi_setPara</code> (which receives unprocessed source
+ * text) and <code>ubidi_getLength</code> (which returns the original length
+ * of the source text).<br>
+ * In particular, the processed length is the one to consider in the following
+ * cases:
+ * <ul>
+ * <li>maximum value of the <code>limit</code> argument of
+ * <code>ubidi_setLine</code></li>
+ * <li>maximum value of the <code>charIndex</code> argument of
+ * <code>ubidi_getParagraph</code></li>
+ * <li>maximum value of the <code>charIndex</code> argument of
+ * <code>ubidi_getLevelAt</code></li>
+ * <li>number of elements in the array returned by <code>ubidi_getLevels</code></li>
+ * <li>maximum value of the <code>logicalStart</code> argument of
+ * <code>ubidi_getLogicalRun</code></li>
+ * <li>maximum value of the <code>logicalIndex</code> argument of
+ * <code>ubidi_getVisualIndex</code></li>
+ * <li>number of elements filled in the <code>*indexMap</code> argument of
+ * <code>ubidi_getLogicalMap</code></li>
+ * <li>length of text processed by <code>ubidi_writeReordered</code></li>
+ * </ul>
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @return The length of the part of the source text processed by
+ *         the last call to <code>ubidi_setPara</code>.
+ * @see ubidi_setPara
+ * @see UBIDI_OPTION_STREAMING
+ * @stable ICU 3.6
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_getProcessedLength(const UBiDi *pBiDi);
+
+/**
+ * Get the length of the reordered text resulting from the last call to
+ * <code>ubidi_setPara()</code>. This length may be different from the length
+ * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code>
+ * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set.
+ * <br>
+ * This resulting length is the one to consider in the following cases:
+ * <ul>
+ * <li>maximum value of the <code>visualIndex</code> argument of
+ * <code>ubidi_getLogicalIndex</code></li>
+ * <li>number of elements of the <code>*indexMap</code> argument of
+ * <code>ubidi_getVisualMap</code></li>
+ * </ul>
+ * Note that this length stays identical to the source text length if
+ * Bidi marks are inserted or removed using option bits of
+ * <code>ubidi_writeReordered</code>, or if option
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set.
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @return The length of the reordered text resulting from
+ *         the last call to <code>ubidi_setPara</code>.
+ * @see ubidi_setPara
+ * @see UBIDI_OPTION_INSERT_MARKS
+ * @see UBIDI_OPTION_REMOVE_CONTROLS
+ * @stable ICU 3.6
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_getResultLength(const UBiDi *pBiDi);
+
+U_CDECL_BEGIN
+/**
+ * value returned by <code>UBiDiClassCallback</code> callbacks when
+ * there is no need to override the standard Bidi class for a given code point.
+ * @see UBiDiClassCallback
+ * @stable ICU 3.6
+ */
+#define U_BIDI_CLASS_DEFAULT  U_CHAR_DIRECTION_COUNT
+
+/**
+ * Callback type declaration for overriding default Bidi class values with
+ * custom ones.
+ * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code>
+ * object by calling the <code>ubidi_setClassCallback()</code> function;
+ * then the callback will be invoked by the UBA implementation any time the
+ * class of a character is to be determined.</p>
+ *
+ * @param context is a pointer to the callback private data.
+ *
+ * @param c       is the code point to get a Bidi class for.
+ *
+ * @return The directional property / Bidi class for the given code point
+ *         <code>c</code> if the default class has been overridden, or
+ *         <code>#U_BIDI_CLASS_DEFAULT</code> if the standard Bidi class value
+ *         for <code>c</code> is to be used.
+ * @see ubidi_setClassCallback
+ * @see ubidi_getClassCallback
+ * @stable ICU 3.6
+ */
+typedef UCharDirection U_CALLCONV
+UBiDiClassCallback(const void *context, UChar32 c);
+
+U_CDECL_END
+
+/**
+ * Retrieve the Bidi class for a given code point.
+ * <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a
+ * value other than <code>#U_BIDI_CLASS_DEFAULT</code>, that value is used;
+ * otherwise the default class determination mechanism is invoked.</p>
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @param c     is the code point whose Bidi class must be retrieved.
+ *
+ * @return The Bidi class for character <code>c</code> based
+ *         on the given <code>pBiDi</code> instance.
+ * @see UBiDiClassCallback
+ * @stable ICU 3.6
+ */
+U_STABLE UCharDirection U_EXPORT2
+ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c);
+
+/**
+ * Set the callback function and callback data used by the UBA
+ * implementation for Bidi class determination.
+ * <p>This may be useful for assigning Bidi classes to PUA characters, or
+ * for special application needs. For instance, an application may want to
+ * handle all spaces like L or R characters (according to the base direction)
+ * when creating the visual ordering of logical lines which are part of a report
+ * organized in columns: there should not be interaction between adjacent
+ * cells.<p>
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @param newFn is the new callback function pointer.
+ *
+ * @param newContext is the new callback context pointer. This can be NULL.
+ *
+ * @param oldFn fillin: Returns the old callback function pointer. This can be
+ *                      NULL.
+ *
+ * @param oldContext fillin: Returns the old callback's context. This can be
+ *                           NULL.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getClassCallback
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
+                       const void *newContext, UBiDiClassCallback **oldFn,
+                       const void **oldContext, UErrorCode *pErrorCode);
+
+/**
+ * Get the current callback function used for Bidi class determination.
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @param fn fillin: Returns the callback function pointer.
+ *
+ * @param context fillin: Returns the callback's private context.
+ *
+ * @see ubidi_setClassCallback
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context);
+
+/**
+ * Take a <code>UBiDi</code> object containing the reordering
+ * information for a piece of text (one or more paragraphs) set by
+ * <code>ubidi_setPara()</code> or for a line of text set by
+ * <code>ubidi_setLine()</code> and write a reordered string to the
+ * destination buffer.
+ *
+ * This function preserves the integrity of characters with multiple
+ * code units and (optionally) combining characters.
+ * Characters in RTL runs can be replaced by mirror-image characters
+ * in the destination buffer. Note that "real" mirroring has
+ * to be done in a rendering engine by glyph selection
+ * and that for many "mirrored" characters there are no
+ * Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters; see the description of the <code>destSize</code>
+ * and <code>options</code> parameters and of the option bit flags.
+ *
+ * @param pBiDi A pointer to a <code>UBiDi</code> object that
+ *              is set by <code>ubidi_setPara()</code> or
+ *              <code>ubidi_setLine()</code> and contains the reordering
+ *              information for the text that it was defined for,
+ *              as well as a pointer to that text.<br><br>
+ *              The text was aliased (only the pointer was stored
+ *              without copying the contents) and must not have been modified
+ *              since the <code>ubidi_setPara()</code> call.
+ *
+ * @param dest A pointer to where the reordered text is to be copied.
+ *             The source text and <code>dest[destSize]</code>
+ *             must not overlap.
+ *
+ * @param destSize The size of the <code>dest</code> buffer,
+ *                 in number of UChars.
+ *                 If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>
+ *                 option is set, then the destination length could be
+ *                 as large as
+ *                 <code>ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>.
+ *                 If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
+ *                 is set, then the destination length may be less than
+ *                 <code>ubidi_getLength(pBiDi)</code>.
+ *                 If none of these options is set, then the destination length
+ *                 will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>.
+ *
+ * @param options A bit set of options for the reordering that control
+ *                how the reordered text is written.
+ *                The options include mirroring the characters on a code
+ *                point basis and inserting LRM characters, which is used
+ *                especially for transforming visually stored text
+ *                to logically stored text (although this is still an
+ *                imperfect implementation of an "inverse Bidi" algorithm
+ *                because it uses the "forward Bidi" algorithm at its core).
+ *                The available options are:
+ *                <code>#UBIDI_DO_MIRRORING</code>,
+ *                <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ *                <code>#UBIDI_KEEP_BASE_COMBINING</code>,
+ *                <code>#UBIDI_OUTPUT_REVERSE</code>,
+ *                <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The length of the output string.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_writeReordered(UBiDi *pBiDi,
+                     UChar *dest, int32_t destSize,
+                     uint16_t options,
+                     UErrorCode *pErrorCode);
+
+/**
+ * Reverse a Right-To-Left run of Unicode text.
+ *
+ * This function preserves the integrity of characters with multiple
+ * code units and (optionally) combining characters.
+ * Characters can be replaced by mirror-image characters
+ * in the destination buffer. Note that "real" mirroring has
+ * to be done in a rendering engine by glyph selection
+ * and that for many "mirrored" characters there are no
+ * Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters.
+ *
+ * This function is the implementation for reversing RTL runs as part
+ * of <code>ubidi_writeReordered()</code>. For detailed descriptions
+ * of the parameters, see there.
+ * Since no Bidi controls are inserted here, the output string length
+ * will never exceed <code>srcLength</code>.
+ *
+ * @see ubidi_writeReordered
+ *
+ * @param src A pointer to the RTL run text.
+ *
+ * @param srcLength The length of the RTL run.
+ *
+ * @param dest A pointer to where the reordered text is to be copied.
+ *             <code>src[srcLength]</code> and <code>dest[destSize]</code>
+ *             must not overlap.
+ *
+ * @param destSize The size of the <code>dest</code> buffer,
+ *                 in number of UChars.
+ *                 If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
+ *                 is set, then the destination length may be less than
+ *                 <code>srcLength</code>.
+ *                 If this option is not set, then the destination length
+ *                 will be exactly <code>srcLength</code>.
+ *
+ * @param options A bit set of options for the reordering that control
+ *                how the reordered text is written.
+ *                See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The length of the output string.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubidi_writeReverse(const UChar *src, int32_t srcLength,
+                   UChar *dest, int32_t destSize,
+                   uint16_t options,
+                   UErrorCode *pErrorCode);
+
+/*#define BIDI_SAMPLE_CODE*/
+/*@}*/
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/ubrk.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ubrk.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ubrk.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,482 +0,0 @@
-/*
-******************************************************************************
-* Copyright (C) 1996-2007, International Business Machines Corporation and others.
-* All Rights Reserved.
-******************************************************************************
-*/
-
-#ifndef UBRK_H
-#define UBRK_H
-
-#include "unicode/utypes.h"
-#include "unicode/uloc.h"
-#include "unicode/utext.h"
-
-/**
- * A text-break iterator.
- *  For usage in C programs.
- */
-#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
-#   define UBRK_TYPEDEF_UBREAK_ITERATOR
-    /**
-     *  Opaque type representing an ICU Break iterator object.
-     *  @stable ICU 2.0
-     */
-    typedef void UBreakIterator;
-#endif
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/parseerr.h"
-
-/**
- * \file
- * \brief C API: BreakIterator
- *
- * <h2> BreakIterator C API </h2>
- *
- * The BreakIterator C API defines  methods for finding the location
- * of boundaries in text. Pointer to a UBreakIterator maintain a
- * current position and scan over text returning the index of characters
- * where boundaries occur.
- * <p>
- * Line boundary analysis determines where a text string can be broken
- * when line-wrapping. The mechanism correctly handles punctuation and
- * hyphenated words.
- * <p>
- * Sentence boundary analysis allows selection with correct
- * interpretation of periods within numbers and abbreviations, and
- * trailing punctuation marks such as quotation marks and parentheses.
- * <p>
- * Word boundary analysis is used by search and replace functions, as
- * well as within text editing applications that allow the user to
- * select words with a double click. Word selection provides correct
- * interpretation of punctuation marks within and following
- * words. Characters that are not part of a word, such as symbols or
- * punctuation marks, have word-breaks on both sides.
- * <p>
- * Character boundary analysis allows users to interact with
- * characters as they expect to, for example, when moving the cursor
- * through a text string. Character boundary analysis provides correct
- * navigation of through character strings, regardless of how the
- * character is stored.  For example, an accented character might be
- * stored as a base character and a diacritical mark. What users
- * consider to be a character can differ between languages.
- * <p>
- * Title boundary analysis locates all positions,
- * typically starts of words, that should be set to Title Case
- * when title casing the text.
- * <p>
- * The text boundary positions are found according to the rules
- * described in Unicode Standard Annex #29, Text Boundaries, and
- * Unicode Standard Annex #14, Line Breaking Properties.  These
- * are available at http://www.unicode.org/reports/tr14/ and
- * http://www.unicode.org/reports/tr29/.
- * <p>
- * In addition to the plain C API defined in this header file, an
- * object oriented C++ API with equivalent functionality is defined in the
- * file brkiter.h.
- * <p>
- * Code snippits illustrating the use of the Break Iterator APIs
- * are available in the ICU User Guide,
- * http://icu-project.org/userguide/boundaryAnalysis.html
- * and in the sample program icu/source/samples/break/break.cpp"
- */
-
-/** The possible types of text boundaries.  @stable ICU 2.0 */
-typedef enum UBreakIteratorType {
-  /** Character breaks  @stable ICU 2.0 */
-  UBRK_CHARACTER = 0,
-  /** Word breaks @stable ICU 2.0 */
-  UBRK_WORD = 1,
-  /** Line breaks @stable ICU 2.0 */
-  UBRK_LINE = 2,
-  /** Sentence breaks @stable ICU 2.0 */
-  UBRK_SENTENCE = 3,
-
-#ifndef U_HIDE_DEPRECATED_API
-  /**
-   * Title Case breaks
-   * The iterator created using this type locates title boundaries as described for
-   * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
-   * please use Word Boundary iterator.
-   *
-   * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
-   */
-  UBRK_TITLE = 4,
-#endif /* U_HIDE_DEPRECATED_API */
-  UBRK_COUNT = 5
-} UBreakIteratorType;
-
-/** Value indicating all text boundaries have been returned.
- *  @stable ICU 2.0
- */
-#define UBRK_DONE ((int32_t) -1)
-
-
-/**
- *  Enum constants for the word break tags returned by
- *  getRuleStatus().  A range of values is defined for each category of
- *  word, to allow for further subdivisions of a category in future releases.
- *  Applications should check for tag values falling within the range, rather
- *  than for single individual values.
- *  @stable ICU 2.2
-*/
-typedef enum UWordBreak {
-    /** Tag value for "words" that do not fit into any of other categories.
-     *  Includes spaces and most punctuation. */
-    UBRK_WORD_NONE           = 0,
-    /** Upper bound for tags for uncategorized words. */
-    UBRK_WORD_NONE_LIMIT     = 100,
-    /** Tag value for words that appear to be numbers, lower limit.    */
-    UBRK_WORD_NUMBER         = 100,
-    /** Tag value for words that appear to be numbers, upper limit.    */
-    UBRK_WORD_NUMBER_LIMIT   = 200,
-    /** Tag value for words that contain letters, excluding
-     *  hiragana, katakana or ideographic characters, lower limit.    */
-    UBRK_WORD_LETTER         = 200,
-    /** Tag value for words containing letters, upper limit  */
-    UBRK_WORD_LETTER_LIMIT   = 300,
-    /** Tag value for words containing kana characters, lower limit */
-    UBRK_WORD_KANA           = 300,
-    /** Tag value for words containing kana characters, upper limit */
-    UBRK_WORD_KANA_LIMIT     = 400,
-    /** Tag value for words containing ideographic characters, lower limit */
-    UBRK_WORD_IDEO           = 400,
-    /** Tag value for words containing ideographic characters, upper limit */
-    UBRK_WORD_IDEO_LIMIT     = 500
-} UWordBreak;
-
-/**
- *  Enum constants for the line break tags returned by getRuleStatus().
- *  A range of values is defined for each category of
- *  word, to allow for further subdivisions of a category in future releases.
- *  Applications should check for tag values falling within the range, rather
- *  than for single individual values.
- *  @stable ICU 2.8
-*/
-typedef enum ULineBreakTag {
-    /** Tag value for soft line breaks, positions at which a line break
-      *  is acceptable but not required                */
-    UBRK_LINE_SOFT            = 0,
-    /** Upper bound for soft line breaks.              */
-    UBRK_LINE_SOFT_LIMIT      = 100,
-    /** Tag value for a hard, or mandatory line break  */
-    UBRK_LINE_HARD            = 100,
-    /** Upper bound for hard line breaks.              */
-    UBRK_LINE_HARD_LIMIT      = 200
-} ULineBreakTag;
-
-
-
-/**
- *  Enum constants for the sentence break tags returned by getRuleStatus().
- *  A range of values is defined for each category of
- *  sentence, to allow for further subdivisions of a category in future releases.
- *  Applications should check for tag values falling within the range, rather
- *  than for single individual values.
- *  @stable ICU 2.8
-*/
-typedef enum USentenceBreakTag {
-    /** Tag value for for sentences  ending with a sentence terminator
-      * ('.', '?', '!', etc.) character, possibly followed by a
-      * hard separator (CR, LF, PS, etc.)
-      */
-    UBRK_SENTENCE_TERM       = 0,
-    /** Upper bound for tags for sentences ended by sentence terminators.    */
-    UBRK_SENTENCE_TERM_LIMIT = 100,
-    /** Tag value for for sentences that do not contain an ending
-      * sentence terminator ('.', '?', '!', etc.) character, but
-      * are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
-      */
-    UBRK_SENTENCE_SEP        = 100,
-    /** Upper bound for tags for sentences ended by a separator.              */
-    UBRK_SENTENCE_SEP_LIMIT  = 200
-    /** Tag value for a hard, or mandatory line break  */
-} USentenceBreakTag;
-
-
-/**
- * Open a new UBreakIterator for locating text boundaries for a specified locale.
- * A UBreakIterator may be used for detecting character, line, word,
- * and sentence breaks in text.
- * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
- * UBRK_LINE, UBRK_SENTENCE
- * @param locale The locale specifying the text-breaking conventions.
- * @param text The text to be iterated over.
- * @param textLength The number of characters in text, or -1 if null-terminated.
- * @param status A UErrorCode to receive any errors.
- * @return A UBreakIterator for the specified locale.
- * @see ubrk_openRules
- * @stable ICU 2.0
- */
-U_STABLE UBreakIterator* U_EXPORT2
-ubrk_open(UBreakIteratorType type,
-      const char *locale,
-      const UChar *text,
-      int32_t textLength,
-      UErrorCode *status);
-
-/**
- * Open a new UBreakIterator for locating text boundaries using specified breaking rules.
- * The rule syntax is ... (TBD)
- * @param rules A set of rules specifying the text breaking conventions.
- * @param rulesLength The number of characters in rules, or -1 if null-terminated.
- * @param text The text to be iterated over.  May be null, in which case ubrk_setText() is
- *        used to specify the text to be iterated.
- * @param textLength The number of characters in text, or -1 if null-terminated.
- * @param parseErr   Receives position and context information for any syntax errors
- *                   detected while parsing the rules.
- * @param status A UErrorCode to receive any errors.
- * @return A UBreakIterator for the specified rules.
- * @see ubrk_open
- * @stable ICU 2.2
- */
-U_STABLE UBreakIterator* U_EXPORT2
-ubrk_openRules(const UChar     *rules,
-               int32_t         rulesLength,
-               const UChar     *text,
-               int32_t          textLength,
-               UParseError     *parseErr,
-               UErrorCode      *status);
-
-/**
- * Thread safe cloning operation
- * @param bi iterator to be cloned
- * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
- *  If buffer is not large enough, new memory will be allocated.
- *  Clients can use the U_BRK_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
- * @param pBufferSize pointer to size of allocated space.
- *  If *pBufferSize == 0, a sufficient size for use in cloning will
- *  be returned ('pre-flighting')
- *  If *pBufferSize is not enough for a stack-based safe clone,
- *  new memory will be allocated.
- * @param status to indicate whether the operation went on smoothly or there were errors
- *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
- * @return pointer to the new clone
- * @stable ICU 2.0
- */
-U_STABLE UBreakIterator * U_EXPORT2
-ubrk_safeClone(
-          const UBreakIterator *bi,
-          void *stackBuffer,
-          int32_t *pBufferSize,
-          UErrorCode *status);
-
-/**
-  * A recommended size (in bytes) for the memory buffer to be passed to ubrk_saveClone().
-  * @stable ICU 2.0
-  */
-#define U_BRK_SAFECLONE_BUFFERSIZE 512
-
-/**
-* Close a UBreakIterator.
-* Once closed, a UBreakIterator may no longer be used.
-* @param bi The break iterator to close.
- * @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2
-ubrk_close(UBreakIterator *bi);
-
-/**
- * Sets an existing iterator to point to a new piece of text
- * @param bi The iterator to use
- * @param text The text to be set
- * @param textLength The length of the text
- * @param status The error code
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ubrk_setText(UBreakIterator* bi,
-             const UChar*    text,
-             int32_t         textLength,
-             UErrorCode*     status);
-
-
-/**
- * Sets an existing iterator to point to a new piece of text
- * @param bi The iterator to use
- * @param text The text to be set.
- *             This function makes a shallow clone of the supplied UText.  This means
- *             that the caller is free to immediately close or otherwise reuse the
- *             UText that was passed as a parameter, but that the underlying text itself
- *             must not be altered while being referenced by the break iterator.
- * @param status The error code
- * @stable ICU 3.4
- */
-U_STABLE void U_EXPORT2
-ubrk_setUText(UBreakIterator* bi,
-             UText*          text,
-             UErrorCode*     status);
-
-
-
-/**
- * Determine the most recently-returned text boundary.
- *
- * @param bi The break iterator to use.
- * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
- * \ref ubrk_first, or \ref ubrk_last.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubrk_current(const UBreakIterator *bi);
-
-/**
- * Determine the text boundary following the current text boundary.
- *
- * @param bi The break iterator to use.
- * @return The character index of the next text boundary, or UBRK_DONE
- * if all text boundaries have been returned.
- * @see ubrk_previous
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubrk_next(UBreakIterator *bi);
-
-/**
- * Determine the text boundary preceding the current text boundary.
- *
- * @param bi The break iterator to use.
- * @return The character index of the preceding text boundary, or UBRK_DONE
- * if all text boundaries have been returned.
- * @see ubrk_next
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubrk_previous(UBreakIterator *bi);
-
-/**
- * Determine the index of the first character in the text being scanned.
- * This is not always the same as index 0 of the text.
- * @param bi The break iterator to use.
- * @return The character index of the first character in the text being scanned.
- * @see ubrk_last
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubrk_first(UBreakIterator *bi);
-
-/**
- * Determine the index immediately <EM>beyond</EM> the last character in the text being
- * scanned.
- * This is not the same as the last character.
- * @param bi The break iterator to use.
- * @return The character offset immediately <EM>beyond</EM> the last character in the
- * text being scanned.
- * @see ubrk_first
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubrk_last(UBreakIterator *bi);
-
-/**
- * Determine the text boundary preceding the specified offset.
- * The value returned is always smaller than offset, or UBRK_DONE.
- * @param bi The break iterator to use.
- * @param offset The offset to begin scanning.
- * @return The text boundary preceding offset, or UBRK_DONE.
- * @see ubrk_following
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubrk_preceding(UBreakIterator *bi,
-           int32_t offset);
-
-/**
- * Determine the text boundary following the specified offset.
- * The value returned is always greater than offset, or UBRK_DONE.
- * @param bi The break iterator to use.
- * @param offset The offset to begin scanning.
- * @return The text boundary following offset, or UBRK_DONE.
- * @see ubrk_preceding
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ubrk_following(UBreakIterator *bi,
-           int32_t offset);
-
-/**
-* Get a locale for which text breaking information is available.
-* A UBreakIterator in a locale returned by this function will perform the correct
-* text breaking for the locale.
-* @param index The index of the desired locale.
-* @return A locale for which number text breaking information is available, or 0 if none.
-* @see ubrk_countAvailable
-* @stable ICU 2.0
-*/
-U_STABLE const char* U_EXPORT2
-ubrk_getAvailable(int32_t index);
-
-/**
-* Determine how many locales have text breaking information available.
-* This function is most useful as determining the loop ending condition for
-* calls to \ref ubrk_getAvailable.
-* @return The number of locales for which text breaking information is available.
-* @see ubrk_getAvailable
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2
-ubrk_countAvailable(void);
-
-
-/**
-* Returns true if the specfied position is a boundary position.  As a side
-* effect, leaves the iterator pointing to the first boundary position at
-* or after "offset".
-* @param bi The break iterator to use.
-* @param offset the offset to check.
-* @return True if "offset" is a boundary position.
-* @stable ICU 2.0
-*/
-U_STABLE  UBool U_EXPORT2
-ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
-
-/**
- * Return the status from the break rule that determined the most recently
- * returned break position.  The values appear in the rule source
- * within brackets, {123}, for example.  For rules that do not specify a
- * status, a default value of 0 is returned.
- * <p>
- * For word break iterators, the possible values are defined in enum UWordBreak.
- * @stable ICU 2.2
- */
-U_STABLE  int32_t U_EXPORT2
-ubrk_getRuleStatus(UBreakIterator *bi);
-
-/**
- * Get the statuses from the break rules that determined the most recently
- * returned break position.  The values appear in the rule source
- * within brackets, {123}, for example.  The default status value for rules
- * that do not explicitly provide one is zero.
- * <p>
- * For word break iterators, the possible values are defined in enum UWordBreak.
- * @param bi        The break iterator to use
- * @param fillInVec an array to be filled in with the status values.
- * @param capacity  the length of the supplied vector.  A length of zero causes
- *                  the function to return the number of status values, in the
- *                  normal way, without attemtping to store any values.
- * @param status    receives error codes.
- * @return          The number of rule status values from rules that determined
- *                  the most recent boundary returned by the break iterator.
- * @stable ICU 3.0
- */
-U_STABLE  int32_t U_EXPORT2
-ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
-
-/**
- * Return the locale of the break iterator. You can choose between the valid and
- * the actual locale.
- * @param bi break iterator
- * @param type locale type (valid or actual)
- * @param status error code
- * @return locale string
- * @stable ICU 2.8
- */
-U_STABLE const char* U_EXPORT2
-ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
-
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/ubrk.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ubrk.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ubrk.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ubrk.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,482 @@
+/*
+******************************************************************************
+* Copyright (C) 1996-2007, International Business Machines Corporation and others.
+* All Rights Reserved.
+******************************************************************************
+*/
+
+#ifndef UBRK_H
+#define UBRK_H
+
+#include "unicode/utypes.h"
+#include "unicode/uloc.h"
+#include "unicode/utext.h"
+
+/**
+ * A text-break iterator.
+ *  For usage in C programs.
+ */
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+#   define UBRK_TYPEDEF_UBREAK_ITERATOR
+    /**
+     *  Opaque type representing an ICU Break iterator object.
+     *  @stable ICU 2.0
+     */
+    typedef void UBreakIterator;
+#endif
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/parseerr.h"
+
+/**
+ * \file
+ * \brief C API: BreakIterator
+ *
+ * <h2> BreakIterator C API </h2>
+ *
+ * The BreakIterator C API defines  methods for finding the location
+ * of boundaries in text. Pointer to a UBreakIterator maintain a
+ * current position and scan over text returning the index of characters
+ * where boundaries occur.
+ * <p>
+ * Line boundary analysis determines where a text string can be broken
+ * when line-wrapping. The mechanism correctly handles punctuation and
+ * hyphenated words.
+ * <p>
+ * Sentence boundary analysis allows selection with correct
+ * interpretation of periods within numbers and abbreviations, and
+ * trailing punctuation marks such as quotation marks and parentheses.
+ * <p>
+ * Word boundary analysis is used by search and replace functions, as
+ * well as within text editing applications that allow the user to
+ * select words with a double click. Word selection provides correct
+ * interpretation of punctuation marks within and following
+ * words. Characters that are not part of a word, such as symbols or
+ * punctuation marks, have word-breaks on both sides.
+ * <p>
+ * Character boundary analysis allows users to interact with
+ * characters as they expect to, for example, when moving the cursor
+ * through a text string. Character boundary analysis provides correct
+ * navigation of through character strings, regardless of how the
+ * character is stored.  For example, an accented character might be
+ * stored as a base character and a diacritical mark. What users
+ * consider to be a character can differ between languages.
+ * <p>
+ * Title boundary analysis locates all positions,
+ * typically starts of words, that should be set to Title Case
+ * when title casing the text.
+ * <p>
+ * The text boundary positions are found according to the rules
+ * described in Unicode Standard Annex #29, Text Boundaries, and
+ * Unicode Standard Annex #14, Line Breaking Properties.  These
+ * are available at http://www.unicode.org/reports/tr14/ and
+ * http://www.unicode.org/reports/tr29/.
+ * <p>
+ * In addition to the plain C API defined in this header file, an
+ * object oriented C++ API with equivalent functionality is defined in the
+ * file brkiter.h.
+ * <p>
+ * Code snippits illustrating the use of the Break Iterator APIs
+ * are available in the ICU User Guide,
+ * http://icu-project.org/userguide/boundaryAnalysis.html
+ * and in the sample program icu/source/samples/break/break.cpp"
+ */
+
+/** The possible types of text boundaries.  @stable ICU 2.0 */
+typedef enum UBreakIteratorType {
+  /** Character breaks  @stable ICU 2.0 */
+  UBRK_CHARACTER = 0,
+  /** Word breaks @stable ICU 2.0 */
+  UBRK_WORD = 1,
+  /** Line breaks @stable ICU 2.0 */
+  UBRK_LINE = 2,
+  /** Sentence breaks @stable ICU 2.0 */
+  UBRK_SENTENCE = 3,
+
+#ifndef U_HIDE_DEPRECATED_API
+  /**
+   * Title Case breaks
+   * The iterator created using this type locates title boundaries as described for
+   * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+   * please use Word Boundary iterator.
+   *
+   * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
+   */
+  UBRK_TITLE = 4,
+#endif /* U_HIDE_DEPRECATED_API */
+  UBRK_COUNT = 5
+} UBreakIteratorType;
+
+/** Value indicating all text boundaries have been returned.
+ *  @stable ICU 2.0
+ */
+#define UBRK_DONE ((int32_t) -1)
+
+
+/**
+ *  Enum constants for the word break tags returned by
+ *  getRuleStatus().  A range of values is defined for each category of
+ *  word, to allow for further subdivisions of a category in future releases.
+ *  Applications should check for tag values falling within the range, rather
+ *  than for single individual values.
+ *  @stable ICU 2.2
+*/
+typedef enum UWordBreak {
+    /** Tag value for "words" that do not fit into any of other categories.
+     *  Includes spaces and most punctuation. */
+    UBRK_WORD_NONE           = 0,
+    /** Upper bound for tags for uncategorized words. */
+    UBRK_WORD_NONE_LIMIT     = 100,
+    /** Tag value for words that appear to be numbers, lower limit.    */
+    UBRK_WORD_NUMBER         = 100,
+    /** Tag value for words that appear to be numbers, upper limit.    */
+    UBRK_WORD_NUMBER_LIMIT   = 200,
+    /** Tag value for words that contain letters, excluding
+     *  hiragana, katakana or ideographic characters, lower limit.    */
+    UBRK_WORD_LETTER         = 200,
+    /** Tag value for words containing letters, upper limit  */
+    UBRK_WORD_LETTER_LIMIT   = 300,
+    /** Tag value for words containing kana characters, lower limit */
+    UBRK_WORD_KANA           = 300,
+    /** Tag value for words containing kana characters, upper limit */
+    UBRK_WORD_KANA_LIMIT     = 400,
+    /** Tag value for words containing ideographic characters, lower limit */
+    UBRK_WORD_IDEO           = 400,
+    /** Tag value for words containing ideographic characters, upper limit */
+    UBRK_WORD_IDEO_LIMIT     = 500
+} UWordBreak;
+
+/**
+ *  Enum constants for the line break tags returned by getRuleStatus().
+ *  A range of values is defined for each category of
+ *  word, to allow for further subdivisions of a category in future releases.
+ *  Applications should check for tag values falling within the range, rather
+ *  than for single individual values.
+ *  @stable ICU 2.8
+*/
+typedef enum ULineBreakTag {
+    /** Tag value for soft line breaks, positions at which a line break
+      *  is acceptable but not required                */
+    UBRK_LINE_SOFT            = 0,
+    /** Upper bound for soft line breaks.              */
+    UBRK_LINE_SOFT_LIMIT      = 100,
+    /** Tag value for a hard, or mandatory line break  */
+    UBRK_LINE_HARD            = 100,
+    /** Upper bound for hard line breaks.              */
+    UBRK_LINE_HARD_LIMIT      = 200
+} ULineBreakTag;
+
+
+
+/**
+ *  Enum constants for the sentence break tags returned by getRuleStatus().
+ *  A range of values is defined for each category of
+ *  sentence, to allow for further subdivisions of a category in future releases.
+ *  Applications should check for tag values falling within the range, rather
+ *  than for single individual values.
+ *  @stable ICU 2.8
+*/
+typedef enum USentenceBreakTag {
+    /** Tag value for for sentences  ending with a sentence terminator
+      * ('.', '?', '!', etc.) character, possibly followed by a
+      * hard separator (CR, LF, PS, etc.)
+      */
+    UBRK_SENTENCE_TERM       = 0,
+    /** Upper bound for tags for sentences ended by sentence terminators.    */
+    UBRK_SENTENCE_TERM_LIMIT = 100,
+    /** Tag value for for sentences that do not contain an ending
+      * sentence terminator ('.', '?', '!', etc.) character, but
+      * are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
+      */
+    UBRK_SENTENCE_SEP        = 100,
+    /** Upper bound for tags for sentences ended by a separator.              */
+    UBRK_SENTENCE_SEP_LIMIT  = 200
+    /** Tag value for a hard, or mandatory line break  */
+} USentenceBreakTag;
+
+
+/**
+ * Open a new UBreakIterator for locating text boundaries for a specified locale.
+ * A UBreakIterator may be used for detecting character, line, word,
+ * and sentence breaks in text.
+ * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
+ * UBRK_LINE, UBRK_SENTENCE
+ * @param locale The locale specifying the text-breaking conventions.
+ * @param text The text to be iterated over.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified locale.
+ * @see ubrk_openRules
+ * @stable ICU 2.0
+ */
+U_STABLE UBreakIterator* U_EXPORT2
+ubrk_open(UBreakIteratorType type,
+      const char *locale,
+      const UChar *text,
+      int32_t textLength,
+      UErrorCode *status);
+
+/**
+ * Open a new UBreakIterator for locating text boundaries using specified breaking rules.
+ * The rule syntax is ... (TBD)
+ * @param rules A set of rules specifying the text breaking conventions.
+ * @param rulesLength The number of characters in rules, or -1 if null-terminated.
+ * @param text The text to be iterated over.  May be null, in which case ubrk_setText() is
+ *        used to specify the text to be iterated.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param parseErr   Receives position and context information for any syntax errors
+ *                   detected while parsing the rules.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified rules.
+ * @see ubrk_open
+ * @stable ICU 2.2
+ */
+U_STABLE UBreakIterator* U_EXPORT2
+ubrk_openRules(const UChar     *rules,
+               int32_t         rulesLength,
+               const UChar     *text,
+               int32_t          textLength,
+               UParseError     *parseErr,
+               UErrorCode      *status);
+
+/**
+ * Thread safe cloning operation
+ * @param bi iterator to be cloned
+ * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
+ *  If buffer is not large enough, new memory will be allocated.
+ *  Clients can use the U_BRK_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
+ * @param pBufferSize pointer to size of allocated space.
+ *  If *pBufferSize == 0, a sufficient size for use in cloning will
+ *  be returned ('pre-flighting')
+ *  If *pBufferSize is not enough for a stack-based safe clone,
+ *  new memory will be allocated.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ *  An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
+ * @return pointer to the new clone
+ * @stable ICU 2.0
+ */
+U_STABLE UBreakIterator * U_EXPORT2
+ubrk_safeClone(
+          const UBreakIterator *bi,
+          void *stackBuffer,
+          int32_t *pBufferSize,
+          UErrorCode *status);
+
+/**
+  * A recommended size (in bytes) for the memory buffer to be passed to ubrk_saveClone().
+  * @stable ICU 2.0
+  */
+#define U_BRK_SAFECLONE_BUFFERSIZE 512
+
+/**
+* Close a UBreakIterator.
+* Once closed, a UBreakIterator may no longer be used.
+* @param bi The break iterator to close.
+ * @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2
+ubrk_close(UBreakIterator *bi);
+
+/**
+ * Sets an existing iterator to point to a new piece of text
+ * @param bi The iterator to use
+ * @param text The text to be set
+ * @param textLength The length of the text
+ * @param status The error code
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ubrk_setText(UBreakIterator* bi,
+             const UChar*    text,
+             int32_t         textLength,
+             UErrorCode*     status);
+
+
+/**
+ * Sets an existing iterator to point to a new piece of text
+ * @param bi The iterator to use
+ * @param text The text to be set.
+ *             This function makes a shallow clone of the supplied UText.  This means
+ *             that the caller is free to immediately close or otherwise reuse the
+ *             UText that was passed as a parameter, but that the underlying text itself
+ *             must not be altered while being referenced by the break iterator.
+ * @param status The error code
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ubrk_setUText(UBreakIterator* bi,
+             UText*          text,
+             UErrorCode*     status);
+
+
+
+/**
+ * Determine the most recently-returned text boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
+ * \ref ubrk_first, or \ref ubrk_last.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_current(const UBreakIterator *bi);
+
+/**
+ * Determine the text boundary following the current text boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index of the next text boundary, or UBRK_DONE
+ * if all text boundaries have been returned.
+ * @see ubrk_previous
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_next(UBreakIterator *bi);
+
+/**
+ * Determine the text boundary preceding the current text boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index of the preceding text boundary, or UBRK_DONE
+ * if all text boundaries have been returned.
+ * @see ubrk_next
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_previous(UBreakIterator *bi);
+
+/**
+ * Determine the index of the first character in the text being scanned.
+ * This is not always the same as index 0 of the text.
+ * @param bi The break iterator to use.
+ * @return The character index of the first character in the text being scanned.
+ * @see ubrk_last
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_first(UBreakIterator *bi);
+
+/**
+ * Determine the index immediately <EM>beyond</EM> the last character in the text being
+ * scanned.
+ * This is not the same as the last character.
+ * @param bi The break iterator to use.
+ * @return The character offset immediately <EM>beyond</EM> the last character in the
+ * text being scanned.
+ * @see ubrk_first
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_last(UBreakIterator *bi);
+
+/**
+ * Determine the text boundary preceding the specified offset.
+ * The value returned is always smaller than offset, or UBRK_DONE.
+ * @param bi The break iterator to use.
+ * @param offset The offset to begin scanning.
+ * @return The text boundary preceding offset, or UBRK_DONE.
+ * @see ubrk_following
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_preceding(UBreakIterator *bi,
+           int32_t offset);
+
+/**
+ * Determine the text boundary following the specified offset.
+ * The value returned is always greater than offset, or UBRK_DONE.
+ * @param bi The break iterator to use.
+ * @param offset The offset to begin scanning.
+ * @return The text boundary following offset, or UBRK_DONE.
+ * @see ubrk_preceding
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ubrk_following(UBreakIterator *bi,
+           int32_t offset);
+
+/**
+* Get a locale for which text breaking information is available.
+* A UBreakIterator in a locale returned by this function will perform the correct
+* text breaking for the locale.
+* @param index The index of the desired locale.
+* @return A locale for which number text breaking information is available, or 0 if none.
+* @see ubrk_countAvailable
+* @stable ICU 2.0
+*/
+U_STABLE const char* U_EXPORT2
+ubrk_getAvailable(int32_t index);
+
+/**
+* Determine how many locales have text breaking information available.
+* This function is most useful as determining the loop ending condition for
+* calls to \ref ubrk_getAvailable.
+* @return The number of locales for which text breaking information is available.
+* @see ubrk_getAvailable
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2
+ubrk_countAvailable(void);
+
+
+/**
+* Returns true if the specfied position is a boundary position.  As a side
+* effect, leaves the iterator pointing to the first boundary position at
+* or after "offset".
+* @param bi The break iterator to use.
+* @param offset the offset to check.
+* @return True if "offset" is a boundary position.
+* @stable ICU 2.0
+*/
+U_STABLE  UBool U_EXPORT2
+ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
+
+/**
+ * Return the status from the break rule that determined the most recently
+ * returned break position.  The values appear in the rule source
+ * within brackets, {123}, for example.  For rules that do not specify a
+ * status, a default value of 0 is returned.
+ * <p>
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @stable ICU 2.2
+ */
+U_STABLE  int32_t U_EXPORT2
+ubrk_getRuleStatus(UBreakIterator *bi);
+
+/**
+ * Get the statuses from the break rules that determined the most recently
+ * returned break position.  The values appear in the rule source
+ * within brackets, {123}, for example.  The default status value for rules
+ * that do not explicitly provide one is zero.
+ * <p>
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @param bi        The break iterator to use
+ * @param fillInVec an array to be filled in with the status values.
+ * @param capacity  the length of the supplied vector.  A length of zero causes
+ *                  the function to return the number of status values, in the
+ *                  normal way, without attemtping to store any values.
+ * @param status    receives error codes.
+ * @return          The number of rule status values from rules that determined
+ *                  the most recent boundary returned by the break iterator.
+ * @stable ICU 3.0
+ */
+U_STABLE  int32_t U_EXPORT2
+ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
+
+/**
+ * Return the locale of the break iterator. You can choose between the valid and
+ * the actual locale.
+ * @param bi break iterator
+ * @param type locale type (valid or actual)
+ * @param status error code
+ * @return locale string
+ * @stable ICU 2.8
+ */
+U_STABLE const char* U_EXPORT2
+ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
+
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/ucal.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ucal.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ucal.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1161 +0,0 @@
-/*
- *******************************************************************************
- * Copyright (C) 1996-2008, International Business Machines Corporation and
- * others. All Rights Reserved.
- *******************************************************************************
- */
-
-#ifndef UCAL_H
-#define UCAL_H
-
-#include "unicode/utypes.h"
-#include "unicode/uenum.h"
-#include "unicode/uloc.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-/**
- * \file
- * \brief C API: Calendar
- *
- * <h2>Calendar C API</h2>
- *
- * UCalendar C API is used  for converting between a <code>UDate</code> object
- * and a set of integer fields such as <code>UCAL_YEAR</code>, <code>UCAL_MONTH</code>,
- * <code>UCAL_DAY</code>, <code>UCAL_HOUR</code>, and so on.
- * (A <code>UDate</code> object represents a specific instant in
- * time with millisecond precision. See UDate
- * for information about the <code>UDate</code> .)
- *
- * <p>
- * Types of <code>UCalendar</code> interpret a <code>UDate</code>
- * according to the rules of a specific calendar system. The U_STABLE
- * provides the enum UCalendarType with UCAL_TRADITIONAL and
- * UCAL_GREGORIAN.
- * <p>
- * Like other locale-sensitive C API, calendar API  provides a
- * function, <code>ucal_open()</code>, which returns a pointer to
- * <code>UCalendar</code> whose time fields have been initialized
- * with the current date and time. We need to specify the type of
- * calendar to be opened and the  timezoneId.
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * \code
- * UCalendar *caldef;
- * UChar *tzId;
- * UErrorCode status;
- * tzId=(UChar*)malloc(sizeof(UChar) * (strlen("PST") +1) );
- * u_uastrcpy(tzId, "PST");
- * caldef=ucal_open(tzID, u_strlen(tzID), NULL, UCAL_TRADITIONAL, &status);
- * \endcode
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- *
- * <p>
- * A <code>UCalendar</code> object can produce all the time field values
- * needed to implement the date-time formatting for a particular language
- * and calendar style (for example, Japanese-Gregorian, Japanese-Traditional).
- *
- * <p>
- * When computing a <code>UDate</code> from time fields, two special circumstances
- * may arise: there may be insufficient information to compute the
- * <code>UDate</code> (such as only year and month but no day in the month),
- * or there may be inconsistent information (such as "Tuesday, July 15, 1996"
- * -- July 15, 1996 is actually a Monday).
- *
- * <p>
- * <strong>Insufficient information.</strong> The calendar will use default
- * information to specify the missing fields. This may vary by calendar; for
- * the Gregorian calendar, the default for a field is the same as that of the
- * start of the epoch: i.e., UCAL_YEAR = 1970, UCAL_MONTH = JANUARY, UCAL_DATE = 1, etc.
- *
- * <p>
- * <strong>Inconsistent information.</strong> If fields conflict, the calendar
- * will give preference to fields set more recently. For example, when
- * determining the day, the calendar will look for one of the following
- * combinations of fields.  The most recent combination, as determined by the
- * most recently set single field, will be used.
- *
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * \code
- * UCAL_MONTH + UCAL_DAY_OF_MONTH
- * UCAL_MONTH + UCAL_WEEK_OF_MONTH + UCAL_DAY_OF_WEEK
- * UCAL_MONTH + UCAL_DAY_OF_WEEK_IN_MONTH + UCAL_DAY_OF_WEEK
- * UCAL_DAY_OF_YEAR
- * UCAL_DAY_OF_WEEK + UCAL_WEEK_OF_YEAR
- * \endcode
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- *
- * For the time of day:
- *
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * \code
- * UCAL_HOUR_OF_DAY
- * UCAL_AM_PM + UCAL_HOUR
- * \endcode
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- *
- * <p>
- * <strong>Note:</strong> for some non-Gregorian calendars, different
- * fields may be necessary for complete disambiguation. For example, a full
- * specification of the historial Arabic astronomical calendar requires year,
- * month, day-of-month <em>and</em> day-of-week in some cases.
- *
- * <p>
- * <strong>Note:</strong> There are certain possible ambiguities in
- * interpretation of certain singular times, which are resolved in the
- * following ways:
- * <ol>
- *     <li> 24:00:00 "belongs" to the following day. That is,
- *          23:59 on Dec 31, 1969 &lt; 24:00 on Jan 1, 1970 &lt; 24:01:00 on Jan 1, 1970
- *
- *     <li> Although historically not precise, midnight also belongs to "am",
- *          and noon belongs to "pm", so on the same day,
- *          12:00 am (midnight) &lt; 12:01 am, and 12:00 pm (noon) &lt; 12:01 pm
- * </ol>
- *
- * <p>
- * The date or time format strings are not part of the definition of a
- * calendar, as those must be modifiable or overridable by the user at
- * runtime. Use {@link DateFormat}
- * to format dates.
- *
- * <p>
- * <code>Calendar</code> provides an API for field "rolling", where fields
- * can be incremented or decremented, but wrap around. For example, rolling the
- * month up in the date <code>December 12, <b>1996</b></code> results in
- * <code>January 12, <b>1996</b></code>.
- *
- * <p>
- * <code>Calendar</code> also provides a date arithmetic function for
- * adding the specified (signed) amount of time to a particular time field.
- * For example, subtracting 5 days from the date <code>September 12, 1996</code>
- * results in <code>September 7, 1996</code>.
- *
- * @stable ICU 2.0
- */
-
-/** A calendar.
- *  For usage in C programs.
- * @stable ICU 2.0
- */
-typedef void* UCalendar;
-
-/** Possible types of UCalendars 
- * @stable ICU 2.0
- */
-enum UCalendarType {
-  /**
-   * Despite the name, UCAL_TRADITIONAL designates the locale's default calendar,
-   * which may be the Gregorian calendar or some other calendar.
-   * @stable ICU 2.0
-   */
-  UCAL_TRADITIONAL,
-  /**
-   * Unambiguously designates the Gregorian calendar for the locale.
-   * @stable ICU 2.0
-   */
-  UCAL_GREGORIAN,
-  /**
-   * A better name for UCAL_TRADITIONAL.
-   * @draft ICU 4.2
-   */
-  UCAL_DEFAULT = UCAL_TRADITIONAL
-};
-
-/** @stable ICU 2.0 */
-typedef enum UCalendarType UCalendarType;
-
-/** Possible fields in a UCalendar 
- * @stable ICU 2.0
- */
-enum UCalendarDateFields {
-  /** 
-   * Field number indicating the era, e.g., AD or BC in the Gregorian (Julian) calendar. 
-   * This is a calendar-specific value.
-   * @stable ICU 2.6 
-   */
-  UCAL_ERA,
-
-  /**
-   * Field number indicating the year. This is a calendar-specific value.
-   * @stable ICU 2.6 
-   */
-  UCAL_YEAR,
-
-  /**
-   * Field number indicating the month. This is a calendar-specific value. 
-   * The first month of the year is
-   * <code>JANUARY</code>; the last depends on the number of months in a year.
-   * @see #UCAL_JANUARY
-   * @see #UCAL_FEBRUARY
-   * @see #UCAL_MARCH
-   * @see #UCAL_APRIL
-   * @see #UCAL_MAY
-   * @see #UCAL_JUNE
-   * @see #UCAL_JULY
-   * @see #UCAL_AUGUST
-   * @see #UCAL_SEPTEMBER
-   * @see #UCAL_OCTOBER
-   * @see #UCAL_NOVEMBER
-   * @see #UCAL_DECEMBER
-   * @see #UCAL_UNDECIMBER
-   * @stable ICU 2.6 
-   */
-  UCAL_MONTH,
-
-  /**
-   * Field number indicating the
-   * week number within the current year.  The first week of the year, as
-   * defined by <code>UCAL_FIRST_DAY_OF_WEEK</code> and <code>UCAL_MINIMAL_DAYS_IN_FIRST_WEEK</code>
-   * attributes, has value 1.  Subclasses define
-   * the value of <code>UCAL_WEEK_OF_YEAR</code> for days before the first week of
-   * the year.
-   * @see ucal_getAttribute
-   * @see ucal_setAttribute
-   * @stable ICU 2.6 
-   */
-  UCAL_WEEK_OF_YEAR,
-
- /**
-   * Field number indicating the
-   * week number within the current month.  The first week of the month, as
-   * defined by <code>UCAL_FIRST_DAY_OF_WEEK</code> and <code>UCAL_MINIMAL_DAYS_IN_FIRST_WEEK</code>
-   * attributes, has value 1.  Subclasses define
-   * the value of <code>WEEK_OF_MONTH</code> for days before the first week of
-   * the month.
-   * @see ucal_getAttribute
-   * @see ucal_setAttribute
-   * @see #UCAL_FIRST_DAY_OF_WEEK
-   * @see #UCAL_MINIMAL_DAYS_IN_FIRST_WEEK
-   * @stable ICU 2.6 
-   */
-  UCAL_WEEK_OF_MONTH,
-
- /**
-   * Field number indicating the
-   * day of the month. This is a synonym for <code>DAY_OF_MONTH</code>.
-   * The first day of the month has value 1.
-   * @see #UCAL_DAY_OF_MONTH
-   * @stable ICU 2.6 
-   */
-  UCAL_DATE,
-
- /**
-   * Field number indicating the day
-   * number within the current year.  The first day of the year has value 1.
-   * @stable ICU 2.6 
-   */
-  UCAL_DAY_OF_YEAR,
-
- /**
-   * Field number indicating the day
-   * of the week.  This field takes values <code>SUNDAY</code>,
-   * <code>MONDAY</code>, <code>TUESDAY</code>, <code>WEDNESDAY</code>,
-   * <code>THURSDAY</code>, <code>FRIDAY</code>, and <code>SATURDAY</code>.
-   * @see #UCAL_SUNDAY
-   * @see #UCAL_MONDAY
-   * @see #UCAL_TUESDAY
-   * @see #UCAL_WEDNESDAY
-   * @see #UCAL_THURSDAY
-   * @see #UCAL_FRIDAY
-   * @see #UCAL_SATURDAY
-   * @stable ICU 2.6 
-   */
-  UCAL_DAY_OF_WEEK,
-
- /**
-   * Field number indicating the
-   * ordinal number of the day of the week within the current month. Together
-   * with the <code>DAY_OF_WEEK</code> field, this uniquely specifies a day
-   * within a month.  Unlike <code>WEEK_OF_MONTH</code> and
-   * <code>WEEK_OF_YEAR</code>, this field's value does <em>not</em> depend on
-   * <code>getFirstDayOfWeek()</code> or
-   * <code>getMinimalDaysInFirstWeek()</code>.  <code>DAY_OF_MONTH 1</code>
-   * through <code>7</code> always correspond to <code>DAY_OF_WEEK_IN_MONTH
-   * 1</code>; <code>8</code> through <code>15</code> correspond to
-   * <code>DAY_OF_WEEK_IN_MONTH 2</code>, and so on.
-   * <code>DAY_OF_WEEK_IN_MONTH 0</code> indicates the week before
-   * <code>DAY_OF_WEEK_IN_MONTH 1</code>.  Negative values count back from the
-   * end of the month, so the last Sunday of a month is specified as
-   * <code>DAY_OF_WEEK = SUNDAY, DAY_OF_WEEK_IN_MONTH = -1</code>.  Because
-   * negative values count backward they will usually be aligned differently
-   * within the month than positive values.  For example, if a month has 31
-   * days, <code>DAY_OF_WEEK_IN_MONTH -1</code> will overlap
-   * <code>DAY_OF_WEEK_IN_MONTH 5</code> and the end of <code>4</code>.
-   * @see #UCAL_DAY_OF_WEEK
-   * @see #UCAL_WEEK_OF_MONTH
-   * @stable ICU 2.6 
-   */
-  UCAL_DAY_OF_WEEK_IN_MONTH,
-
- /**
-   * Field number indicating
-   * whether the <code>HOUR</code> is before or after noon.
-   * E.g., at 10:04:15.250 PM the <code>AM_PM</code> is <code>PM</code>.
-   * @see #UCAL_AM
-   * @see #UCAL_PM
-   * @see #UCAL_HOUR
-   * @stable ICU 2.6 
-   */
-  UCAL_AM_PM,
-
- /**
-   * Field number indicating the
-   * hour of the morning or afternoon. <code>HOUR</code> is used for the 12-hour
-   * clock.
-   * E.g., at 10:04:15.250 PM the <code>HOUR</code> is 10.
-   * @see #UCAL_AM_PM
-   * @see #UCAL_HOUR_OF_DAY
-   * @stable ICU 2.6 
-   */
-  UCAL_HOUR,
-
- /**
-   * Field number indicating the
-   * hour of the day. <code>HOUR_OF_DAY</code> is used for the 24-hour clock.
-   * E.g., at 10:04:15.250 PM the <code>HOUR_OF_DAY</code> is 22.
-   * @see #UCAL_HOUR
-   * @stable ICU 2.6 
-   */
-  UCAL_HOUR_OF_DAY,
-
- /**
-   * Field number indicating the
-   * minute within the hour.
-   * E.g., at 10:04:15.250 PM the <code>UCAL_MINUTE</code> is 4.
-   * @stable ICU 2.6 
-   */
-  UCAL_MINUTE,
-
- /**
-   * Field number indicating the
-   * second within the minute.
-   * E.g., at 10:04:15.250 PM the <code>UCAL_SECOND</code> is 15.
-   * @stable ICU 2.6 
-   */
-  UCAL_SECOND,
-
- /**
-   * Field number indicating the
-   * millisecond within the second.
-   * E.g., at 10:04:15.250 PM the <code>UCAL_MILLISECOND</code> is 250.
-   * @stable ICU 2.6 
-   */
-  UCAL_MILLISECOND,
-
- /**
-   * Field number indicating the
-   * raw offset from GMT in milliseconds.
-   * @stable ICU 2.6 
-   */
-  UCAL_ZONE_OFFSET,
-
- /**
-   * Field number indicating the
-   * daylight savings offset in milliseconds.
-   * @stable ICU 2.6 
-   */
-  UCAL_DST_OFFSET,
-  
- /**
-   * Field number 
-   * indicating the extended year corresponding to the
-   * <code>UCAL_WEEK_OF_YEAR</code> field.  This may be one greater or less
-   * than the value of <code>UCAL_EXTENDED_YEAR</code>.
-   * @stable ICU 2.6
-   */
-  UCAL_YEAR_WOY,
-
- /**
-   * Field number 
-   * indicating the localized day of week.  This will be a value from 1
-   * to 7 inclusive, with 1 being the localized first day of the week.
-   * @stable ICU 2.6
-   */
-  UCAL_DOW_LOCAL,
-
-  /**
-   * Year of this calendar system, encompassing all supra-year fields. For example, 
-   * in Gregorian/Julian calendars, positive Extended Year values indicate years AD,
-   *  1 BC = 0 extended, 2 BC = -1 extended, and so on. 
-   * @stable ICU 2.8 
-   */
-  UCAL_EXTENDED_YEAR,
-
- /**
-   * Field number 
-   * indicating the modified Julian day number.  This is different from
-   * the conventional Julian day number in two regards.  First, it
-   * demarcates days at local zone midnight, rather than noon GMT.
-   * Second, it is a local number; that is, it depends on the local time
-   * zone.  It can be thought of as a single number that encompasses all
-   * the date-related fields.
-   * @stable ICU 2.8
-   */
-  UCAL_JULIAN_DAY, 
-
-  /**
-   * Ranges from 0 to 23:59:59.999 (regardless of DST).  This field behaves <em>exactly</em> 
-   * like a composite of all time-related fields, not including the zone fields.  As such, 
-   * it also reflects discontinuities of those fields on DST transition days.  On a day
-   * of DST onset, it will jump forward.  On a day of DST cessation, it will jump 
-   * backward.  This reflects the fact that it must be combined with the DST_OFFSET field
-   * to obtain a unique local time value.
-   * @stable ICU 2.8
-   */
-  UCAL_MILLISECONDS_IN_DAY,
-
-  /**
-   * Whether or not the current month is a leap month (0 or 1). See the Chinese calendar for
-   * an example of this.
-   */
-  UCAL_IS_LEAP_MONTH,
-  
-  /**
-   * Field count
-   * @stable ICU 2.6
-   */
-  UCAL_FIELD_COUNT,
-
- /**
-   * Field number indicating the
-   * day of the month. This is a synonym for <code>UCAL_DATE</code>.
-   * The first day of the month has value 1.
-   * @see #UCAL_DATE
-   * Synonym for UCAL_DATE
-   * @stable ICU 2.8
-   **/
-  UCAL_DAY_OF_MONTH=UCAL_DATE
-};
-
-/** @stable ICU 2.0 */
-typedef enum UCalendarDateFields UCalendarDateFields;
-    /**
-     * Useful constant for days of week. Note: Calendar day-of-week is 1-based. Clients
-     * who create locale resources for the field of first-day-of-week should be aware of
-     * this. For instance, in US locale, first-day-of-week is set to 1, i.e., UCAL_SUNDAY.
-     */
-/** Possible days of the week in a UCalendar 
- * @stable ICU 2.0
- */
-enum UCalendarDaysOfWeek {
-  /** Sunday */
-  UCAL_SUNDAY = 1,
-  /** Monday */
-  UCAL_MONDAY,
-  /** Tuesday */
-  UCAL_TUESDAY,
-  /** Wednesday */
-  UCAL_WEDNESDAY,
-  /** Thursday */
-  UCAL_THURSDAY,
-  /** Friday */
-  UCAL_FRIDAY,
-  /** Saturday */
-  UCAL_SATURDAY
-};
-
-/** @stable ICU 2.0 */
-typedef enum UCalendarDaysOfWeek UCalendarDaysOfWeek;
-
-/** Possible months in a UCalendar. Note: Calendar month is 0-based.
- * @stable ICU 2.0
- */
-enum UCalendarMonths {
-  /** January */
-  UCAL_JANUARY,
-  /** February */
-  UCAL_FEBRUARY,
-  /** March */
-  UCAL_MARCH,
-  /** April */
-  UCAL_APRIL,
-  /** May */
-  UCAL_MAY,
-  /** June */
-  UCAL_JUNE,
-  /** July */
-  UCAL_JULY,
-  /** August */
-  UCAL_AUGUST,
-  /** September */
-  UCAL_SEPTEMBER,
-  /** October */
-  UCAL_OCTOBER,
-  /** November */
-  UCAL_NOVEMBER,
-  /** December */
-  UCAL_DECEMBER,
-  /** Value of the <code>UCAL_MONTH</code> field indicating the
-    * thirteenth month of the year. Although the Gregorian calendar
-    * does not use this value, lunar calendars do.
-    */
-  UCAL_UNDECIMBER
-};
-
-/** @stable ICU 2.0 */
-typedef enum UCalendarMonths UCalendarMonths;
-
-/** Possible AM/PM values in a UCalendar 
- * @stable ICU 2.0
- */
-enum UCalendarAMPMs {
-    /** AM */
-  UCAL_AM,
-  /** PM */
-  UCAL_PM
-};
-
-/** @stable ICU 2.0 */
-typedef enum UCalendarAMPMs UCalendarAMPMs;
-
-/**
- * Create an enumeration over all time zones.
- *
- * @param ec input/output error code
- *
- * @return an enumeration object that the caller must dispose of using
- * uenum_close(), or NULL upon failure. In case of failure *ec will
- * indicate the error.
- *
- * @stable ICU 2.6
- */
-U_STABLE UEnumeration* U_EXPORT2
-ucal_openTimeZones(UErrorCode* ec);
-
-/**
- * Create an enumeration over all time zones associated with the given
- * country. Some zones are affiliated with no country (e.g., "UTC");
- * these may also be retrieved, as a group.
- *
- * @param country the ISO 3166 two-letter country code, or NULL to
- * retrieve zones not affiliated with any country
- *
- * @param ec input/output error code
- *
- * @return an enumeration object that the caller must dispose of using
- * uenum_close(), or NULL upon failure. In case of failure *ec will
- * indicate the error.
- *
- * @stable ICU 2.6
- */
-U_STABLE UEnumeration* U_EXPORT2
-ucal_openCountryTimeZones(const char* country, UErrorCode* ec);
-
-/**
- * Return the default time zone. The default is determined initially
- * by querying the host operating system. It may be changed with
- * ucal_setDefaultTimeZone() or with the C++ TimeZone API.
- *
- * @param result A buffer to receive the result, or NULL
- *
- * @param resultCapacity The capacity of the result buffer
- *
- * @param ec input/output error code
- *
- * @return The result string length, not including the terminating
- * null
- *
- * @stable ICU 2.6
- */
-U_STABLE int32_t U_EXPORT2
-ucal_getDefaultTimeZone(UChar* result, int32_t resultCapacity, UErrorCode* ec);
-
-/**
- * Set the default time zone.
- *
- * @param zoneID null-terminated time zone ID
- *
- * @param ec input/output error code
- *
- * @stable ICU 2.6
- */
-U_STABLE void U_EXPORT2
-ucal_setDefaultTimeZone(const UChar* zoneID, UErrorCode* ec);
-
-/**
- * Return the amount of time in milliseconds that the clock is
- * advanced during daylight savings time for the given time zone, or
- * zero if the time zone does not observe daylight savings time.
- *
- * @param zoneID null-terminated time zone ID
- *
- * @param ec input/output error code
- *
- * @return the number of milliseconds the time is advanced with
- * respect to standard time when the daylight savings rules are in
- * effect. This is always a non-negative number, most commonly either
- * 3,600,000 (one hour) or zero.
- *
- * @stable ICU 2.6
- */
-U_STABLE int32_t U_EXPORT2
-ucal_getDSTSavings(const UChar* zoneID, UErrorCode* ec);
-
-/**
- * Get the current date and time.
- * The value returned is represented as milliseconds from the epoch.
- * @return The current date and time.
- * @stable ICU 2.0
- */
-U_STABLE UDate U_EXPORT2 
-ucal_getNow(void);
-
-/**
- * Open a UCalendar.
- * A UCalendar may be used to convert a millisecond value to a year,
- * month, and day.
- * @param zoneID The desired TimeZone ID.  If 0, use the default time zone.
- * @param len The length of zoneID, or -1 if null-terminated.
- * @param locale The desired locale
- * @param type The type of UCalendar to open. This can be UCAL_GREGORIAN to open the Gregorian
- * calendar for the locale, or UCAL_DEFAULT to open the default calendar for the locale (the
- * default calendar may also be Gregorian). To open a specific non-Gregorian calendar for the
- * locale, use uloc_setKeywordValue to set the value of the calendar keyword for the locale
- * and then pass the locale to ucal_open with UCAL_DEFAULT as the type.
- * @param status A pointer to an UErrorCode to receive any errors
- * @return A pointer to a UCalendar, or 0 if an error occurred.
- * @stable ICU 2.0
- */
-U_STABLE UCalendar* U_EXPORT2 
-ucal_open(const UChar*   zoneID,
-          int32_t        len,
-          const char*    locale,
-          UCalendarType  type,
-          UErrorCode*    status);
-
-/**
- * Close a UCalendar.
- * Once closed, a UCalendar may no longer be used.
- * @param cal The UCalendar to close.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucal_close(UCalendar *cal);
-
-/**
- * Open a copy of a UCalendar.
- * This function performs a deep copy.
- * @param cal The calendar to copy
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return A pointer to a UCalendar identical to cal.
- * @draft ICU 4.0
- */
-U_DRAFT UCalendar* U_EXPORT2 
-ucal_clone(const UCalendar* cal,
-           UErrorCode*      status);
-
-/**
- * Set the TimeZone used by a UCalendar.
- * A UCalendar uses a timezone for converting from Greenwich time to local time.
- * @param cal The UCalendar to set.
- * @param zoneID The desired TimeZone ID.  If 0, use the default time zone.
- * @param len The length of zoneID, or -1 if null-terminated.
- * @param status A pointer to an UErrorCode to receive any errors.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucal_setTimeZone(UCalendar*    cal,
-                 const UChar*  zoneID,
-                 int32_t       len,
-                 UErrorCode*   status);
-
-/**
- * Possible formats for a UCalendar's display name 
- * @stable ICU 2.0
- */
-enum UCalendarDisplayNameType {
-  /** Standard display name */
-  UCAL_STANDARD,
-  /** Short standard display name */
-  UCAL_SHORT_STANDARD,
-  /** Daylight savings display name */
-  UCAL_DST,
-  /** Short daylight savings display name */
-  UCAL_SHORT_DST
-};
-
-/** @stable ICU 2.0 */
-typedef enum UCalendarDisplayNameType UCalendarDisplayNameType;
-
-/**
- * Get the display name for a UCalendar's TimeZone.
- * A display name is suitable for presentation to a user.
- * @param cal          The UCalendar to query.
- * @param type         The desired display name format; one of UCAL_STANDARD, UCAL_SHORT_STANDARD,
- *                     UCAL_DST, UCAL_SHORT_DST
- * @param locale       The desired locale for the display name.
- * @param result       A pointer to a buffer to receive the formatted number.
- * @param resultLength The maximum size of result.
- * @param status       A pointer to an UErrorCode to receive any errors
- * @return             The total buffer size needed; if greater than resultLength, the output was truncated.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucal_getTimeZoneDisplayName(const UCalendar*          cal,
-                            UCalendarDisplayNameType  type,
-                            const char*               locale,
-                            UChar*                    result,
-                            int32_t                   resultLength,
-                            UErrorCode*               status);
-
-/**
- * Determine if a UCalendar is currently in daylight savings time.
- * Daylight savings time is not used in all parts of the world.
- * @param cal The UCalendar to query.
- * @param status A pointer to an UErrorCode to receive any errors
- * @return TRUE if cal is currently in daylight savings time, FALSE otherwise
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2 
-ucal_inDaylightTime(const UCalendar*  cal,
-                    UErrorCode*       status );
-
-/**
- * Sets the GregorianCalendar change date. This is the point when the switch from
- * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October
- * 15, 1582. Previous to this time and date will be Julian dates.
- *
- * This function works only for Gregorian calendars. If the UCalendar is not
- * an instance of a Gregorian calendar, then a U_UNSUPPORTED_ERROR
- * error code is set.
- *
- * @param cal        The calendar object.
- * @param date       The given Gregorian cutover date.
- * @param pErrorCode Pointer to a standard ICU error code. Its input value must
- *                   pass the U_SUCCESS() test, or else the function returns
- *                   immediately. Check for U_FAILURE() on output or use with
- *                   function chaining. (See User Guide for details.)
- *
- * @see GregorianCalendar::setGregorianChange
- * @see ucal_getGregorianChange
- * @stable ICU 3.6
- */
-U_STABLE void U_EXPORT2
-ucal_setGregorianChange(UCalendar *cal, UDate date, UErrorCode *pErrorCode);
-
-/**
- * Gets the Gregorian Calendar change date. This is the point when the switch from
- * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October
- * 15, 1582. Previous to this time and date will be Julian dates.
- *
- * This function works only for Gregorian calendars. If the UCalendar is not
- * an instance of a Gregorian calendar, then a U_UNSUPPORTED_ERROR
- * error code is set.
- *
- * @param cal        The calendar object.
- * @param pErrorCode Pointer to a standard ICU error code. Its input value must
- *                   pass the U_SUCCESS() test, or else the function returns
- *                   immediately. Check for U_FAILURE() on output or use with
- *                   function chaining. (See User Guide for details.)
- * @return   The Gregorian cutover time for this calendar.
- *
- * @see GregorianCalendar::getGregorianChange
- * @see ucal_setGregorianChange
- * @stable ICU 3.6
- */
-U_STABLE UDate U_EXPORT2
-ucal_getGregorianChange(const UCalendar *cal, UErrorCode *pErrorCode);
-
-/**
- * Types of UCalendar attributes 
- * @stable ICU 2.0
- */
-enum UCalendarAttribute {
-    /** Lenient parsing */
-  UCAL_LENIENT,
-  /** First day of week */
-  UCAL_FIRST_DAY_OF_WEEK,
-  /** Minimum number of days in first week */
-  UCAL_MINIMAL_DAYS_IN_FIRST_WEEK
-};
-
-/** @stable ICU 2.0 */
-typedef enum UCalendarAttribute UCalendarAttribute;
-
-/**
- * Get a numeric attribute associated with a UCalendar.
- * Numeric attributes include the first day of the week, or the minimal numbers
- * of days in the first week of the month.
- * @param cal The UCalendar to query.
- * @param attr The desired attribute; one of UCAL_LENIENT, UCAL_FIRST_DAY_OF_WEEK,
- * or UCAL_MINIMAL_DAYS_IN_FIRST_WEEK
- * @return The value of attr.
- * @see ucal_setAttribute
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucal_getAttribute(const UCalendar*    cal,
-                  UCalendarAttribute  attr);
-
-/**
- * Set a numeric attribute associated with a UCalendar.
- * Numeric attributes include the first day of the week, or the minimal numbers
- * of days in the first week of the month.
- * @param cal The UCalendar to set.
- * @param attr The desired attribute; one of UCAL_LENIENT, UCAL_FIRST_DAY_OF_WEEK,
- * or UCAL_MINIMAL_DAYS_IN_FIRST_WEEK
- * @param newValue The new value of attr.
- * @see ucal_getAttribute
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucal_setAttribute(UCalendar*          cal,
-                  UCalendarAttribute  attr,
-                  int32_t             newValue);
-
-/**
- * Get a locale for which calendars are available.
- * A UCalendar in a locale returned by this function will contain the correct
- * day and month names for the locale.
- * @param index The index of the desired locale.
- * @return A locale for which calendars are available, or 0 if none.
- * @see ucal_countAvailable
- * @stable ICU 2.0
- */
-U_STABLE const char* U_EXPORT2 
-ucal_getAvailable(int32_t index);
-
-/**
- * Determine how many locales have calendars available.
- * This function is most useful as determining the loop ending condition for
- * calls to \ref ucal_getAvailable.
- * @return The number of locales for which calendars are available.
- * @see ucal_getAvailable
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucal_countAvailable(void);
-
-/**
- * Get a UCalendar's current time in millis.
- * The time is represented as milliseconds from the epoch.
- * @param cal The UCalendar to query.
- * @param status A pointer to an UErrorCode to receive any errors
- * @return The calendar's current time in millis.
- * @see ucal_setMillis
- * @see ucal_setDate
- * @see ucal_setDateTime
- * @stable ICU 2.0
- */
-U_STABLE UDate U_EXPORT2 
-ucal_getMillis(const UCalendar*  cal,
-               UErrorCode*       status);
-
-/**
- * Set a UCalendar's current time in millis.
- * The time is represented as milliseconds from the epoch.
- * @param cal The UCalendar to set.
- * @param dateTime The desired date and time.
- * @param status A pointer to an UErrorCode to receive any errors
- * @see ucal_getMillis
- * @see ucal_setDate
- * @see ucal_setDateTime
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucal_setMillis(UCalendar*   cal,
-               UDate        dateTime,
-               UErrorCode*  status );
-
-/**
- * Set a UCalendar's current date.
- * The date is represented as a series of 32-bit integers.
- * @param cal The UCalendar to set.
- * @param year The desired year.
- * @param month The desired month; one of UCAL_JANUARY, UCAL_FEBRUARY, UCAL_MARCH, UCAL_APRIL, UCAL_MAY,
- * UCAL_JUNE, UCAL_JULY, UCAL_AUGUST, UCAL_SEPTEMBER, UCAL_OCTOBER, UCAL_NOVEMBER, UCAL_DECEMBER, UCAL_UNDECIMBER
- * @param date The desired day of the month.
- * @param status A pointer to an UErrorCode to receive any errors
- * @see ucal_getMillis
- * @see ucal_setMillis
- * @see ucal_setDateTime
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucal_setDate(UCalendar*   cal,
-             int32_t      year,
-             int32_t      month,
-             int32_t      date,
-             UErrorCode*  status);
-
-/**
- * Set a UCalendar's current date.
- * The date is represented as a series of 32-bit integers.
- * @param cal The UCalendar to set.
- * @param year The desired year.
- * @param month The desired month; one of UCAL_JANUARY, UCAL_FEBRUARY, UCAL_MARCH, UCAL_APRIL, UCAL_MAY,
- * UCAL_JUNE, UCAL_JULY, UCAL_AUGUST, UCAL_SEPTEMBER, UCAL_OCTOBER, UCAL_NOVEMBER, UCAL_DECEMBER, UCAL_UNDECIMBER
- * @param date The desired day of the month.
- * @param hour The desired hour of day.
- * @param minute The desired minute.
- * @param second The desirec second.
- * @param status A pointer to an UErrorCode to receive any errors
- * @see ucal_getMillis
- * @see ucal_setMillis
- * @see ucal_setDate
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucal_setDateTime(UCalendar*   cal,
-                 int32_t      year,
-                 int32_t      month,
-                 int32_t      date,
-                 int32_t      hour,
-                 int32_t      minute,
-                 int32_t      second,
-                 UErrorCode*  status);
-
-/**
- * Returns TRUE if two UCalendars are equivalent.  Equivalent
- * UCalendars will behave identically, but they may be set to
- * different times.
- * @param cal1 The first of the UCalendars to compare.
- * @param cal2 The second of the UCalendars to compare.
- * @return TRUE if cal1 and cal2 are equivalent, FALSE otherwise.
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2 
-ucal_equivalentTo(const UCalendar*  cal1,
-                  const UCalendar*  cal2);
-
-/**
- * Add a specified signed amount to a particular field in a UCalendar.
- * This can modify more significant fields in the calendar.
- * @param cal The UCalendar to which to add.
- * @param field The field to which to add the signed value; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
- * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
- * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
- * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
- * @param amount The signed amount to add to field. If the amount causes the value
- * to exceed to maximum or minimum values for that field, other fields are modified
- * to preserve the magnitude of the change.
- * @param status A pointer to an UErrorCode to receive any errors
- * @see ucal_roll
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucal_add(UCalendar*           cal,
-         UCalendarDateFields  field,
-         int32_t              amount,
-         UErrorCode*          status);
-
-/**
- * Add a specified signed amount to a particular field in a UCalendar.
- * This will not modify more significant fields in the calendar.
- * @param cal The UCalendar to which to add.
- * @param field The field to which to add the signed value; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
- * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
- * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
- * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
- * @param amount The signed amount to add to field. If the amount causes the value
- * to exceed to maximum or minimum values for that field, the field is pinned to a permissible
- * value.
- * @param status A pointer to an UErrorCode to receive any errors
- * @see ucal_add
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucal_roll(UCalendar*           cal,
-          UCalendarDateFields  field,
-          int32_t              amount,
-          UErrorCode*          status);
-
-/**
- * Get the current value of a field from a UCalendar.
- * All fields are represented as 32-bit integers.
- * @param cal The UCalendar to query.
- * @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
- * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
- * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
- * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
- * @param status A pointer to an UErrorCode to receive any errors
- * @return The value of the desired field.
- * @see ucal_set
- * @see ucal_isSet
- * @see ucal_clearField
- * @see ucal_clear
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucal_get(const UCalendar*     cal,
-         UCalendarDateFields  field,
-         UErrorCode*          status );
-
-/**
- * Set the value of a field in a UCalendar.
- * All fields are represented as 32-bit integers.
- * @param cal The UCalendar to set.
- * @param field The field to set; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
- * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
- * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
- * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
- * @param value The desired value of field.
- * @see ucal_get
- * @see ucal_isSet
- * @see ucal_clearField
- * @see ucal_clear
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucal_set(UCalendar*           cal,
-         UCalendarDateFields  field,
-         int32_t              value);
-
-/**
- * Determine if a field in a UCalendar is set.
- * All fields are represented as 32-bit integers.
- * @param cal The UCalendar to query.
- * @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
- * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
- * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
- * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
- * @return TRUE if field is set, FALSE otherwise.
- * @see ucal_get
- * @see ucal_set
- * @see ucal_clearField
- * @see ucal_clear
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2 
-ucal_isSet(const UCalendar*     cal,
-           UCalendarDateFields  field);
-
-/**
- * Clear a field in a UCalendar.
- * All fields are represented as 32-bit integers.
- * @param cal The UCalendar containing the field to clear.
- * @param field The field to clear; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
- * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
- * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
- * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
- * @see ucal_get
- * @see ucal_set
- * @see ucal_isSet
- * @see ucal_clear
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucal_clearField(UCalendar*           cal,
-                UCalendarDateFields  field);
-
-/**
- * Clear all fields in a UCalendar.
- * All fields are represented as 32-bit integers.
- * @param calendar The UCalendar to clear.
- * @see ucal_get
- * @see ucal_set
- * @see ucal_isSet
- * @see ucal_clearField
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucal_clear(UCalendar* calendar);
-
-/**
- * Possible limit values for a UCalendar 
- * @stable ICU 2.0
- */
-enum UCalendarLimitType {
-  /** Minimum value */
-  UCAL_MINIMUM,
-  /** Maximum value */
-  UCAL_MAXIMUM,
-  /** Greatest minimum value */
-  UCAL_GREATEST_MINIMUM,
-  /** Leaest maximum value */
-  UCAL_LEAST_MAXIMUM,
-  /** Actual minimum value */
-  UCAL_ACTUAL_MINIMUM,
-  /** Actual maximum value */
-  UCAL_ACTUAL_MAXIMUM
-};
-
-/** @stable ICU 2.0 */
-typedef enum UCalendarLimitType UCalendarLimitType;
-
-/**
- * Determine a limit for a field in a UCalendar.
- * A limit is a maximum or minimum value for a field.
- * @param cal The UCalendar to query.
- * @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
- * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
- * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
- * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
- * @param type The desired critical point; one of UCAL_MINIMUM, UCAL_MAXIMUM, UCAL_GREATEST_MINIMUM,
- * UCAL_LEAST_MAXIMUM, UCAL_ACTUAL_MINIMUM, UCAL_ACTUAL_MAXIMUM
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return The requested value.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucal_getLimit(const UCalendar*     cal,
-              UCalendarDateFields  field,
-              UCalendarLimitType   type,
-              UErrorCode*          status);
-
-/** Get the locale for this calendar object. You can choose between valid and actual locale.
- *  @param cal The calendar object
- *  @param type type of the locale we're looking for (valid or actual) 
- *  @param status error code for the operation
- *  @return the locale name
- *  @stable ICU 2.8
- */
-U_STABLE const char * U_EXPORT2
-ucal_getLocaleByType(const UCalendar *cal, ULocDataLocaleType type, UErrorCode* status);
-
-/**
- * Returns the timezone data version currently used by ICU.
- * @param status error code for the operation
- * @return the version string, such as "2007f"
- * @stable ICU 3.8
- */
-U_DRAFT const char * U_EXPORT2
-ucal_getTZDataVersion(UErrorCode* status);
-
-/**
- * Returns the canonical system timezone ID or the normalized
- * custom time zone ID for the given time zone ID.
- * @param id        The input timezone ID to be canonicalized.
- * @param len       The length of id, or -1 if null-terminated.
- * @param result    The buffer receives the canonical system timezone ID
- *                  or the custom timezone ID in normalized format.
- * @param resultCapacity    The capacity of the result buffer.
- * @param isSystemID        Receives if the given ID is a known system
-     *                      timezone ID.
- * @param status    Recevies the status.  When the given timezone ID
- *                  is neither a known system time zone ID nor a
- *                  valid custom timezone ID, U_ILLEGAL_ARGUMENT_ERROR
- *                  is set.
- * @return          The result string length, not including the terminating
- *                  null.
- * @draft ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-ucal_getCanonicalTimeZoneID(const UChar* id, int32_t len,
-                            UChar* result, int32_t resultCapacity, UBool *isSystemID, UErrorCode* status);
-/**
- * Get the resource keyword value string designating the calendar type for the UCalendar.
- * @param cal The UCalendar to query.
- * @param status The error code for the operation.
- * @return The resource keyword value string.
- * @draft ICU 4.2
- */
-U_DRAFT const char * U_EXPORT2
-ucal_getType(const UCalendar *cal, UErrorCode* status);
-
-/**
- * The following is a temporary Apple-specific API to help InternationalPrefs
- * transition to the updated version of the above ICU API. It will be removed soon.
- */
-U_DRAFT const char * U_EXPORT2
-ucal_getTypeWithError(const UCalendar *cal, UErrorCode* status);
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/ucal.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ucal.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ucal.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ucal.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1161 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2008, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+
+#ifndef UCAL_H
+#define UCAL_H
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+#include "unicode/uloc.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * \file
+ * \brief C API: Calendar
+ *
+ * <h2>Calendar C API</h2>
+ *
+ * UCalendar C API is used  for converting between a <code>UDate</code> object
+ * and a set of integer fields such as <code>UCAL_YEAR</code>, <code>UCAL_MONTH</code>,
+ * <code>UCAL_DAY</code>, <code>UCAL_HOUR</code>, and so on.
+ * (A <code>UDate</code> object represents a specific instant in
+ * time with millisecond precision. See UDate
+ * for information about the <code>UDate</code> .)
+ *
+ * <p>
+ * Types of <code>UCalendar</code> interpret a <code>UDate</code>
+ * according to the rules of a specific calendar system. The U_STABLE
+ * provides the enum UCalendarType with UCAL_TRADITIONAL and
+ * UCAL_GREGORIAN.
+ * <p>
+ * Like other locale-sensitive C API, calendar API  provides a
+ * function, <code>ucal_open()</code>, which returns a pointer to
+ * <code>UCalendar</code> whose time fields have been initialized
+ * with the current date and time. We need to specify the type of
+ * calendar to be opened and the  timezoneId.
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * UCalendar *caldef;
+ * UChar *tzId;
+ * UErrorCode status;
+ * tzId=(UChar*)malloc(sizeof(UChar) * (strlen("PST") +1) );
+ * u_uastrcpy(tzId, "PST");
+ * caldef=ucal_open(tzID, u_strlen(tzID), NULL, UCAL_TRADITIONAL, &status);
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <p>
+ * A <code>UCalendar</code> object can produce all the time field values
+ * needed to implement the date-time formatting for a particular language
+ * and calendar style (for example, Japanese-Gregorian, Japanese-Traditional).
+ *
+ * <p>
+ * When computing a <code>UDate</code> from time fields, two special circumstances
+ * may arise: there may be insufficient information to compute the
+ * <code>UDate</code> (such as only year and month but no day in the month),
+ * or there may be inconsistent information (such as "Tuesday, July 15, 1996"
+ * -- July 15, 1996 is actually a Monday).
+ *
+ * <p>
+ * <strong>Insufficient information.</strong> The calendar will use default
+ * information to specify the missing fields. This may vary by calendar; for
+ * the Gregorian calendar, the default for a field is the same as that of the
+ * start of the epoch: i.e., UCAL_YEAR = 1970, UCAL_MONTH = JANUARY, UCAL_DATE = 1, etc.
+ *
+ * <p>
+ * <strong>Inconsistent information.</strong> If fields conflict, the calendar
+ * will give preference to fields set more recently. For example, when
+ * determining the day, the calendar will look for one of the following
+ * combinations of fields.  The most recent combination, as determined by the
+ * most recently set single field, will be used.
+ *
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * UCAL_MONTH + UCAL_DAY_OF_MONTH
+ * UCAL_MONTH + UCAL_WEEK_OF_MONTH + UCAL_DAY_OF_WEEK
+ * UCAL_MONTH + UCAL_DAY_OF_WEEK_IN_MONTH + UCAL_DAY_OF_WEEK
+ * UCAL_DAY_OF_YEAR
+ * UCAL_DAY_OF_WEEK + UCAL_WEEK_OF_YEAR
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * For the time of day:
+ *
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * UCAL_HOUR_OF_DAY
+ * UCAL_AM_PM + UCAL_HOUR
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <p>
+ * <strong>Note:</strong> for some non-Gregorian calendars, different
+ * fields may be necessary for complete disambiguation. For example, a full
+ * specification of the historial Arabic astronomical calendar requires year,
+ * month, day-of-month <em>and</em> day-of-week in some cases.
+ *
+ * <p>
+ * <strong>Note:</strong> There are certain possible ambiguities in
+ * interpretation of certain singular times, which are resolved in the
+ * following ways:
+ * <ol>
+ *     <li> 24:00:00 "belongs" to the following day. That is,
+ *          23:59 on Dec 31, 1969 &lt; 24:00 on Jan 1, 1970 &lt; 24:01:00 on Jan 1, 1970
+ *
+ *     <li> Although historically not precise, midnight also belongs to "am",
+ *          and noon belongs to "pm", so on the same day,
+ *          12:00 am (midnight) &lt; 12:01 am, and 12:00 pm (noon) &lt; 12:01 pm
+ * </ol>
+ *
+ * <p>
+ * The date or time format strings are not part of the definition of a
+ * calendar, as those must be modifiable or overridable by the user at
+ * runtime. Use {@link DateFormat}
+ * to format dates.
+ *
+ * <p>
+ * <code>Calendar</code> provides an API for field "rolling", where fields
+ * can be incremented or decremented, but wrap around. For example, rolling the
+ * month up in the date <code>December 12, <b>1996</b></code> results in
+ * <code>January 12, <b>1996</b></code>.
+ *
+ * <p>
+ * <code>Calendar</code> also provides a date arithmetic function for
+ * adding the specified (signed) amount of time to a particular time field.
+ * For example, subtracting 5 days from the date <code>September 12, 1996</code>
+ * results in <code>September 7, 1996</code>.
+ *
+ * @stable ICU 2.0
+ */
+
+/** A calendar.
+ *  For usage in C programs.
+ * @stable ICU 2.0
+ */
+typedef void* UCalendar;
+
+/** Possible types of UCalendars 
+ * @stable ICU 2.0
+ */
+enum UCalendarType {
+  /**
+   * Despite the name, UCAL_TRADITIONAL designates the locale's default calendar,
+   * which may be the Gregorian calendar or some other calendar.
+   * @stable ICU 2.0
+   */
+  UCAL_TRADITIONAL,
+  /**
+   * Unambiguously designates the Gregorian calendar for the locale.
+   * @stable ICU 2.0
+   */
+  UCAL_GREGORIAN,
+  /**
+   * A better name for UCAL_TRADITIONAL.
+   * @draft ICU 4.2
+   */
+  UCAL_DEFAULT = UCAL_TRADITIONAL
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarType UCalendarType;
+
+/** Possible fields in a UCalendar 
+ * @stable ICU 2.0
+ */
+enum UCalendarDateFields {
+  /** 
+   * Field number indicating the era, e.g., AD or BC in the Gregorian (Julian) calendar. 
+   * This is a calendar-specific value.
+   * @stable ICU 2.6 
+   */
+  UCAL_ERA,
+
+  /**
+   * Field number indicating the year. This is a calendar-specific value.
+   * @stable ICU 2.6 
+   */
+  UCAL_YEAR,
+
+  /**
+   * Field number indicating the month. This is a calendar-specific value. 
+   * The first month of the year is
+   * <code>JANUARY</code>; the last depends on the number of months in a year.
+   * @see #UCAL_JANUARY
+   * @see #UCAL_FEBRUARY
+   * @see #UCAL_MARCH
+   * @see #UCAL_APRIL
+   * @see #UCAL_MAY
+   * @see #UCAL_JUNE
+   * @see #UCAL_JULY
+   * @see #UCAL_AUGUST
+   * @see #UCAL_SEPTEMBER
+   * @see #UCAL_OCTOBER
+   * @see #UCAL_NOVEMBER
+   * @see #UCAL_DECEMBER
+   * @see #UCAL_UNDECIMBER
+   * @stable ICU 2.6 
+   */
+  UCAL_MONTH,
+
+  /**
+   * Field number indicating the
+   * week number within the current year.  The first week of the year, as
+   * defined by <code>UCAL_FIRST_DAY_OF_WEEK</code> and <code>UCAL_MINIMAL_DAYS_IN_FIRST_WEEK</code>
+   * attributes, has value 1.  Subclasses define
+   * the value of <code>UCAL_WEEK_OF_YEAR</code> for days before the first week of
+   * the year.
+   * @see ucal_getAttribute
+   * @see ucal_setAttribute
+   * @stable ICU 2.6 
+   */
+  UCAL_WEEK_OF_YEAR,
+
+ /**
+   * Field number indicating the
+   * week number within the current month.  The first week of the month, as
+   * defined by <code>UCAL_FIRST_DAY_OF_WEEK</code> and <code>UCAL_MINIMAL_DAYS_IN_FIRST_WEEK</code>
+   * attributes, has value 1.  Subclasses define
+   * the value of <code>WEEK_OF_MONTH</code> for days before the first week of
+   * the month.
+   * @see ucal_getAttribute
+   * @see ucal_setAttribute
+   * @see #UCAL_FIRST_DAY_OF_WEEK
+   * @see #UCAL_MINIMAL_DAYS_IN_FIRST_WEEK
+   * @stable ICU 2.6 
+   */
+  UCAL_WEEK_OF_MONTH,
+
+ /**
+   * Field number indicating the
+   * day of the month. This is a synonym for <code>DAY_OF_MONTH</code>.
+   * The first day of the month has value 1.
+   * @see #UCAL_DAY_OF_MONTH
+   * @stable ICU 2.6 
+   */
+  UCAL_DATE,
+
+ /**
+   * Field number indicating the day
+   * number within the current year.  The first day of the year has value 1.
+   * @stable ICU 2.6 
+   */
+  UCAL_DAY_OF_YEAR,
+
+ /**
+   * Field number indicating the day
+   * of the week.  This field takes values <code>SUNDAY</code>,
+   * <code>MONDAY</code>, <code>TUESDAY</code>, <code>WEDNESDAY</code>,
+   * <code>THURSDAY</code>, <code>FRIDAY</code>, and <code>SATURDAY</code>.
+   * @see #UCAL_SUNDAY
+   * @see #UCAL_MONDAY
+   * @see #UCAL_TUESDAY
+   * @see #UCAL_WEDNESDAY
+   * @see #UCAL_THURSDAY
+   * @see #UCAL_FRIDAY
+   * @see #UCAL_SATURDAY
+   * @stable ICU 2.6 
+   */
+  UCAL_DAY_OF_WEEK,
+
+ /**
+   * Field number indicating the
+   * ordinal number of the day of the week within the current month. Together
+   * with the <code>DAY_OF_WEEK</code> field, this uniquely specifies a day
+   * within a month.  Unlike <code>WEEK_OF_MONTH</code> and
+   * <code>WEEK_OF_YEAR</code>, this field's value does <em>not</em> depend on
+   * <code>getFirstDayOfWeek()</code> or
+   * <code>getMinimalDaysInFirstWeek()</code>.  <code>DAY_OF_MONTH 1</code>
+   * through <code>7</code> always correspond to <code>DAY_OF_WEEK_IN_MONTH
+   * 1</code>; <code>8</code> through <code>15</code> correspond to
+   * <code>DAY_OF_WEEK_IN_MONTH 2</code>, and so on.
+   * <code>DAY_OF_WEEK_IN_MONTH 0</code> indicates the week before
+   * <code>DAY_OF_WEEK_IN_MONTH 1</code>.  Negative values count back from the
+   * end of the month, so the last Sunday of a month is specified as
+   * <code>DAY_OF_WEEK = SUNDAY, DAY_OF_WEEK_IN_MONTH = -1</code>.  Because
+   * negative values count backward they will usually be aligned differently
+   * within the month than positive values.  For example, if a month has 31
+   * days, <code>DAY_OF_WEEK_IN_MONTH -1</code> will overlap
+   * <code>DAY_OF_WEEK_IN_MONTH 5</code> and the end of <code>4</code>.
+   * @see #UCAL_DAY_OF_WEEK
+   * @see #UCAL_WEEK_OF_MONTH
+   * @stable ICU 2.6 
+   */
+  UCAL_DAY_OF_WEEK_IN_MONTH,
+
+ /**
+   * Field number indicating
+   * whether the <code>HOUR</code> is before or after noon.
+   * E.g., at 10:04:15.250 PM the <code>AM_PM</code> is <code>PM</code>.
+   * @see #UCAL_AM
+   * @see #UCAL_PM
+   * @see #UCAL_HOUR
+   * @stable ICU 2.6 
+   */
+  UCAL_AM_PM,
+
+ /**
+   * Field number indicating the
+   * hour of the morning or afternoon. <code>HOUR</code> is used for the 12-hour
+   * clock.
+   * E.g., at 10:04:15.250 PM the <code>HOUR</code> is 10.
+   * @see #UCAL_AM_PM
+   * @see #UCAL_HOUR_OF_DAY
+   * @stable ICU 2.6 
+   */
+  UCAL_HOUR,
+
+ /**
+   * Field number indicating the
+   * hour of the day. <code>HOUR_OF_DAY</code> is used for the 24-hour clock.
+   * E.g., at 10:04:15.250 PM the <code>HOUR_OF_DAY</code> is 22.
+   * @see #UCAL_HOUR
+   * @stable ICU 2.6 
+   */
+  UCAL_HOUR_OF_DAY,
+
+ /**
+   * Field number indicating the
+   * minute within the hour.
+   * E.g., at 10:04:15.250 PM the <code>UCAL_MINUTE</code> is 4.
+   * @stable ICU 2.6 
+   */
+  UCAL_MINUTE,
+
+ /**
+   * Field number indicating the
+   * second within the minute.
+   * E.g., at 10:04:15.250 PM the <code>UCAL_SECOND</code> is 15.
+   * @stable ICU 2.6 
+   */
+  UCAL_SECOND,
+
+ /**
+   * Field number indicating the
+   * millisecond within the second.
+   * E.g., at 10:04:15.250 PM the <code>UCAL_MILLISECOND</code> is 250.
+   * @stable ICU 2.6 
+   */
+  UCAL_MILLISECOND,
+
+ /**
+   * Field number indicating the
+   * raw offset from GMT in milliseconds.
+   * @stable ICU 2.6 
+   */
+  UCAL_ZONE_OFFSET,
+
+ /**
+   * Field number indicating the
+   * daylight savings offset in milliseconds.
+   * @stable ICU 2.6 
+   */
+  UCAL_DST_OFFSET,
+  
+ /**
+   * Field number 
+   * indicating the extended year corresponding to the
+   * <code>UCAL_WEEK_OF_YEAR</code> field.  This may be one greater or less
+   * than the value of <code>UCAL_EXTENDED_YEAR</code>.
+   * @stable ICU 2.6
+   */
+  UCAL_YEAR_WOY,
+
+ /**
+   * Field number 
+   * indicating the localized day of week.  This will be a value from 1
+   * to 7 inclusive, with 1 being the localized first day of the week.
+   * @stable ICU 2.6
+   */
+  UCAL_DOW_LOCAL,
+
+  /**
+   * Year of this calendar system, encompassing all supra-year fields. For example, 
+   * in Gregorian/Julian calendars, positive Extended Year values indicate years AD,
+   *  1 BC = 0 extended, 2 BC = -1 extended, and so on. 
+   * @stable ICU 2.8 
+   */
+  UCAL_EXTENDED_YEAR,
+
+ /**
+   * Field number 
+   * indicating the modified Julian day number.  This is different from
+   * the conventional Julian day number in two regards.  First, it
+   * demarcates days at local zone midnight, rather than noon GMT.
+   * Second, it is a local number; that is, it depends on the local time
+   * zone.  It can be thought of as a single number that encompasses all
+   * the date-related fields.
+   * @stable ICU 2.8
+   */
+  UCAL_JULIAN_DAY, 
+
+  /**
+   * Ranges from 0 to 23:59:59.999 (regardless of DST).  This field behaves <em>exactly</em> 
+   * like a composite of all time-related fields, not including the zone fields.  As such, 
+   * it also reflects discontinuities of those fields on DST transition days.  On a day
+   * of DST onset, it will jump forward.  On a day of DST cessation, it will jump 
+   * backward.  This reflects the fact that it must be combined with the DST_OFFSET field
+   * to obtain a unique local time value.
+   * @stable ICU 2.8
+   */
+  UCAL_MILLISECONDS_IN_DAY,
+
+  /**
+   * Whether or not the current month is a leap month (0 or 1). See the Chinese calendar for
+   * an example of this.
+   */
+  UCAL_IS_LEAP_MONTH,
+  
+  /**
+   * Field count
+   * @stable ICU 2.6
+   */
+  UCAL_FIELD_COUNT,
+
+ /**
+   * Field number indicating the
+   * day of the month. This is a synonym for <code>UCAL_DATE</code>.
+   * The first day of the month has value 1.
+   * @see #UCAL_DATE
+   * Synonym for UCAL_DATE
+   * @stable ICU 2.8
+   **/
+  UCAL_DAY_OF_MONTH=UCAL_DATE
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarDateFields UCalendarDateFields;
+    /**
+     * Useful constant for days of week. Note: Calendar day-of-week is 1-based. Clients
+     * who create locale resources for the field of first-day-of-week should be aware of
+     * this. For instance, in US locale, first-day-of-week is set to 1, i.e., UCAL_SUNDAY.
+     */
+/** Possible days of the week in a UCalendar 
+ * @stable ICU 2.0
+ */
+enum UCalendarDaysOfWeek {
+  /** Sunday */
+  UCAL_SUNDAY = 1,
+  /** Monday */
+  UCAL_MONDAY,
+  /** Tuesday */
+  UCAL_TUESDAY,
+  /** Wednesday */
+  UCAL_WEDNESDAY,
+  /** Thursday */
+  UCAL_THURSDAY,
+  /** Friday */
+  UCAL_FRIDAY,
+  /** Saturday */
+  UCAL_SATURDAY
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarDaysOfWeek UCalendarDaysOfWeek;
+
+/** Possible months in a UCalendar. Note: Calendar month is 0-based.
+ * @stable ICU 2.0
+ */
+enum UCalendarMonths {
+  /** January */
+  UCAL_JANUARY,
+  /** February */
+  UCAL_FEBRUARY,
+  /** March */
+  UCAL_MARCH,
+  /** April */
+  UCAL_APRIL,
+  /** May */
+  UCAL_MAY,
+  /** June */
+  UCAL_JUNE,
+  /** July */
+  UCAL_JULY,
+  /** August */
+  UCAL_AUGUST,
+  /** September */
+  UCAL_SEPTEMBER,
+  /** October */
+  UCAL_OCTOBER,
+  /** November */
+  UCAL_NOVEMBER,
+  /** December */
+  UCAL_DECEMBER,
+  /** Value of the <code>UCAL_MONTH</code> field indicating the
+    * thirteenth month of the year. Although the Gregorian calendar
+    * does not use this value, lunar calendars do.
+    */
+  UCAL_UNDECIMBER
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarMonths UCalendarMonths;
+
+/** Possible AM/PM values in a UCalendar 
+ * @stable ICU 2.0
+ */
+enum UCalendarAMPMs {
+    /** AM */
+  UCAL_AM,
+  /** PM */
+  UCAL_PM
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarAMPMs UCalendarAMPMs;
+
+/**
+ * Create an enumeration over all time zones.
+ *
+ * @param ec input/output error code
+ *
+ * @return an enumeration object that the caller must dispose of using
+ * uenum_close(), or NULL upon failure. In case of failure *ec will
+ * indicate the error.
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UEnumeration* U_EXPORT2
+ucal_openTimeZones(UErrorCode* ec);
+
+/**
+ * Create an enumeration over all time zones associated with the given
+ * country. Some zones are affiliated with no country (e.g., "UTC");
+ * these may also be retrieved, as a group.
+ *
+ * @param country the ISO 3166 two-letter country code, or NULL to
+ * retrieve zones not affiliated with any country
+ *
+ * @param ec input/output error code
+ *
+ * @return an enumeration object that the caller must dispose of using
+ * uenum_close(), or NULL upon failure. In case of failure *ec will
+ * indicate the error.
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UEnumeration* U_EXPORT2
+ucal_openCountryTimeZones(const char* country, UErrorCode* ec);
+
+/**
+ * Return the default time zone. The default is determined initially
+ * by querying the host operating system. It may be changed with
+ * ucal_setDefaultTimeZone() or with the C++ TimeZone API.
+ *
+ * @param result A buffer to receive the result, or NULL
+ *
+ * @param resultCapacity The capacity of the result buffer
+ *
+ * @param ec input/output error code
+ *
+ * @return The result string length, not including the terminating
+ * null
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucal_getDefaultTimeZone(UChar* result, int32_t resultCapacity, UErrorCode* ec);
+
+/**
+ * Set the default time zone.
+ *
+ * @param zoneID null-terminated time zone ID
+ *
+ * @param ec input/output error code
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+ucal_setDefaultTimeZone(const UChar* zoneID, UErrorCode* ec);
+
+/**
+ * Return the amount of time in milliseconds that the clock is
+ * advanced during daylight savings time for the given time zone, or
+ * zero if the time zone does not observe daylight savings time.
+ *
+ * @param zoneID null-terminated time zone ID
+ *
+ * @param ec input/output error code
+ *
+ * @return the number of milliseconds the time is advanced with
+ * respect to standard time when the daylight savings rules are in
+ * effect. This is always a non-negative number, most commonly either
+ * 3,600,000 (one hour) or zero.
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucal_getDSTSavings(const UChar* zoneID, UErrorCode* ec);
+
+/**
+ * Get the current date and time.
+ * The value returned is represented as milliseconds from the epoch.
+ * @return The current date and time.
+ * @stable ICU 2.0
+ */
+U_STABLE UDate U_EXPORT2 
+ucal_getNow(void);
+
+/**
+ * Open a UCalendar.
+ * A UCalendar may be used to convert a millisecond value to a year,
+ * month, and day.
+ * @param zoneID The desired TimeZone ID.  If 0, use the default time zone.
+ * @param len The length of zoneID, or -1 if null-terminated.
+ * @param locale The desired locale
+ * @param type The type of UCalendar to open. This can be UCAL_GREGORIAN to open the Gregorian
+ * calendar for the locale, or UCAL_DEFAULT to open the default calendar for the locale (the
+ * default calendar may also be Gregorian). To open a specific non-Gregorian calendar for the
+ * locale, use uloc_setKeywordValue to set the value of the calendar keyword for the locale
+ * and then pass the locale to ucal_open with UCAL_DEFAULT as the type.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return A pointer to a UCalendar, or 0 if an error occurred.
+ * @stable ICU 2.0
+ */
+U_STABLE UCalendar* U_EXPORT2 
+ucal_open(const UChar*   zoneID,
+          int32_t        len,
+          const char*    locale,
+          UCalendarType  type,
+          UErrorCode*    status);
+
+/**
+ * Close a UCalendar.
+ * Once closed, a UCalendar may no longer be used.
+ * @param cal The UCalendar to close.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucal_close(UCalendar *cal);
+
+/**
+ * Open a copy of a UCalendar.
+ * This function performs a deep copy.
+ * @param cal The calendar to copy
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UCalendar identical to cal.
+ * @draft ICU 4.0
+ */
+U_DRAFT UCalendar* U_EXPORT2 
+ucal_clone(const UCalendar* cal,
+           UErrorCode*      status);
+
+/**
+ * Set the TimeZone used by a UCalendar.
+ * A UCalendar uses a timezone for converting from Greenwich time to local time.
+ * @param cal The UCalendar to set.
+ * @param zoneID The desired TimeZone ID.  If 0, use the default time zone.
+ * @param len The length of zoneID, or -1 if null-terminated.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucal_setTimeZone(UCalendar*    cal,
+                 const UChar*  zoneID,
+                 int32_t       len,
+                 UErrorCode*   status);
+
+/**
+ * Possible formats for a UCalendar's display name 
+ * @stable ICU 2.0
+ */
+enum UCalendarDisplayNameType {
+  /** Standard display name */
+  UCAL_STANDARD,
+  /** Short standard display name */
+  UCAL_SHORT_STANDARD,
+  /** Daylight savings display name */
+  UCAL_DST,
+  /** Short daylight savings display name */
+  UCAL_SHORT_DST
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarDisplayNameType UCalendarDisplayNameType;
+
+/**
+ * Get the display name for a UCalendar's TimeZone.
+ * A display name is suitable for presentation to a user.
+ * @param cal          The UCalendar to query.
+ * @param type         The desired display name format; one of UCAL_STANDARD, UCAL_SHORT_STANDARD,
+ *                     UCAL_DST, UCAL_SHORT_DST
+ * @param locale       The desired locale for the display name.
+ * @param result       A pointer to a buffer to receive the formatted number.
+ * @param resultLength The maximum size of result.
+ * @param status       A pointer to an UErrorCode to receive any errors
+ * @return             The total buffer size needed; if greater than resultLength, the output was truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucal_getTimeZoneDisplayName(const UCalendar*          cal,
+                            UCalendarDisplayNameType  type,
+                            const char*               locale,
+                            UChar*                    result,
+                            int32_t                   resultLength,
+                            UErrorCode*               status);
+
+/**
+ * Determine if a UCalendar is currently in daylight savings time.
+ * Daylight savings time is not used in all parts of the world.
+ * @param cal The UCalendar to query.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return TRUE if cal is currently in daylight savings time, FALSE otherwise
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2 
+ucal_inDaylightTime(const UCalendar*  cal,
+                    UErrorCode*       status );
+
+/**
+ * Sets the GregorianCalendar change date. This is the point when the switch from
+ * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October
+ * 15, 1582. Previous to this time and date will be Julian dates.
+ *
+ * This function works only for Gregorian calendars. If the UCalendar is not
+ * an instance of a Gregorian calendar, then a U_UNSUPPORTED_ERROR
+ * error code is set.
+ *
+ * @param cal        The calendar object.
+ * @param date       The given Gregorian cutover date.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ *                   pass the U_SUCCESS() test, or else the function returns
+ *                   immediately. Check for U_FAILURE() on output or use with
+ *                   function chaining. (See User Guide for details.)
+ *
+ * @see GregorianCalendar::setGregorianChange
+ * @see ucal_getGregorianChange
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ucal_setGregorianChange(UCalendar *cal, UDate date, UErrorCode *pErrorCode);
+
+/**
+ * Gets the Gregorian Calendar change date. This is the point when the switch from
+ * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October
+ * 15, 1582. Previous to this time and date will be Julian dates.
+ *
+ * This function works only for Gregorian calendars. If the UCalendar is not
+ * an instance of a Gregorian calendar, then a U_UNSUPPORTED_ERROR
+ * error code is set.
+ *
+ * @param cal        The calendar object.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ *                   pass the U_SUCCESS() test, or else the function returns
+ *                   immediately. Check for U_FAILURE() on output or use with
+ *                   function chaining. (See User Guide for details.)
+ * @return   The Gregorian cutover time for this calendar.
+ *
+ * @see GregorianCalendar::getGregorianChange
+ * @see ucal_setGregorianChange
+ * @stable ICU 3.6
+ */
+U_STABLE UDate U_EXPORT2
+ucal_getGregorianChange(const UCalendar *cal, UErrorCode *pErrorCode);
+
+/**
+ * Types of UCalendar attributes 
+ * @stable ICU 2.0
+ */
+enum UCalendarAttribute {
+    /** Lenient parsing */
+  UCAL_LENIENT,
+  /** First day of week */
+  UCAL_FIRST_DAY_OF_WEEK,
+  /** Minimum number of days in first week */
+  UCAL_MINIMAL_DAYS_IN_FIRST_WEEK
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarAttribute UCalendarAttribute;
+
+/**
+ * Get a numeric attribute associated with a UCalendar.
+ * Numeric attributes include the first day of the week, or the minimal numbers
+ * of days in the first week of the month.
+ * @param cal The UCalendar to query.
+ * @param attr The desired attribute; one of UCAL_LENIENT, UCAL_FIRST_DAY_OF_WEEK,
+ * or UCAL_MINIMAL_DAYS_IN_FIRST_WEEK
+ * @return The value of attr.
+ * @see ucal_setAttribute
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucal_getAttribute(const UCalendar*    cal,
+                  UCalendarAttribute  attr);
+
+/**
+ * Set a numeric attribute associated with a UCalendar.
+ * Numeric attributes include the first day of the week, or the minimal numbers
+ * of days in the first week of the month.
+ * @param cal The UCalendar to set.
+ * @param attr The desired attribute; one of UCAL_LENIENT, UCAL_FIRST_DAY_OF_WEEK,
+ * or UCAL_MINIMAL_DAYS_IN_FIRST_WEEK
+ * @param newValue The new value of attr.
+ * @see ucal_getAttribute
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucal_setAttribute(UCalendar*          cal,
+                  UCalendarAttribute  attr,
+                  int32_t             newValue);
+
+/**
+ * Get a locale for which calendars are available.
+ * A UCalendar in a locale returned by this function will contain the correct
+ * day and month names for the locale.
+ * @param index The index of the desired locale.
+ * @return A locale for which calendars are available, or 0 if none.
+ * @see ucal_countAvailable
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2 
+ucal_getAvailable(int32_t index);
+
+/**
+ * Determine how many locales have calendars available.
+ * This function is most useful as determining the loop ending condition for
+ * calls to \ref ucal_getAvailable.
+ * @return The number of locales for which calendars are available.
+ * @see ucal_getAvailable
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucal_countAvailable(void);
+
+/**
+ * Get a UCalendar's current time in millis.
+ * The time is represented as milliseconds from the epoch.
+ * @param cal The UCalendar to query.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return The calendar's current time in millis.
+ * @see ucal_setMillis
+ * @see ucal_setDate
+ * @see ucal_setDateTime
+ * @stable ICU 2.0
+ */
+U_STABLE UDate U_EXPORT2 
+ucal_getMillis(const UCalendar*  cal,
+               UErrorCode*       status);
+
+/**
+ * Set a UCalendar's current time in millis.
+ * The time is represented as milliseconds from the epoch.
+ * @param cal The UCalendar to set.
+ * @param dateTime The desired date and time.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @see ucal_getMillis
+ * @see ucal_setDate
+ * @see ucal_setDateTime
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucal_setMillis(UCalendar*   cal,
+               UDate        dateTime,
+               UErrorCode*  status );
+
+/**
+ * Set a UCalendar's current date.
+ * The date is represented as a series of 32-bit integers.
+ * @param cal The UCalendar to set.
+ * @param year The desired year.
+ * @param month The desired month; one of UCAL_JANUARY, UCAL_FEBRUARY, UCAL_MARCH, UCAL_APRIL, UCAL_MAY,
+ * UCAL_JUNE, UCAL_JULY, UCAL_AUGUST, UCAL_SEPTEMBER, UCAL_OCTOBER, UCAL_NOVEMBER, UCAL_DECEMBER, UCAL_UNDECIMBER
+ * @param date The desired day of the month.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @see ucal_getMillis
+ * @see ucal_setMillis
+ * @see ucal_setDateTime
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucal_setDate(UCalendar*   cal,
+             int32_t      year,
+             int32_t      month,
+             int32_t      date,
+             UErrorCode*  status);
+
+/**
+ * Set a UCalendar's current date.
+ * The date is represented as a series of 32-bit integers.
+ * @param cal The UCalendar to set.
+ * @param year The desired year.
+ * @param month The desired month; one of UCAL_JANUARY, UCAL_FEBRUARY, UCAL_MARCH, UCAL_APRIL, UCAL_MAY,
+ * UCAL_JUNE, UCAL_JULY, UCAL_AUGUST, UCAL_SEPTEMBER, UCAL_OCTOBER, UCAL_NOVEMBER, UCAL_DECEMBER, UCAL_UNDECIMBER
+ * @param date The desired day of the month.
+ * @param hour The desired hour of day.
+ * @param minute The desired minute.
+ * @param second The desirec second.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @see ucal_getMillis
+ * @see ucal_setMillis
+ * @see ucal_setDate
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucal_setDateTime(UCalendar*   cal,
+                 int32_t      year,
+                 int32_t      month,
+                 int32_t      date,
+                 int32_t      hour,
+                 int32_t      minute,
+                 int32_t      second,
+                 UErrorCode*  status);
+
+/**
+ * Returns TRUE if two UCalendars are equivalent.  Equivalent
+ * UCalendars will behave identically, but they may be set to
+ * different times.
+ * @param cal1 The first of the UCalendars to compare.
+ * @param cal2 The second of the UCalendars to compare.
+ * @return TRUE if cal1 and cal2 are equivalent, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2 
+ucal_equivalentTo(const UCalendar*  cal1,
+                  const UCalendar*  cal2);
+
+/**
+ * Add a specified signed amount to a particular field in a UCalendar.
+ * This can modify more significant fields in the calendar.
+ * @param cal The UCalendar to which to add.
+ * @param field The field to which to add the signed value; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @param amount The signed amount to add to field. If the amount causes the value
+ * to exceed to maximum or minimum values for that field, other fields are modified
+ * to preserve the magnitude of the change.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @see ucal_roll
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucal_add(UCalendar*           cal,
+         UCalendarDateFields  field,
+         int32_t              amount,
+         UErrorCode*          status);
+
+/**
+ * Add a specified signed amount to a particular field in a UCalendar.
+ * This will not modify more significant fields in the calendar.
+ * @param cal The UCalendar to which to add.
+ * @param field The field to which to add the signed value; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @param amount The signed amount to add to field. If the amount causes the value
+ * to exceed to maximum or minimum values for that field, the field is pinned to a permissible
+ * value.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @see ucal_add
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucal_roll(UCalendar*           cal,
+          UCalendarDateFields  field,
+          int32_t              amount,
+          UErrorCode*          status);
+
+/**
+ * Get the current value of a field from a UCalendar.
+ * All fields are represented as 32-bit integers.
+ * @param cal The UCalendar to query.
+ * @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return The value of the desired field.
+ * @see ucal_set
+ * @see ucal_isSet
+ * @see ucal_clearField
+ * @see ucal_clear
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucal_get(const UCalendar*     cal,
+         UCalendarDateFields  field,
+         UErrorCode*          status );
+
+/**
+ * Set the value of a field in a UCalendar.
+ * All fields are represented as 32-bit integers.
+ * @param cal The UCalendar to set.
+ * @param field The field to set; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @param value The desired value of field.
+ * @see ucal_get
+ * @see ucal_isSet
+ * @see ucal_clearField
+ * @see ucal_clear
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucal_set(UCalendar*           cal,
+         UCalendarDateFields  field,
+         int32_t              value);
+
+/**
+ * Determine if a field in a UCalendar is set.
+ * All fields are represented as 32-bit integers.
+ * @param cal The UCalendar to query.
+ * @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @return TRUE if field is set, FALSE otherwise.
+ * @see ucal_get
+ * @see ucal_set
+ * @see ucal_clearField
+ * @see ucal_clear
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2 
+ucal_isSet(const UCalendar*     cal,
+           UCalendarDateFields  field);
+
+/**
+ * Clear a field in a UCalendar.
+ * All fields are represented as 32-bit integers.
+ * @param cal The UCalendar containing the field to clear.
+ * @param field The field to clear; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @see ucal_get
+ * @see ucal_set
+ * @see ucal_isSet
+ * @see ucal_clear
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucal_clearField(UCalendar*           cal,
+                UCalendarDateFields  field);
+
+/**
+ * Clear all fields in a UCalendar.
+ * All fields are represented as 32-bit integers.
+ * @param calendar The UCalendar to clear.
+ * @see ucal_get
+ * @see ucal_set
+ * @see ucal_isSet
+ * @see ucal_clearField
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucal_clear(UCalendar* calendar);
+
+/**
+ * Possible limit values for a UCalendar 
+ * @stable ICU 2.0
+ */
+enum UCalendarLimitType {
+  /** Minimum value */
+  UCAL_MINIMUM,
+  /** Maximum value */
+  UCAL_MAXIMUM,
+  /** Greatest minimum value */
+  UCAL_GREATEST_MINIMUM,
+  /** Leaest maximum value */
+  UCAL_LEAST_MAXIMUM,
+  /** Actual minimum value */
+  UCAL_ACTUAL_MINIMUM,
+  /** Actual maximum value */
+  UCAL_ACTUAL_MAXIMUM
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarLimitType UCalendarLimitType;
+
+/**
+ * Determine a limit for a field in a UCalendar.
+ * A limit is a maximum or minimum value for a field.
+ * @param cal The UCalendar to query.
+ * @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @param type The desired critical point; one of UCAL_MINIMUM, UCAL_MAXIMUM, UCAL_GREATEST_MINIMUM,
+ * UCAL_LEAST_MAXIMUM, UCAL_ACTUAL_MINIMUM, UCAL_ACTUAL_MAXIMUM
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The requested value.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucal_getLimit(const UCalendar*     cal,
+              UCalendarDateFields  field,
+              UCalendarLimitType   type,
+              UErrorCode*          status);
+
+/** Get the locale for this calendar object. You can choose between valid and actual locale.
+ *  @param cal The calendar object
+ *  @param type type of the locale we're looking for (valid or actual) 
+ *  @param status error code for the operation
+ *  @return the locale name
+ *  @stable ICU 2.8
+ */
+U_STABLE const char * U_EXPORT2
+ucal_getLocaleByType(const UCalendar *cal, ULocDataLocaleType type, UErrorCode* status);
+
+/**
+ * Returns the timezone data version currently used by ICU.
+ * @param status error code for the operation
+ * @return the version string, such as "2007f"
+ * @stable ICU 3.8
+ */
+U_DRAFT const char * U_EXPORT2
+ucal_getTZDataVersion(UErrorCode* status);
+
+/**
+ * Returns the canonical system timezone ID or the normalized
+ * custom time zone ID for the given time zone ID.
+ * @param id        The input timezone ID to be canonicalized.
+ * @param len       The length of id, or -1 if null-terminated.
+ * @param result    The buffer receives the canonical system timezone ID
+ *                  or the custom timezone ID in normalized format.
+ * @param resultCapacity    The capacity of the result buffer.
+ * @param isSystemID        Receives if the given ID is a known system
+     *                      timezone ID.
+ * @param status    Recevies the status.  When the given timezone ID
+ *                  is neither a known system time zone ID nor a
+ *                  valid custom timezone ID, U_ILLEGAL_ARGUMENT_ERROR
+ *                  is set.
+ * @return          The result string length, not including the terminating
+ *                  null.
+ * @draft ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+ucal_getCanonicalTimeZoneID(const UChar* id, int32_t len,
+                            UChar* result, int32_t resultCapacity, UBool *isSystemID, UErrorCode* status);
+/**
+ * Get the resource keyword value string designating the calendar type for the UCalendar.
+ * @param cal The UCalendar to query.
+ * @param status The error code for the operation.
+ * @return The resource keyword value string.
+ * @draft ICU 4.2
+ */
+U_DRAFT const char * U_EXPORT2
+ucal_getType(const UCalendar *cal, UErrorCode* status);
+
+/**
+ * The following is a temporary Apple-specific API to help InternationalPrefs
+ * transition to the updated version of the above ICU API. It will be removed soon.
+ */
+U_DRAFT const char * U_EXPORT2
+ucal_getTypeWithError(const UCalendar *cal, UErrorCode* status);
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/ucasemap.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ucasemap.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ucasemap.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,391 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 2005-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  ucasemap.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2005may06
-*   created by: Markus W. Scherer
-*
-*   Case mapping service object and functions using it.
-*/
-
-#ifndef __UCASEMAP_H__
-#define __UCASEMAP_H__
-
-#include "unicode/utypes.h"
-#include "unicode/ustring.h"
-
-/**
- * \file
- * \brief C API: Unicode case mapping functions using a UCaseMap service object.
- *
- * The service object takes care of memory allocations, data loading, and setup
- * for the attributes, as usual.
- *
- * Currently, the functionality provided here does not overlap with uchar.h
- * and ustring.h, except for ucasemap_toTitle().
- *
- * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
- */
-
-/**
- * UCaseMap is an opaque service object for newer ICU case mapping functions.
- * Older functions did not use a service object.
- * @stable ICU 3.4
- */
-struct UCaseMap;
-typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
-
-/**
- * Open a UCaseMap service object for a locale and a set of options.
- * The locale ID and options are preprocessed so that functions using the
- * service object need not process them in each call.
- *
- * @param locale ICU locale ID, used for language-dependent
- *               upper-/lower-/title-casing according to the Unicode standard.
- *               Usual semantics: ""=root, NULL=default locale, etc.
- * @param options Options bit set, used for case folding and string comparisons.
- *                Same flags as for u_foldCase(), u_strFoldCase(),
- *                u_strCaseCompare(), etc.
- *                Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                   which must not indicate a failure before the function call.
- * @return Pointer to a UCaseMap service object, if successful.
- *
- * @see U_FOLD_CASE_DEFAULT
- * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
- * @see U_TITLECASE_NO_LOWERCASE
- * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
- * @stable ICU 3.4
- */
-U_STABLE UCaseMap * U_EXPORT2
-ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
-
-/**
- * Close a UCaseMap service object.
- * @param csm Object to be closed.
- * @stable ICU 3.4
- */
-U_STABLE void U_EXPORT2
-ucasemap_close(UCaseMap *csm);
-
-/**
- * Get the locale ID that is used for language-dependent case mappings.
- * @param csm UCaseMap service object.
- * @return locale ID
- * @stable ICU 3.4
- */
-U_STABLE const char * U_EXPORT2
-ucasemap_getLocale(const UCaseMap *csm);
-
-/**
- * Get the options bit set that is used for case folding and string comparisons.
- * @param csm UCaseMap service object.
- * @return options bit set
- * @stable ICU 3.4
- */
-U_STABLE uint32_t U_EXPORT2
-ucasemap_getOptions(const UCaseMap *csm);
-
-/**
- * Set the locale ID that is used for language-dependent case mappings.
- *
- * @param csm UCaseMap service object.
- * @param locale Locale ID, see ucasemap_open().
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                   which must not indicate a failure before the function call.
- *
- * @see ucasemap_open
- * @stable ICU 3.4
- */
-U_STABLE void U_EXPORT2
-ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
-
-/**
- * Set the options bit set that is used for case folding and string comparisons.
- *
- * @param csm UCaseMap service object.
- * @param options Options bit set, see ucasemap_open().
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                   which must not indicate a failure before the function call.
- *
- * @see ucasemap_open
- * @stable ICU 3.4
- */
-U_STABLE void U_EXPORT2
-ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
-
-/**
- * Do not lowercase non-initial parts of words when titlecasing.
- * Option bit for titlecasing APIs that take an options bit set.
- *
- * By default, titlecasing will titlecase the first cased character
- * of a word and lowercase all other characters.
- * With this option, the other characters will not be modified.
- *
- * @see ucasemap_setOptions
- * @see ucasemap_toTitle
- * @see ucasemap_utf8ToTitle
- * @see UnicodeString::toTitle
- * @stable ICU 4.0
- */
-#define U_TITLECASE_NO_LOWERCASE 0x100
-
-/**
- * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
- * titlecase exactly the characters at breaks from the iterator.
- * Option bit for titlecasing APIs that take an options bit set.
- *
- * By default, titlecasing will take each break iterator index,
- * adjust it by looking for the next cased character, and titlecase that one.
- * Other characters are lowercased.
- *
- * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
- *
- * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
- * #29, "Text Boundaries." Between each pair of word boundaries, find the first
- * cased character F. If F exists, map F to default_title(F); then map each
- * subsequent character C to default_lower(C).
- *
- * @see ucasemap_setOptions
- * @see ucasemap_toTitle
- * @see ucasemap_utf8ToTitle
- * @see UnicodeString::toTitle
- * @see U_TITLECASE_NO_LOWERCASE
- * @stable ICU 4.0
- */
-#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-/**
- * Get the break iterator that is used for titlecasing.
- * Do not modify the returned break iterator.
- * @param csm UCaseMap service object.
- * @return titlecasing break iterator
- * @stable ICU 4.0
- */
-U_DRAFT const UBreakIterator * U_EXPORT2
-ucasemap_getBreakIterator(const UCaseMap *csm);
-
-/**
- * Set the break iterator that is used for titlecasing.
- * The UCaseMap service object releases a previously set break iterator
- * and "adopts" this new one, taking ownership of it.
- * It will be released in a subsequent call to ucasemap_setBreakIterator()
- * or ucasemap_close().
- *
- * Break iterator operations are not thread-safe. Therefore, titlecasing
- * functions use non-const UCaseMap objects. It is not possible to titlecase
- * strings concurrently using the same UCaseMap.
- *
- * @param csm UCaseMap service object.
- * @param iterToAdopt Break iterator to be adopted for titlecasing.
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                   which must not indicate a failure before the function call.
- *
- * @see ucasemap_toTitle
- * @see ucasemap_utf8ToTitle
- * @stable ICU 4.0
- */
-U_DRAFT void U_EXPORT2
-ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
-
-/**
- * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
- * except that it takes ucasemap_setOptions() into account and has performance
- * advantages from being able to use a UCaseMap object for multiple case mapping
- * operations, saving setup time.
- *
- * Casing is locale-dependent and context-sensitive.
- * Titlecasing uses a break iterator to find the first characters of words
- * that are to be titlecased. It titlecases those characters and lowercases
- * all others. (This can be modified with ucasemap_setOptions().)
- *
- * The titlecase break iterator can be provided to customize for arbitrary
- * styles, using rules and dictionaries beyond the standard iterators.
- * It may be more efficient to always provide an iterator to avoid
- * opening and closing one for each string.
- * The standard titlecase iterator for the root locale implements the
- * algorithm of Unicode TR 21.
- *
- * This function uses only the setText(), first() and next() methods of the
- * provided break iterator.
- *
- * The result may be longer or shorter than the original.
- * The source string and the destination buffer must not overlap.
- *
- * @param csm       UCaseMap service object.
- * @param dest      A buffer for the result string. The result will be NUL-terminated if
- *                  the buffer is large enough.
- *                  The contents is undefined in case of failure.
- * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
- *                  dest may be NULL and the function will only return the length of the result
- *                  without writing any of the result string.
- * @param src       The original string.
- * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                  which must not indicate a failure before the function call.
- * @return The length of the result string, if successful - or in case of a buffer overflow,
- *         in which case it will be greater than destCapacity.
- *
- * @see u_strToTitle
- * @stable ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-ucasemap_toTitle(UCaseMap *csm,
-                 UChar *dest, int32_t destCapacity,
-                 const UChar *src, int32_t srcLength,
-                 UErrorCode *pErrorCode);
-
-#endif
-
-/**
- * Lowercase the characters in a UTF-8 string.
- * Casing is locale-dependent and context-sensitive.
- * The result may be longer or shorter than the original.
- * The source string and the destination buffer must not overlap.
- *
- * @param csm       UCaseMap service object.
- * @param dest      A buffer for the result string. The result will be NUL-terminated if
- *                  the buffer is large enough.
- *                  The contents is undefined in case of failure.
- * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
- *                  dest may be NULL and the function will only return the length of the result
- *                  without writing any of the result string.
- * @param src       The original string.
- * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                  which must not indicate a failure before the function call.
- * @return The length of the result string, if successful - or in case of a buffer overflow,
- *         in which case it will be greater than destCapacity.
- *
- * @see u_strToLower
- * @stable ICU 3.4
- */
-U_STABLE int32_t U_EXPORT2
-ucasemap_utf8ToLower(const UCaseMap *csm,
-                     char *dest, int32_t destCapacity,
-                     const char *src, int32_t srcLength,
-                     UErrorCode *pErrorCode);
-
-/**
- * Uppercase the characters in a UTF-8 string.
- * Casing is locale-dependent and context-sensitive.
- * The result may be longer or shorter than the original.
- * The source string and the destination buffer must not overlap.
- *
- * @param csm       UCaseMap service object.
- * @param dest      A buffer for the result string. The result will be NUL-terminated if
- *                  the buffer is large enough.
- *                  The contents is undefined in case of failure.
- * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
- *                  dest may be NULL and the function will only return the length of the result
- *                  without writing any of the result string.
- * @param src       The original string.
- * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                  which must not indicate a failure before the function call.
- * @return The length of the result string, if successful - or in case of a buffer overflow,
- *         in which case it will be greater than destCapacity.
- *
- * @see u_strToUpper
- * @stable ICU 3.4
- */
-U_STABLE int32_t U_EXPORT2
-ucasemap_utf8ToUpper(const UCaseMap *csm,
-                     char *dest, int32_t destCapacity,
-                     const char *src, int32_t srcLength,
-                     UErrorCode *pErrorCode);
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-/**
- * Titlecase a UTF-8 string.
- * Casing is locale-dependent and context-sensitive.
- * Titlecasing uses a break iterator to find the first characters of words
- * that are to be titlecased. It titlecases those characters and lowercases
- * all others. (This can be modified with ucasemap_setOptions().)
- *
- * The titlecase break iterator can be provided to customize for arbitrary
- * styles, using rules and dictionaries beyond the standard iterators.
- * It may be more efficient to always provide an iterator to avoid
- * opening and closing one for each string.
- * The standard titlecase iterator for the root locale implements the
- * algorithm of Unicode TR 21.
- *
- * This function uses only the setText(), first() and next() methods of the
- * provided break iterator.
- *
- * The result may be longer or shorter than the original.
- * The source string and the destination buffer must not overlap.
- *
- * @param csm       UCaseMap service object.
- * @param dest      A buffer for the result string. The result will be NUL-terminated if
- *                  the buffer is large enough.
- *                  The contents is undefined in case of failure.
- * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
- *                  dest may be NULL and the function will only return the length of the result
- *                  without writing any of the result string.
- * @param src       The original string.
- * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                  which must not indicate a failure before the function call.
- * @return The length of the result string, if successful - or in case of a buffer overflow,
- *         in which case it will be greater than destCapacity.
- *
- * @see u_strToTitle
- * @see U_TITLECASE_NO_LOWERCASE
- * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
- * @stable ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-ucasemap_utf8ToTitle(UCaseMap *csm,
-                    char *dest, int32_t destCapacity,
-                    const char *src, int32_t srcLength,
-                    UErrorCode *pErrorCode);
-
-#endif
-
-/**
- * Case-fold the characters in a UTF-8 string.
- * Case-folding is locale-independent and not context-sensitive,
- * but there is an option for whether to include or exclude mappings for dotted I
- * and dotless i that are marked with 'I' in CaseFolding.txt.
- * The result may be longer or shorter than the original.
- * The source string and the destination buffer must not overlap.
- *
- * @param csm       UCaseMap service object.
- * @param dest      A buffer for the result string. The result will be NUL-terminated if
- *                  the buffer is large enough.
- *                  The contents is undefined in case of failure.
- * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
- *                  dest may be NULL and the function will only return the length of the result
- *                  without writing any of the result string.
- * @param src       The original string.
- * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                  which must not indicate a failure before the function call.
- * @return The length of the result string, if successful - or in case of a buffer overflow,
- *         in which case it will be greater than destCapacity.
- *
- * @see u_strFoldCase
- * @see ucasemap_setOptions
- * @see U_FOLD_CASE_DEFAULT
- * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
- * @stable ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-ucasemap_utf8FoldCase(const UCaseMap *csm,
-                      char *dest, int32_t destCapacity,
-                      const char *src, int32_t srcLength,
-                      UErrorCode *pErrorCode);
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/ucasemap.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ucasemap.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ucasemap.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ucasemap.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,391 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2005-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  ucasemap.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2005may06
+*   created by: Markus W. Scherer
+*
+*   Case mapping service object and functions using it.
+*/
+
+#ifndef __UCASEMAP_H__
+#define __UCASEMAP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+
+/**
+ * \file
+ * \brief C API: Unicode case mapping functions using a UCaseMap service object.
+ *
+ * The service object takes care of memory allocations, data loading, and setup
+ * for the attributes, as usual.
+ *
+ * Currently, the functionality provided here does not overlap with uchar.h
+ * and ustring.h, except for ucasemap_toTitle().
+ *
+ * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
+ */
+
+/**
+ * UCaseMap is an opaque service object for newer ICU case mapping functions.
+ * Older functions did not use a service object.
+ * @stable ICU 3.4
+ */
+struct UCaseMap;
+typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
+
+/**
+ * Open a UCaseMap service object for a locale and a set of options.
+ * The locale ID and options are preprocessed so that functions using the
+ * service object need not process them in each call.
+ *
+ * @param locale ICU locale ID, used for language-dependent
+ *               upper-/lower-/title-casing according to the Unicode standard.
+ *               Usual semantics: ""=root, NULL=default locale, etc.
+ * @param options Options bit set, used for case folding and string comparisons.
+ *                Same flags as for u_foldCase(), u_strFoldCase(),
+ *                u_strCaseCompare(), etc.
+ *                Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                   which must not indicate a failure before the function call.
+ * @return Pointer to a UCaseMap service object, if successful.
+ *
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 3.4
+ */
+U_STABLE UCaseMap * U_EXPORT2
+ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
+
+/**
+ * Close a UCaseMap service object.
+ * @param csm Object to be closed.
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucasemap_close(UCaseMap *csm);
+
+/**
+ * Get the locale ID that is used for language-dependent case mappings.
+ * @param csm UCaseMap service object.
+ * @return locale ID
+ * @stable ICU 3.4
+ */
+U_STABLE const char * U_EXPORT2
+ucasemap_getLocale(const UCaseMap *csm);
+
+/**
+ * Get the options bit set that is used for case folding and string comparisons.
+ * @param csm UCaseMap service object.
+ * @return options bit set
+ * @stable ICU 3.4
+ */
+U_STABLE uint32_t U_EXPORT2
+ucasemap_getOptions(const UCaseMap *csm);
+
+/**
+ * Set the locale ID that is used for language-dependent case mappings.
+ *
+ * @param csm UCaseMap service object.
+ * @param locale Locale ID, see ucasemap_open().
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                   which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_open
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
+
+/**
+ * Set the options bit set that is used for case folding and string comparisons.
+ *
+ * @param csm UCaseMap service object.
+ * @param options Options bit set, see ucasemap_open().
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                   which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_open
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
+
+/**
+ * Do not lowercase non-initial parts of words when titlecasing.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will titlecase the first cased character
+ * of a word and lowercase all other characters.
+ * With this option, the other characters will not be modified.
+ *
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @see UnicodeString::toTitle
+ * @stable ICU 4.0
+ */
+#define U_TITLECASE_NO_LOWERCASE 0x100
+
+/**
+ * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
+ * titlecase exactly the characters at breaks from the iterator.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will take each break iterator index,
+ * adjust it by looking for the next cased character, and titlecase that one.
+ * Other characters are lowercased.
+ *
+ * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
+ *
+ * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
+ * #29, "Text Boundaries." Between each pair of word boundaries, find the first
+ * cased character F. If F exists, map F to default_title(F); then map each
+ * subsequent character C to default_lower(C).
+ *
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @see UnicodeString::toTitle
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @stable ICU 4.0
+ */
+#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Get the break iterator that is used for titlecasing.
+ * Do not modify the returned break iterator.
+ * @param csm UCaseMap service object.
+ * @return titlecasing break iterator
+ * @stable ICU 4.0
+ */
+U_DRAFT const UBreakIterator * U_EXPORT2
+ucasemap_getBreakIterator(const UCaseMap *csm);
+
+/**
+ * Set the break iterator that is used for titlecasing.
+ * The UCaseMap service object releases a previously set break iterator
+ * and "adopts" this new one, taking ownership of it.
+ * It will be released in a subsequent call to ucasemap_setBreakIterator()
+ * or ucasemap_close().
+ *
+ * Break iterator operations are not thread-safe. Therefore, titlecasing
+ * functions use non-const UCaseMap objects. It is not possible to titlecase
+ * strings concurrently using the same UCaseMap.
+ *
+ * @param csm UCaseMap service object.
+ * @param iterToAdopt Break iterator to be adopted for titlecasing.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                   which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 4.0
+ */
+U_DRAFT void U_EXPORT2
+ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
+
+/**
+ * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
+ * except that it takes ucasemap_setOptions() into account and has performance
+ * advantages from being able to use a UCaseMap object for multiple case mapping
+ * operations, saving setup time.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm       UCaseMap service object.
+ * @param dest      A buffer for the result string. The result will be NUL-terminated if
+ *                  the buffer is large enough.
+ *                  The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @stable ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+ucasemap_toTitle(UCaseMap *csm,
+                 UChar *dest, int32_t destCapacity,
+                 const UChar *src, int32_t srcLength,
+                 UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Lowercase the characters in a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm       UCaseMap service object.
+ * @param dest      A buffer for the result string. The result will be NUL-terminated if
+ *                  the buffer is large enough.
+ *                  The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ *
+ * @see u_strToLower
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_utf8ToLower(const UCaseMap *csm,
+                     char *dest, int32_t destCapacity,
+                     const char *src, int32_t srcLength,
+                     UErrorCode *pErrorCode);
+
+/**
+ * Uppercase the characters in a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm       UCaseMap service object.
+ * @param dest      A buffer for the result string. The result will be NUL-terminated if
+ *                  the buffer is large enough.
+ *                  The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ *
+ * @see u_strToUpper
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucasemap_utf8ToUpper(const UCaseMap *csm,
+                     char *dest, int32_t destCapacity,
+                     const char *src, int32_t srcLength,
+                     UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm       UCaseMap service object.
+ * @param dest      A buffer for the result string. The result will be NUL-terminated if
+ *                  the buffer is large enough.
+ *                  The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+ucasemap_utf8ToTitle(UCaseMap *csm,
+                    char *dest, int32_t destCapacity,
+                    const char *src, int32_t srcLength,
+                    UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-fold the characters in a UTF-8 string.
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'I' in CaseFolding.txt.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm       UCaseMap service object.
+ * @param dest      A buffer for the result string. The result will be NUL-terminated if
+ *                  the buffer is large enough.
+ *                  The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ *
+ * @see u_strFoldCase
+ * @see ucasemap_setOptions
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @stable ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+ucasemap_utf8FoldCase(const UCaseMap *csm,
+                      char *dest, int32_t destCapacity,
+                      const char *src, int32_t srcLength,
+                      UErrorCode *pErrorCode);
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/ucat.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ucat.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ucat.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,158 +0,0 @@
-/*
-**********************************************************************
-* Copyright (c) 2003-2004, International Business Machines
-* Corporation and others.  All Rights Reserved.
-**********************************************************************
-* Author: Alan Liu
-* Created: March 19 2003
-* Since: ICU 2.6
-**********************************************************************
-*/
-#ifndef UCAT_H
-#define UCAT_H
-
-#include "unicode/utypes.h"
-#include "unicode/ures.h"
-
-/**
- * \file
- * \brief C API: Message Catalog Wrappers
- *
- * This C API provides look-alike functions that deliberately resemble
- * the POSIX catopen, catclose, and catgets functions.  The underlying
- * implementation is in terms of ICU resource bundles, rather than
- * POSIX message catalogs.
- *
- * The ICU resource bundles obey standard ICU inheritance policies.
- * To facilitate this, sets and messages are flattened into one tier.
- * This is done by creating resource bundle keys of the form
- * &lt;set_num&gt;%&lt;msg_num&gt; where set_num is the set number and msg_num is
- * the message number, formatted as decimal strings.
- *
- * Example:  Consider a message catalog containing two sets:
- *
- * Set 1: Message 4  = "Good morning."
- *        Message 5  = "Good afternoon."
- *        Message 7  = "Good evening."
- *        Message 8  = "Good night."
- * Set 4: Message 14 = "Please "
- *        Message 19 = "Thank you."
- *        Message 20 = "Sincerely,"
- *
- * The ICU resource bundle source file would, assuming it is named
- * "greet.txt", would look like this:
- *
- * greet
- * {
- *     1%4  { "Good morning." }
- *     1%5  { "Good afternoon." }
- *     1%7  { "Good evening." }
- *     1%8  { "Good night." }
- * 
- *     4%14 { "Please " }
- *     4%19 { "Thank you." }
- *     4%20 { "Sincerely," }
- * }
- *
- * The catgets function is commonly used in combination with functions
- * like printf and strftime.  ICU components like message format can
- * be used instead, although they use a different format syntax.
- * There is an ICU package, icuio, that provides some of
- * the POSIX-style formatting API.
- */
-
-U_CDECL_BEGIN
-
-/**
- * An ICU message catalog descriptor, analogous to nl_catd.
- * 
- * @stable ICU 2.6
- */
-typedef UResourceBundle* u_nl_catd;
-
-/**
- * Open and return an ICU message catalog descriptor. The descriptor
- * may be passed to u_catgets() to retrieve localized strings.
- *
- * @param name string containing the full path pointing to the
- * directory where the resources reside followed by the package name
- * e.g. "/usr/resource/my_app/resources/guimessages" on a Unix system.
- * If NULL, ICU default data files will be used.
- *
- * Unlike POSIX, environment variables are not interpolated within the
- * name.
- *
- * @param locale the locale for which we want to open the resource. If
- * NULL, the default ICU locale will be used (see uloc_getDefault). If
- * strlen(locale) == 0, the root locale will be used.
- *
- * @param ec input/output error code. Upon output,
- * U_USING_FALLBACK_WARNING indicates that a fallback locale was
- * used. For example, 'de_CH' was requested, but nothing was found
- * there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that the
- * default locale data or root locale data was used; neither the
- * requested locale nor any of its fallback locales were found.
- *
- * @return a message catalog descriptor that may be passed to
- * u_catgets(). If the ec parameter indicates success, then the caller
- * is responsible for calling u_catclose() to close the message
- * catalog. If the ec parameter indicates failure, then NULL will be
- * returned.
- * 
- * @stable ICU 2.6
- */
-U_STABLE u_nl_catd U_EXPORT2
-u_catopen(const char* name, const char* locale, UErrorCode* ec);
-
-/**
- * Close an ICU message catalog, given its descriptor.
- *
- * @param catd a message catalog descriptor to be closed. May be NULL,
- * in which case no action is taken.
- * 
- * @stable ICU 2.6
- */
-U_STABLE void U_EXPORT2
-u_catclose(u_nl_catd catd);
-
-/**
- * Retrieve a localized string from an ICU message catalog.
- *
- * @param catd a message catalog descriptor returned by u_catopen.
- *
- * @param set_num the message catalog set number. Sets need not be
- * numbered consecutively.
- *
- * @param msg_num the message catalog message number within the
- * set. Messages need not be numbered consecutively.
- *
- * @param s the default string. This is returned if the string
- * specified by the set_num and msg_num is not found. It must be
- * zero-terminated.
- *
- * @param len fill-in parameter to receive the length of the result.
- * May be NULL, in which case it is ignored.
- *
- * @param ec input/output error code. May be U_USING_FALLBACK_WARNING
- * or U_USING_DEFAULT_WARNING. U_MISSING_RESOURCE_ERROR indicates that
- * the set_num/msg_num tuple does not specify a valid message string
- * in this catalog.
- *
- * @return a pointer to a zero-terminated UChar array which lives in
- * an internal buffer area, typically a memory mapped/DLL file. The
- * caller must NOT delete this pointer. If the call is unsuccessful
- * for any reason, then s is returned.  This includes the situation in
- * which ec indicates a failing error code upon entry to this
- * function.
- * 
- * @stable ICU 2.6
- */
-U_STABLE const UChar* U_EXPORT2
-u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num,
-          const UChar* s,
-          int32_t* len, UErrorCode* ec);
-
-U_CDECL_END
-
-#endif /*UCAT_H*/
-/*eof*/

Copied: MacRuby/trunk/icu-1060/unicode/ucat.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ucat.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ucat.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ucat.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,158 @@
+/*
+**********************************************************************
+* Copyright (c) 2003-2004, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: March 19 2003
+* Since: ICU 2.6
+**********************************************************************
+*/
+#ifndef UCAT_H
+#define UCAT_H
+
+#include "unicode/utypes.h"
+#include "unicode/ures.h"
+
+/**
+ * \file
+ * \brief C API: Message Catalog Wrappers
+ *
+ * This C API provides look-alike functions that deliberately resemble
+ * the POSIX catopen, catclose, and catgets functions.  The underlying
+ * implementation is in terms of ICU resource bundles, rather than
+ * POSIX message catalogs.
+ *
+ * The ICU resource bundles obey standard ICU inheritance policies.
+ * To facilitate this, sets and messages are flattened into one tier.
+ * This is done by creating resource bundle keys of the form
+ * &lt;set_num&gt;%&lt;msg_num&gt; where set_num is the set number and msg_num is
+ * the message number, formatted as decimal strings.
+ *
+ * Example:  Consider a message catalog containing two sets:
+ *
+ * Set 1: Message 4  = "Good morning."
+ *        Message 5  = "Good afternoon."
+ *        Message 7  = "Good evening."
+ *        Message 8  = "Good night."
+ * Set 4: Message 14 = "Please "
+ *        Message 19 = "Thank you."
+ *        Message 20 = "Sincerely,"
+ *
+ * The ICU resource bundle source file would, assuming it is named
+ * "greet.txt", would look like this:
+ *
+ * greet
+ * {
+ *     1%4  { "Good morning." }
+ *     1%5  { "Good afternoon." }
+ *     1%7  { "Good evening." }
+ *     1%8  { "Good night." }
+ * 
+ *     4%14 { "Please " }
+ *     4%19 { "Thank you." }
+ *     4%20 { "Sincerely," }
+ * }
+ *
+ * The catgets function is commonly used in combination with functions
+ * like printf and strftime.  ICU components like message format can
+ * be used instead, although they use a different format syntax.
+ * There is an ICU package, icuio, that provides some of
+ * the POSIX-style formatting API.
+ */
+
+U_CDECL_BEGIN
+
+/**
+ * An ICU message catalog descriptor, analogous to nl_catd.
+ * 
+ * @stable ICU 2.6
+ */
+typedef UResourceBundle* u_nl_catd;
+
+/**
+ * Open and return an ICU message catalog descriptor. The descriptor
+ * may be passed to u_catgets() to retrieve localized strings.
+ *
+ * @param name string containing the full path pointing to the
+ * directory where the resources reside followed by the package name
+ * e.g. "/usr/resource/my_app/resources/guimessages" on a Unix system.
+ * If NULL, ICU default data files will be used.
+ *
+ * Unlike POSIX, environment variables are not interpolated within the
+ * name.
+ *
+ * @param locale the locale for which we want to open the resource. If
+ * NULL, the default ICU locale will be used (see uloc_getDefault). If
+ * strlen(locale) == 0, the root locale will be used.
+ *
+ * @param ec input/output error code. Upon output,
+ * U_USING_FALLBACK_WARNING indicates that a fallback locale was
+ * used. For example, 'de_CH' was requested, but nothing was found
+ * there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that the
+ * default locale data or root locale data was used; neither the
+ * requested locale nor any of its fallback locales were found.
+ *
+ * @return a message catalog descriptor that may be passed to
+ * u_catgets(). If the ec parameter indicates success, then the caller
+ * is responsible for calling u_catclose() to close the message
+ * catalog. If the ec parameter indicates failure, then NULL will be
+ * returned.
+ * 
+ * @stable ICU 2.6
+ */
+U_STABLE u_nl_catd U_EXPORT2
+u_catopen(const char* name, const char* locale, UErrorCode* ec);
+
+/**
+ * Close an ICU message catalog, given its descriptor.
+ *
+ * @param catd a message catalog descriptor to be closed. May be NULL,
+ * in which case no action is taken.
+ * 
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+u_catclose(u_nl_catd catd);
+
+/**
+ * Retrieve a localized string from an ICU message catalog.
+ *
+ * @param catd a message catalog descriptor returned by u_catopen.
+ *
+ * @param set_num the message catalog set number. Sets need not be
+ * numbered consecutively.
+ *
+ * @param msg_num the message catalog message number within the
+ * set. Messages need not be numbered consecutively.
+ *
+ * @param s the default string. This is returned if the string
+ * specified by the set_num and msg_num is not found. It must be
+ * zero-terminated.
+ *
+ * @param len fill-in parameter to receive the length of the result.
+ * May be NULL, in which case it is ignored.
+ *
+ * @param ec input/output error code. May be U_USING_FALLBACK_WARNING
+ * or U_USING_DEFAULT_WARNING. U_MISSING_RESOURCE_ERROR indicates that
+ * the set_num/msg_num tuple does not specify a valid message string
+ * in this catalog.
+ *
+ * @return a pointer to a zero-terminated UChar array which lives in
+ * an internal buffer area, typically a memory mapped/DLL file. The
+ * caller must NOT delete this pointer. If the call is unsuccessful
+ * for any reason, then s is returned.  This includes the situation in
+ * which ec indicates a failing error code upon entry to this
+ * function.
+ * 
+ * @stable ICU 2.6
+ */
+U_STABLE const UChar* U_EXPORT2
+u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num,
+          const UChar* s,
+          int32_t* len, UErrorCode* ec);
+
+U_CDECL_END
+
+#endif /*UCAT_H*/
+/*eof*/

Deleted: MacRuby/trunk/icu-1060/unicode/uchar.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uchar.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uchar.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,3062 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1997-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*
-* File UCHAR.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   04/02/97    aliu        Creation.
-*   03/29/99    helena      Updated for C APIs.
-*   4/15/99     Madhu       Updated for C Implementation and Javadoc
-*   5/20/99     Madhu       Added the function u_getVersion()
-*   8/19/1999   srl         Upgraded scripts to Unicode 3.0
-*   8/27/1999   schererm    UCharDirection constants: U_...
-*   11/11/1999  weiv        added u_isalnum(), cleaned comments
-*   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion().
-******************************************************************************
-*/
-
-#ifndef UCHAR_H
-#define UCHAR_H
-
-#include "unicode/utypes.h"
-
-U_CDECL_BEGIN
-
-/*==========================================================================*/
-/* Unicode version number                                                   */
-/*==========================================================================*/
-/**
- * Unicode version number, default for the current ICU version.
- * The actual Unicode Character Database (UCD) data is stored in uprops.dat
- * and may be generated from UCD files from a different Unicode version.
- * Call u_getUnicodeVersion to get the actual Unicode version of the data.
- *
- * @see u_getUnicodeVersion
- * @stable ICU 2.0
- */
-#define U_UNICODE_VERSION "5.1"
-
-/**
- * \file
- * \brief C API: Unicode Properties
- *
- * This C API provides low-level access to the Unicode Character Database.
- * In addition to raw property values, some convenience functions calculate
- * derived properties, for example for Java-style programming.
- *
- * Unicode assigns each code point (not just assigned character) values for
- * many properties.
- * Most of them are simple boolean flags, or constants from a small enumerated list.
- * For some properties, values are strings or other relatively more complex types.
- *
- * For more information see
- * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
- * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).
- *
- * Many functions are designed to match java.lang.Character functions.
- * See the individual function documentation,
- * and see the JDK 1.4 java.lang.Character documentation
- * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html
- *
- * There are also functions that provide easy migration from C/POSIX functions
- * like isblank(). Their use is generally discouraged because the C/POSIX
- * standards do not define their semantics beyond the ASCII range, which means
- * that different implementations exhibit very different behavior.
- * Instead, Unicode properties should be used directly.
- *
- * There are also only a few, broad C/POSIX character classes, and they tend
- * to be used for conflicting purposes. For example, the "isalpha()" class
- * is sometimes used to determine word boundaries, while a more sophisticated
- * approach would at least distinguish initial letters from continuation
- * characters (the latter including combining marks).
- * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
- * Another example: There is no "istitle()" class for titlecase characters.
- *
- * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
- * ICU implements them according to the Standard Recommendations in
- * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
- * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
- *
- * API access for C/POSIX character classes is as follows:
- * - alpha:     u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
- * - lower:     u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
- * - upper:     u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
- * - punct:     u_ispunct(c)
- * - digit:     u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER
- * - xdigit:    u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
- * - alnum:     u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
- * - space:     u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
- * - blank:     u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
- * - cntrl:     u_charType(c)==U_CONTROL_CHAR
- * - graph:     u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
- * - print:     u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
- *
- * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
- * the Standard Recommendations in UTS #18. Instead, they match Java
- * functions according to their API documentation.
- *
- * \htmlonly
- * The C/POSIX character classes are also available in UnicodeSet patterns,
- * using patterns like [:graph:] or \p{graph}.
- * \endhtmlonly
- *
- * Note: There are several ICU whitespace functions.
- * Comparison:
- * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
- *       most of general categories "Z" (separators) + most whitespace ISO controls
- *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
- * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
- * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
- * - u_isspace: Z + whitespace ISO controls (including no-break spaces)
- * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP
- */
-
-/**
- * Constants.
- */
-
-/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
-#define UCHAR_MIN_VALUE 0
-
-/**
- * The highest Unicode code point value (scalar value) according to
- * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
- * For a single character, UChar32 is a simple type that can hold any code point value.
- *
- * @see UChar32
- * @stable ICU 2.0
- */
-#define UCHAR_MAX_VALUE 0x10ffff
-
-/**
- * Get a single-bit bit set (a flag) from a bit number 0..31.
- * @stable ICU 2.1
- */
-#define U_MASK(x) ((uint32_t)1<<(x))
-
-/*
- * !! Note: Several comments in this file are machine-read by the
- * genpname tool.  These comments describe the correspondence between
- * icu enum constants and UCD entities.  Do not delete them.  Update
- * these comments as needed.
- *
- * Any comment of the form "/ *[name]* /" (spaces added) is such
- * a comment.
- *
- * The U_JG_* and U_GC_*_MASK constants are matched by their symbolic
- * name, which must match PropertyValueAliases.txt.
- */
-
-/**
- * Selection constants for Unicode properties.
- * These constants are used in functions like u_hasBinaryProperty to select
- * one of the Unicode properties.
- *
- * The properties APIs are intended to reflect Unicode properties as defined
- * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
- * For details about the properties see http://www.unicode.org/ucd/ .
- * For names of Unicode properties see the UCD file PropertyAliases.txt.
- *
- * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
- * then properties marked with "new in Unicode 3.2" are not or not fully available.
- * Check u_getUnicodeVersion to be sure.
- *
- * @see u_hasBinaryProperty
- * @see u_getIntPropertyValue
- * @see u_getUnicodeVersion
- * @stable ICU 2.1
- */
-typedef enum UProperty {
-    /*  See note !!.  Comments of the form "Binary property Dash",
-        "Enumerated property Script", "Double property Numeric_Value",
-        and "String property Age" are read by genpname. */
-
-    /*  Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
-    debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
-    rather than UCHAR_BINARY_START.  Likewise for other *_START
-    identifiers. */
-
-    /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
-        Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
-    UCHAR_ALPHABETIC=0,
-    /** First constant for binary Unicode properties. @stable ICU 2.1 */
-    UCHAR_BINARY_START=UCHAR_ALPHABETIC,
-    /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
-    UCHAR_ASCII_HEX_DIGIT=1,
-    /** Binary property Bidi_Control.
-        Format controls which have specific functions
-        in the Bidi Algorithm. @stable ICU 2.1 */
-    UCHAR_BIDI_CONTROL=2,
-    /** Binary property Bidi_Mirrored.
-        Characters that may change display in RTL text.
-        Same as u_isMirrored.
-        See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
-    UCHAR_BIDI_MIRRORED=3,
-    /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
-    UCHAR_DASH=4,
-    /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
-        Ignorable in most processing.
-        <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
-    UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5,
-    /** Binary property Deprecated (new in Unicode 3.2).
-        The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
-    UCHAR_DEPRECATED=6,
-    /** Binary property Diacritic. Characters that linguistically modify
-        the meaning of another character to which they apply. @stable ICU 2.1 */
-    UCHAR_DIACRITIC=7,
-    /** Binary property Extender.
-        Extend the value or shape of a preceding alphabetic character,
-        e.g., length and iteration marks. @stable ICU 2.1 */
-    UCHAR_EXTENDER=8,
-    /** Binary property Full_Composition_Exclusion.
-        CompositionExclusions.txt+Singleton Decompositions+
-        Non-Starter Decompositions. @stable ICU 2.1 */
-    UCHAR_FULL_COMPOSITION_EXCLUSION=9,
-    /** Binary property Grapheme_Base (new in Unicode 3.2).
-        For programmatic determination of grapheme cluster boundaries.
-        [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
-    UCHAR_GRAPHEME_BASE=10,
-    /** Binary property Grapheme_Extend (new in Unicode 3.2).
-        For programmatic determination of grapheme cluster boundaries.
-        Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
-    UCHAR_GRAPHEME_EXTEND=11,
-    /** Binary property Grapheme_Link (new in Unicode 3.2).
-        For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
-    UCHAR_GRAPHEME_LINK=12,
-    /** Binary property Hex_Digit.
-        Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
-    UCHAR_HEX_DIGIT=13,
-    /** Binary property Hyphen. Dashes used to mark connections
-        between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
-    UCHAR_HYPHEN=14,
-    /** Binary property ID_Continue.
-        Characters that can continue an identifier.
-        DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
-        ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
-    UCHAR_ID_CONTINUE=15,
-    /** Binary property ID_Start.
-        Characters that can start an identifier.
-        Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
-    UCHAR_ID_START=16,
-    /** Binary property Ideographic.
-        CJKV ideographs. @stable ICU 2.1 */
-    UCHAR_IDEOGRAPHIC=17,
-    /** Binary property IDS_Binary_Operator (new in Unicode 3.2).
-        For programmatic determination of
-        Ideographic Description Sequences. @stable ICU 2.1 */
-    UCHAR_IDS_BINARY_OPERATOR=18,
-    /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
-        For programmatic determination of
-        Ideographic Description Sequences. @stable ICU 2.1 */
-    UCHAR_IDS_TRINARY_OPERATOR=19,
-    /** Binary property Join_Control.
-        Format controls for cursive joining and ligation. @stable ICU 2.1 */
-    UCHAR_JOIN_CONTROL=20,
-    /** Binary property Logical_Order_Exception (new in Unicode 3.2).
-        Characters that do not use logical order and
-        require special handling in most processing. @stable ICU 2.1 */
-    UCHAR_LOGICAL_ORDER_EXCEPTION=21,
-    /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
-        Ll+Other_Lowercase @stable ICU 2.1 */
-    UCHAR_LOWERCASE=22,
-    /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
-    UCHAR_MATH=23,
-    /** Binary property Noncharacter_Code_Point.
-        Code points that are explicitly defined as illegal
-        for the encoding of characters. @stable ICU 2.1 */
-    UCHAR_NONCHARACTER_CODE_POINT=24,
-    /** Binary property Quotation_Mark. @stable ICU 2.1 */
-    UCHAR_QUOTATION_MARK=25,
-    /** Binary property Radical (new in Unicode 3.2).
-        For programmatic determination of
-        Ideographic Description Sequences. @stable ICU 2.1 */
-    UCHAR_RADICAL=26,
-    /** Binary property Soft_Dotted (new in Unicode 3.2).
-        Characters with a "soft dot", like i or j.
-        An accent placed on these characters causes
-        the dot to disappear. @stable ICU 2.1 */
-    UCHAR_SOFT_DOTTED=27,
-    /** Binary property Terminal_Punctuation.
-        Punctuation characters that generally mark
-        the end of textual units. @stable ICU 2.1 */
-    UCHAR_TERMINAL_PUNCTUATION=28,
-    /** Binary property Unified_Ideograph (new in Unicode 3.2).
-        For programmatic determination of
-        Ideographic Description Sequences. @stable ICU 2.1 */
-    UCHAR_UNIFIED_IDEOGRAPH=29,
-    /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
-        Lu+Other_Uppercase @stable ICU 2.1 */
-    UCHAR_UPPERCASE=30,
-    /** Binary property White_Space.
-        Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
-        Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
-    UCHAR_WHITE_SPACE=31,
-    /** Binary property XID_Continue.
-        ID_Continue modified to allow closure under
-        normalization forms NFKC and NFKD. @stable ICU 2.1 */
-    UCHAR_XID_CONTINUE=32,
-    /** Binary property XID_Start. ID_Start modified to allow
-        closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
-    UCHAR_XID_START=33,
-    /** Binary property Case_Sensitive. Either the source of a case
-        mapping or _in_ the target of a case mapping. Not the same as
-        the general category Cased_Letter. @stable ICU 2.6 */
-   UCHAR_CASE_SENSITIVE=34,
-    /** Binary property STerm (new in Unicode 4.0.1).
-        Sentence Terminal. Used in UAX #29: Text Boundaries
-        (http://www.unicode.org/reports/tr29/)
-        @stable ICU 3.0 */
-    UCHAR_S_TERM=35,
-    /** Binary property Variation_Selector (new in Unicode 4.0.1).
-        Indicates all those characters that qualify as Variation Selectors.
-        For details on the behavior of these characters,
-        see StandardizedVariants.html and 15.6 Variation Selectors.
-        @stable ICU 3.0 */
-    UCHAR_VARIATION_SELECTOR=36,
-    /** Binary property NFD_Inert.
-        ICU-specific property for characters that are inert under NFD,
-        i.e., they do not interact with adjacent characters.
-        Used for example in normalizing transforms in incremental mode
-        to find the boundary of safely normalizable text despite possible
-        text additions.
-
-        There is one such property per normalization form.
-        These properties are computed as follows - an inert character is:
-        a) unassigned, or ALL of the following:
-        b) of combining class 0.
-        c) not decomposed by this normalization form.
-        AND if NFC or NFKC,
-        d) can never compose with a previous character.
-        e) can never compose with a following character.
-        f) can never change if another character is added.
-           Example: a-breve might satisfy all but f, but if you
-           add an ogonek it changes to a-ogonek + breve
-
-        See also com.ibm.text.UCD.NFSkippable in the ICU4J repository,
-        and icu/source/common/unormimp.h .
-        @stable ICU 3.0 */
-    UCHAR_NFD_INERT=37,
-    /** Binary property NFKD_Inert.
-        ICU-specific property for characters that are inert under NFKD,
-        i.e., they do not interact with adjacent characters.
-        Used for example in normalizing transforms in incremental mode
-        to find the boundary of safely normalizable text despite possible
-        text additions.
-        @see UCHAR_NFD_INERT
-        @stable ICU 3.0 */
-    UCHAR_NFKD_INERT=38,
-    /** Binary property NFC_Inert.
-        ICU-specific property for characters that are inert under NFC,
-        i.e., they do not interact with adjacent characters.
-        Used for example in normalizing transforms in incremental mode
-        to find the boundary of safely normalizable text despite possible
-        text additions.
-        @see UCHAR_NFD_INERT
-        @stable ICU 3.0 */
-    UCHAR_NFC_INERT=39,
-    /** Binary property NFKC_Inert.
-        ICU-specific property for characters that are inert under NFKC,
-        i.e., they do not interact with adjacent characters.
-        Used for example in normalizing transforms in incremental mode
-        to find the boundary of safely normalizable text despite possible
-        text additions.
-        @see UCHAR_NFD_INERT
-        @stable ICU 3.0 */
-    UCHAR_NFKC_INERT=40,
-    /** Binary Property Segment_Starter.
-        ICU-specific property for characters that are starters in terms of
-        Unicode normalization and combining character sequences.
-        They have ccc=0 and do not occur in non-initial position of the
-        canonical decomposition of any character
-        (like " in NFD(a-umlaut) and a Jamo T in an NFD(Hangul LVT)).
-        ICU uses this property for segmenting a string for generating a set of
-        canonically equivalent strings, e.g. for canonical closure while
-        processing collation tailoring rules.
-        @stable ICU 3.0 */
-    UCHAR_SEGMENT_STARTER=41,
-    /** Binary property Pattern_Syntax (new in Unicode 4.1).
-        See UAX #31 Identifier and Pattern Syntax
-        (http://www.unicode.org/reports/tr31/)
-        @stable ICU 3.4 */
-    UCHAR_PATTERN_SYNTAX=42,
-    /** Binary property Pattern_White_Space (new in Unicode 4.1).
-        See UAX #31 Identifier and Pattern Syntax
-        (http://www.unicode.org/reports/tr31/)
-        @stable ICU 3.4 */
-    UCHAR_PATTERN_WHITE_SPACE=43,
-    /** Binary property alnum (a C/POSIX character class).
-        Implemented according to the UTS #18 Annex C Standard Recommendation.
-        See the uchar.h file documentation.
-        @stable ICU 3.4 */
-    UCHAR_POSIX_ALNUM=44,
-    /** Binary property blank (a C/POSIX character class).
-        Implemented according to the UTS #18 Annex C Standard Recommendation.
-        See the uchar.h file documentation.
-        @stable ICU 3.4 */
-    UCHAR_POSIX_BLANK=45,
-    /** Binary property graph (a C/POSIX character class).
-        Implemented according to the UTS #18 Annex C Standard Recommendation.
-        See the uchar.h file documentation.
-        @stable ICU 3.4 */
-    UCHAR_POSIX_GRAPH=46,
-    /** Binary property print (a C/POSIX character class).
-        Implemented according to the UTS #18 Annex C Standard Recommendation.
-        See the uchar.h file documentation.
-        @stable ICU 3.4 */
-    UCHAR_POSIX_PRINT=47,
-    /** Binary property xdigit (a C/POSIX character class).
-        Implemented according to the UTS #18 Annex C Standard Recommendation.
-        See the uchar.h file documentation.
-        @stable ICU 3.4 */
-    UCHAR_POSIX_XDIGIT=48,
-    /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
-    UCHAR_BINARY_LIMIT=49,
-
-    /** Enumerated property Bidi_Class.
-        Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
-    UCHAR_BIDI_CLASS=0x1000,
-    /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
-    UCHAR_INT_START=UCHAR_BIDI_CLASS,
-    /** Enumerated property Block.
-        Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
-    UCHAR_BLOCK=0x1001,
-    /** Enumerated property Canonical_Combining_Class.
-        Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
-    UCHAR_CANONICAL_COMBINING_CLASS=0x1002,
-    /** Enumerated property Decomposition_Type.
-        Returns UDecompositionType values. @stable ICU 2.2 */
-    UCHAR_DECOMPOSITION_TYPE=0x1003,
-    /** Enumerated property East_Asian_Width.
-        See http://www.unicode.org/reports/tr11/
-        Returns UEastAsianWidth values. @stable ICU 2.2 */
-    UCHAR_EAST_ASIAN_WIDTH=0x1004,
-    /** Enumerated property General_Category.
-        Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
-    UCHAR_GENERAL_CATEGORY=0x1005,
-    /** Enumerated property Joining_Group.
-        Returns UJoiningGroup values. @stable ICU 2.2 */
-    UCHAR_JOINING_GROUP=0x1006,
-    /** Enumerated property Joining_Type.
-        Returns UJoiningType values. @stable ICU 2.2 */
-    UCHAR_JOINING_TYPE=0x1007,
-    /** Enumerated property Line_Break.
-        Returns ULineBreak values. @stable ICU 2.2 */
-    UCHAR_LINE_BREAK=0x1008,
-    /** Enumerated property Numeric_Type.
-        Returns UNumericType values. @stable ICU 2.2 */
-    UCHAR_NUMERIC_TYPE=0x1009,
-    /** Enumerated property Script.
-        Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
-    UCHAR_SCRIPT=0x100A,
-    /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
-        Returns UHangulSyllableType values. @stable ICU 2.6 */
-    UCHAR_HANGUL_SYLLABLE_TYPE=0x100B,
-    /** Enumerated property NFD_Quick_Check.
-        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
-    UCHAR_NFD_QUICK_CHECK=0x100C,
-    /** Enumerated property NFKD_Quick_Check.
-        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
-    UCHAR_NFKD_QUICK_CHECK=0x100D,
-    /** Enumerated property NFC_Quick_Check.
-        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
-    UCHAR_NFC_QUICK_CHECK=0x100E,
-    /** Enumerated property NFKC_Quick_Check.
-        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
-    UCHAR_NFKC_QUICK_CHECK=0x100F,
-    /** Enumerated property Lead_Canonical_Combining_Class.
-        ICU-specific property for the ccc of the first code point
-        of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
-        Useful for checking for canonically ordered text;
-        see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
-        Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
-    UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010,
-    /** Enumerated property Trail_Canonical_Combining_Class.
-        ICU-specific property for the ccc of the last code point
-        of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
-        Useful for checking for canonically ordered text;
-        see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
-        Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
-    UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011,
-    /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
-        Used in UAX #29: Text Boundaries
-        (http://www.unicode.org/reports/tr29/)
-        Returns UGraphemeClusterBreak values. @stable ICU 3.4 */
-    UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012,
-    /** Enumerated property Sentence_Break (new in Unicode 4.1).
-        Used in UAX #29: Text Boundaries
-        (http://www.unicode.org/reports/tr29/)
-        Returns USentenceBreak values. @stable ICU 3.4 */
-    UCHAR_SENTENCE_BREAK=0x1013,
-    /** Enumerated property Word_Break (new in Unicode 4.1).
-        Used in UAX #29: Text Boundaries
-        (http://www.unicode.org/reports/tr29/)
-        Returns UWordBreakValues values. @stable ICU 3.4 */
-    UCHAR_WORD_BREAK=0x1014,
-    /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
-    UCHAR_INT_LIMIT=0x1015,
-
-    /** Bitmask property General_Category_Mask.
-        This is the General_Category property returned as a bit mask.
-        When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
-        returns bit masks for UCharCategory values where exactly one bit is set.
-        When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
-        a multi-bit mask is used for sets of categories like "Letters".
-        Mask values should be cast to uint32_t.
-        @stable ICU 2.4 */
-    UCHAR_GENERAL_CATEGORY_MASK=0x2000,
-    /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */
-    UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
-    /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */
-    UCHAR_MASK_LIMIT=0x2001,
-
-    /** Double property Numeric_Value.
-        Corresponds to u_getNumericValue. @stable ICU 2.4 */
-    UCHAR_NUMERIC_VALUE=0x3000,
-    /** First constant for double Unicode properties. @stable ICU 2.4 */
-    UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
-    /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */
-    UCHAR_DOUBLE_LIMIT=0x3001,
-
-    /** String property Age.
-        Corresponds to u_charAge. @stable ICU 2.4 */
-    UCHAR_AGE=0x4000,
-    /** First constant for string Unicode properties. @stable ICU 2.4 */
-    UCHAR_STRING_START=UCHAR_AGE,
-    /** String property Bidi_Mirroring_Glyph.
-        Corresponds to u_charMirror. @stable ICU 2.4 */
-    UCHAR_BIDI_MIRRORING_GLYPH=0x4001,
-    /** String property Case_Folding.
-        Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
-    UCHAR_CASE_FOLDING=0x4002,
-    /** String property ISO_Comment.
-        Corresponds to u_getISOComment. @stable ICU 2.4 */
-    UCHAR_ISO_COMMENT=0x4003,
-    /** String property Lowercase_Mapping.
-        Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
-    UCHAR_LOWERCASE_MAPPING=0x4004,
-    /** String property Name.
-        Corresponds to u_charName. @stable ICU 2.4 */
-    UCHAR_NAME=0x4005,
-    /** String property Simple_Case_Folding.
-        Corresponds to u_foldCase. @stable ICU 2.4 */
-    UCHAR_SIMPLE_CASE_FOLDING=0x4006,
-    /** String property Simple_Lowercase_Mapping.
-        Corresponds to u_tolower. @stable ICU 2.4 */
-    UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007,
-    /** String property Simple_Titlecase_Mapping.
-        Corresponds to u_totitle. @stable ICU 2.4 */
-    UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008,
-    /** String property Simple_Uppercase_Mapping.
-        Corresponds to u_toupper. @stable ICU 2.4 */
-    UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009,
-    /** String property Titlecase_Mapping.
-        Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
-    UCHAR_TITLECASE_MAPPING=0x400A,
-    /** String property Unicode_1_Name.
-        Corresponds to u_charName. @stable ICU 2.4 */
-    UCHAR_UNICODE_1_NAME=0x400B,
-    /** String property Uppercase_Mapping.
-        Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
-    UCHAR_UPPERCASE_MAPPING=0x400C,
-    /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */
-    UCHAR_STRING_LIMIT=0x400D,
-
-    /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
-    UCHAR_INVALID_CODE = -1
-} UProperty;
-
-/**
- * Data for enumerated Unicode general category types.
- * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
- * @stable ICU 2.0
- */
-typedef enum UCharCategory
-{
-    /** See note !!.  Comments of the form "Cn" are read by genpname. */
-
-    /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
-    U_UNASSIGNED              = 0,
-    /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
-    U_GENERAL_OTHER_TYPES     = 0,
-    /** Lu @stable ICU 2.0 */
-    U_UPPERCASE_LETTER        = 1,
-    /** Ll @stable ICU 2.0 */
-    U_LOWERCASE_LETTER        = 2,
-    /** Lt @stable ICU 2.0 */
-    U_TITLECASE_LETTER        = 3,
-    /** Lm @stable ICU 2.0 */
-    U_MODIFIER_LETTER         = 4,
-    /** Lo @stable ICU 2.0 */
-    U_OTHER_LETTER            = 5,
-    /** Mn @stable ICU 2.0 */
-    U_NON_SPACING_MARK        = 6,
-    /** Me @stable ICU 2.0 */
-    U_ENCLOSING_MARK          = 7,
-    /** Mc @stable ICU 2.0 */
-    U_COMBINING_SPACING_MARK  = 8,
-    /** Nd @stable ICU 2.0 */
-    U_DECIMAL_DIGIT_NUMBER    = 9,
-    /** Nl @stable ICU 2.0 */
-    U_LETTER_NUMBER           = 10,
-    /** No @stable ICU 2.0 */
-    U_OTHER_NUMBER            = 11,
-    /** Zs @stable ICU 2.0 */
-    U_SPACE_SEPARATOR         = 12,
-    /** Zl @stable ICU 2.0 */
-    U_LINE_SEPARATOR          = 13,
-    /** Zp @stable ICU 2.0 */
-    U_PARAGRAPH_SEPARATOR     = 14,
-    /** Cc @stable ICU 2.0 */
-    U_CONTROL_CHAR            = 15,
-    /** Cf @stable ICU 2.0 */
-    U_FORMAT_CHAR             = 16,
-    /** Co @stable ICU 2.0 */
-    U_PRIVATE_USE_CHAR        = 17,
-    /** Cs @stable ICU 2.0 */
-    U_SURROGATE               = 18,
-    /** Pd @stable ICU 2.0 */
-    U_DASH_PUNCTUATION        = 19,
-    /** Ps @stable ICU 2.0 */
-    U_START_PUNCTUATION       = 20,
-    /** Pe @stable ICU 2.0 */
-    U_END_PUNCTUATION         = 21,
-    /** Pc @stable ICU 2.0 */
-    U_CONNECTOR_PUNCTUATION   = 22,
-    /** Po @stable ICU 2.0 */
-    U_OTHER_PUNCTUATION       = 23,
-    /** Sm @stable ICU 2.0 */
-    U_MATH_SYMBOL             = 24,
-    /** Sc @stable ICU 2.0 */
-    U_CURRENCY_SYMBOL         = 25,
-    /** Sk @stable ICU 2.0 */
-    U_MODIFIER_SYMBOL         = 26,
-    /** So @stable ICU 2.0 */
-    U_OTHER_SYMBOL            = 27,
-    /** Pi @stable ICU 2.0 */
-    U_INITIAL_PUNCTUATION     = 28,
-    /** Pf @stable ICU 2.0 */
-    U_FINAL_PUNCTUATION       = 29,
-    /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */
-    U_CHAR_CATEGORY_COUNT
-} UCharCategory;
-
-/**
- * U_GC_XX_MASK constants are bit flags corresponding to Unicode
- * general category values.
- * For each category, the nth bit is set if the numeric value of the
- * corresponding UCharCategory constant is n.
- *
- * There are also some U_GC_Y_MASK constants for groups of general categories
- * like L for all letter categories.
- *
- * @see u_charType
- * @see U_GET_GC_MASK
- * @see UCharCategory
- * @stable ICU 2.1
- */
-#define U_GC_CN_MASK    U_MASK(U_GENERAL_OTHER_TYPES)
-
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_LU_MASK    U_MASK(U_UPPERCASE_LETTER)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_LL_MASK    U_MASK(U_LOWERCASE_LETTER)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_LT_MASK    U_MASK(U_TITLECASE_LETTER)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_LM_MASK    U_MASK(U_MODIFIER_LETTER)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_LO_MASK    U_MASK(U_OTHER_LETTER)
-
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_MN_MASK    U_MASK(U_NON_SPACING_MARK)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_ME_MASK    U_MASK(U_ENCLOSING_MARK)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_MC_MASK    U_MASK(U_COMBINING_SPACING_MARK)
-
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_ND_MASK    U_MASK(U_DECIMAL_DIGIT_NUMBER)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_NL_MASK    U_MASK(U_LETTER_NUMBER)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_NO_MASK    U_MASK(U_OTHER_NUMBER)
-
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_ZS_MASK    U_MASK(U_SPACE_SEPARATOR)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_ZL_MASK    U_MASK(U_LINE_SEPARATOR)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_ZP_MASK    U_MASK(U_PARAGRAPH_SEPARATOR)
-
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_CC_MASK    U_MASK(U_CONTROL_CHAR)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_CF_MASK    U_MASK(U_FORMAT_CHAR)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_CO_MASK    U_MASK(U_PRIVATE_USE_CHAR)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_CS_MASK    U_MASK(U_SURROGATE)
-
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_PD_MASK    U_MASK(U_DASH_PUNCTUATION)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_PS_MASK    U_MASK(U_START_PUNCTUATION)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_PE_MASK    U_MASK(U_END_PUNCTUATION)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_PC_MASK    U_MASK(U_CONNECTOR_PUNCTUATION)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_PO_MASK    U_MASK(U_OTHER_PUNCTUATION)
-
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_SM_MASK    U_MASK(U_MATH_SYMBOL)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_SC_MASK    U_MASK(U_CURRENCY_SYMBOL)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_SK_MASK    U_MASK(U_MODIFIER_SYMBOL)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_SO_MASK    U_MASK(U_OTHER_SYMBOL)
-
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_PI_MASK    U_MASK(U_INITIAL_PUNCTUATION)
-/** Mask constant for a UCharCategory. @stable ICU 2.1 */
-#define U_GC_PF_MASK    U_MASK(U_FINAL_PUNCTUATION)
-
-
-/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */
-#define U_GC_L_MASK \
-            (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
-
-/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */
-#define U_GC_LC_MASK \
-            (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
-
-/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */
-#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
-
-/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */
-#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
-
-/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */
-#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
-
-/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */
-#define U_GC_C_MASK \
-            (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
-
-/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */
-#define U_GC_P_MASK \
-            (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
-             U_GC_PI_MASK|U_GC_PF_MASK)
-
-/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */
-#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
-
-/**
- * This specifies the language directional property of a character set.
- * @stable ICU 2.0
- */
-typedef enum UCharDirection {
-    /** See note !!.  Comments of the form "EN" are read by genpname. */
-
-    /** L @stable ICU 2.0 */
-    U_LEFT_TO_RIGHT               = 0,
-    /** R @stable ICU 2.0 */
-    U_RIGHT_TO_LEFT               = 1,
-    /** EN @stable ICU 2.0 */
-    U_EUROPEAN_NUMBER             = 2,
-    /** ES @stable ICU 2.0 */
-    U_EUROPEAN_NUMBER_SEPARATOR   = 3,
-    /** ET @stable ICU 2.0 */
-    U_EUROPEAN_NUMBER_TERMINATOR  = 4,
-    /** AN @stable ICU 2.0 */
-    U_ARABIC_NUMBER               = 5,
-    /** CS @stable ICU 2.0 */
-    U_COMMON_NUMBER_SEPARATOR     = 6,
-    /** B @stable ICU 2.0 */
-    U_BLOCK_SEPARATOR             = 7,
-    /** S @stable ICU 2.0 */
-    U_SEGMENT_SEPARATOR           = 8,
-    /** WS @stable ICU 2.0 */
-    U_WHITE_SPACE_NEUTRAL         = 9,
-    /** ON @stable ICU 2.0 */
-    U_OTHER_NEUTRAL               = 10,
-    /** LRE @stable ICU 2.0 */
-    U_LEFT_TO_RIGHT_EMBEDDING     = 11,
-    /** LRO @stable ICU 2.0 */
-    U_LEFT_TO_RIGHT_OVERRIDE      = 12,
-    /** AL @stable ICU 2.0 */
-    U_RIGHT_TO_LEFT_ARABIC        = 13,
-    /** RLE @stable ICU 2.0 */
-    U_RIGHT_TO_LEFT_EMBEDDING     = 14,
-    /** RLO @stable ICU 2.0 */
-    U_RIGHT_TO_LEFT_OVERRIDE      = 15,
-    /** PDF @stable ICU 2.0 */
-    U_POP_DIRECTIONAL_FORMAT      = 16,
-    /** NSM @stable ICU 2.0 */
-    U_DIR_NON_SPACING_MARK        = 17,
-    /** BN @stable ICU 2.0 */
-    U_BOUNDARY_NEUTRAL            = 18,
-    /** @stable ICU 2.0 */
-    U_CHAR_DIRECTION_COUNT
-} UCharDirection;
-
-/**
- * Constants for Unicode blocks, see the Unicode Data file Blocks.txt
- * @stable ICU 2.0
- */
-enum UBlockCode {
-
-    /** New No_Block value in Unicode 4. @stable ICU 2.6 */
-    UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
-
-    /** @stable ICU 2.0 */
-    UBLOCK_BASIC_LATIN = 1, /*[0000]*/ /*See note !!*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/
-
-    /**
-     * Unicode 3.2 renames this block to "Greek and Coptic".
-     * @stable ICU 2.0
-     */
-    UBLOCK_GREEK =8, /*[0370]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_CYRILLIC =9, /*[0400]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_ARMENIAN =10, /*[0530]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_HEBREW =11, /*[0590]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_ARABIC =12, /*[0600]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_SYRIAC =13, /*[0700]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_THAANA =14, /*[0780]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_DEVANAGARI =15, /*[0900]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_BENGALI =16, /*[0980]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_GURMUKHI =17, /*[0A00]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_GUJARATI =18, /*[0A80]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_ORIYA =19, /*[0B00]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_TAMIL =20, /*[0B80]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_TELUGU =21, /*[0C00]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_KANNADA =22, /*[0C80]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_MALAYALAM =23, /*[0D00]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_SINHALA =24, /*[0D80]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_THAI =25, /*[0E00]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_LAO =26, /*[0E80]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_TIBETAN =27, /*[0F00]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_MYANMAR =28, /*[1000]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_GEORGIAN =29, /*[10A0]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_HANGUL_JAMO =30, /*[1100]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_ETHIOPIC =31, /*[1200]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_CHEROKEE =32, /*[13A0]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_OGHAM =34, /*[1680]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_RUNIC =35, /*[16A0]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_KHMER =36, /*[1780]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_MONGOLIAN =37, /*[1800]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
-
-    /**
-     * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
-     * @stable ICU 2.0
-     */
-    UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_NUMBER_FORMS =45, /*[2150]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_ARROWS =46, /*[2190]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_BOX_DRAWING =52, /*[2500]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_DINGBATS =56, /*[2700]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_HIRAGANA =62, /*[3040]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_KATAKANA =63, /*[30A0]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_BOPOMOFO =64, /*[3100]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_KANBUN =66, /*[3190]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_YI_SYLLABLES =72, /*[A000]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_YI_RADICALS =73, /*[A490]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
-
-    /**
-     * Same as UBLOCK_PRIVATE_USE_AREA.
-     * Until Unicode 3.1.1, the corresponding block name was "Private Use",
-     * and multiple code point ranges had this block.
-     * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
-     * adds separate blocks for the supplementary PUAs.
-     *
-     * @stable ICU 2.0
-     */
-    UBLOCK_PRIVATE_USE = 78,
-    /**
-     * Same as UBLOCK_PRIVATE_USE.
-     * Until Unicode 3.1.1, the corresponding block name was "Private Use",
-     * and multiple code point ranges had this block.
-     * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
-     * adds separate blocks for the supplementary PUAs.
-     *
-     * @stable ICU 2.0
-     */
-    UBLOCK_PRIVATE_USE_AREA =UBLOCK_PRIVATE_USE, /*[E000]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_SPECIALS =86, /*[FFF0]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/
-
-    /* New blocks in Unicode 3.1 */
-
-    /** @stable ICU 2.0 */
-    UBLOCK_OLD_ITALIC = 88  , /*[10300]*/
-    /** @stable ICU 2.0 */
-    UBLOCK_GOTHIC = 89 , /*[10330]*/
-    /** @stable ICU 2.0 */
-    UBLOCK_DESERET = 90 , /*[10400]*/
-    /** @stable ICU 2.0 */
-    UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91 , /*[1D000]*/
-    /** @stable ICU 2.0 */
-    UBLOCK_MUSICAL_SYMBOLS = 92 , /*[1D100]*/
-    /** @stable ICU 2.0 */
-    UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93  , /*[1D400]*/
-    /** @stable ICU 2.0 */
-    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B  = 94 , /*[20000]*/
-    /** @stable ICU 2.0 */
-    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95 , /*[2F800]*/
-    /** @stable ICU 2.0 */
-    UBLOCK_TAGS = 96, /*[E0000]*/
-
-    /* New blocks in Unicode 3.2 */
-
-    /**
-     * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
-     * @stable ICU 2.2
-     */
-    UBLOCK_CYRILLIC_SUPPLEMENTARY = 97, 
-    /** @stable ICU 3.0  */
-    UBLOCK_CYRILLIC_SUPPLEMENT = UBLOCK_CYRILLIC_SUPPLEMENTARY, /*[0500]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_TAGALOG = 98, /*[1700]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_HANUNOO = 99, /*[1720]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_BUHID = 100, /*[1740]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_TAGBANWA = 101, /*[1760]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
-    /** @stable ICU 2.2 */
-    UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/
-
-    /* New blocks in Unicode 4 */
-
-    /** @stable ICU 2.6 */
-    UBLOCK_LIMBU = 111, /*[1900]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_TAI_LE = 112, /*[1950]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_UGARITIC = 120, /*[10380]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_SHAVIAN = 121, /*[10450]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_OSMANYA = 122, /*[10480]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
-    /** @stable ICU 2.6 */
-    UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/
-
-    /* New blocks in Unicode 4.1 */
-
-    /** @stable ICU 3.4 */
-    UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_BUGINESE = 129, /*[1A00]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_CJK_STROKES = 130, /*[31C0]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_COPTIC = 132, /*[2C80]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_TIFINAGH = 144, /*[2D30]*/
-    /** @stable ICU 3.4 */
-    UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/
-
-    /* New blocks in Unicode 5.0 */
-
-    /** @stable ICU 3.6 */
-    UBLOCK_NKO = 146, /*[07C0]*/
-    /** @stable ICU 3.6 */
-    UBLOCK_BALINESE = 147, /*[1B00]*/
-    /** @stable ICU 3.6 */
-    UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
-    /** @stable ICU 3.6 */
-    UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
-    /** @stable ICU 3.6 */
-    UBLOCK_PHAGS_PA = 150, /*[A840]*/
-    /** @stable ICU 3.6 */
-    UBLOCK_PHOENICIAN = 151, /*[10900]*/
-    /** @stable ICU 3.6 */
-    UBLOCK_CUNEIFORM = 152, /*[12000]*/
-    /** @stable ICU 3.6 */
-    UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/
-    /** @stable ICU 3.6 */
-    UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/
-
-    /* New blocks in Unicode 5.1 */
-
-    /** @draft ICU 4.0 */
-    UBLOCK_SUNDANESE = 155, /*[1B80]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_LEPCHA = 156, /*[1C00]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_OL_CHIKI = 157, /*[1C50]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_VAI = 159, /*[A500]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_SAURASHTRA = 161, /*[A880]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_KAYAH_LI = 162, /*[A900]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_REJANG = 163, /*[A930]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_CHAM = 164, /*[AA00]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_LYCIAN = 167, /*[10280]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_CARIAN = 168, /*[102A0]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_LYDIAN = 169, /*[10920]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/
-    /** @draft ICU 4.0 */
-    UBLOCK_DOMINO_TILES = 171, /*[1F030]*/
-
-    /** @stable ICU 2.0 */
-    UBLOCK_COUNT = 172,
-
-    /** @stable ICU 2.0 */
-    UBLOCK_INVALID_CODE=-1
-};
-
-/** @stable ICU 2.0 */
-typedef enum UBlockCode UBlockCode;
-
-/**
- * East Asian Width constants.
- *
- * @see UCHAR_EAST_ASIAN_WIDTH
- * @see u_getIntPropertyValue
- * @stable ICU 2.2
- */
-typedef enum UEastAsianWidth {
-    U_EA_NEUTRAL,   /*[N]*/ /*See note !!*/
-    U_EA_AMBIGUOUS, /*[A]*/
-    U_EA_HALFWIDTH, /*[H]*/
-    U_EA_FULLWIDTH, /*[F]*/
-    U_EA_NARROW,    /*[Na]*/
-    U_EA_WIDE,      /*[W]*/
-    U_EA_COUNT
-} UEastAsianWidth;
-/*
- * Implementation note:
- * Keep UEastAsianWidth constant values in sync with names list in genprops/props2.c.
- */
-
-/**
- * Selector constants for u_charName().
- * u_charName() returns the "modern" name of a
- * Unicode character; or the name that was defined in
- * Unicode version 1.0, before the Unicode standard merged
- * with ISO-10646; or an "extended" name that gives each
- * Unicode code point a unique name.
- *
- * @see u_charName
- * @stable ICU 2.0
- */
-typedef enum UCharNameChoice {
-    U_UNICODE_CHAR_NAME,
-    U_UNICODE_10_CHAR_NAME,
-    U_EXTENDED_CHAR_NAME,
-    U_CHAR_NAME_CHOICE_COUNT
-} UCharNameChoice;
-
-/**
- * Selector constants for u_getPropertyName() and
- * u_getPropertyValueName().  These selectors are used to choose which
- * name is returned for a given property or value.  All properties and
- * values have a long name.  Most have a short name, but some do not.
- * Unicode allows for additional names, beyond the long and short
- * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
- * i=1, 2,...
- *
- * @see u_getPropertyName()
- * @see u_getPropertyValueName()
- * @stable ICU 2.4
- */
-typedef enum UPropertyNameChoice {
-    U_SHORT_PROPERTY_NAME,
-    U_LONG_PROPERTY_NAME,
-    U_PROPERTY_NAME_CHOICE_COUNT
-} UPropertyNameChoice;
-
-/**
- * Decomposition Type constants.
- *
- * @see UCHAR_DECOMPOSITION_TYPE
- * @stable ICU 2.2
- */
-typedef enum UDecompositionType {
-    U_DT_NONE,              /*[none]*/ /*See note !!*/
-    U_DT_CANONICAL,         /*[can]*/
-    U_DT_COMPAT,            /*[com]*/
-    U_DT_CIRCLE,            /*[enc]*/
-    U_DT_FINAL,             /*[fin]*/
-    U_DT_FONT,              /*[font]*/
-    U_DT_FRACTION,          /*[fra]*/
-    U_DT_INITIAL,           /*[init]*/
-    U_DT_ISOLATED,          /*[iso]*/
-    U_DT_MEDIAL,            /*[med]*/
-    U_DT_NARROW,            /*[nar]*/
-    U_DT_NOBREAK,           /*[nb]*/
-    U_DT_SMALL,             /*[sml]*/
-    U_DT_SQUARE,            /*[sqr]*/
-    U_DT_SUB,               /*[sub]*/
-    U_DT_SUPER,             /*[sup]*/
-    U_DT_VERTICAL,          /*[vert]*/
-    U_DT_WIDE,              /*[wide]*/
-    U_DT_COUNT /* 18 */
-} UDecompositionType;
-
-/**
- * Joining Type constants.
- *
- * @see UCHAR_JOINING_TYPE
- * @stable ICU 2.2
- */
-typedef enum UJoiningType {
-    U_JT_NON_JOINING,       /*[U]*/ /*See note !!*/
-    U_JT_JOIN_CAUSING,      /*[C]*/
-    U_JT_DUAL_JOINING,      /*[D]*/
-    U_JT_LEFT_JOINING,      /*[L]*/
-    U_JT_RIGHT_JOINING,     /*[R]*/
-    U_JT_TRANSPARENT,       /*[T]*/
-    U_JT_COUNT /* 6 */
-} UJoiningType;
-
-/**
- * Joining Group constants.
- *
- * @see UCHAR_JOINING_GROUP
- * @stable ICU 2.2
- */
-typedef enum UJoiningGroup {
-    U_JG_NO_JOINING_GROUP,
-    U_JG_AIN,
-    U_JG_ALAPH,
-    U_JG_ALEF,
-    U_JG_BEH,
-    U_JG_BETH,
-    U_JG_DAL,
-    U_JG_DALATH_RISH,
-    U_JG_E,
-    U_JG_FEH,
-    U_JG_FINAL_SEMKATH,
-    U_JG_GAF,
-    U_JG_GAMAL,
-    U_JG_HAH,
-    U_JG_HAMZA_ON_HEH_GOAL,
-    U_JG_HE,
-    U_JG_HEH,
-    U_JG_HEH_GOAL,
-    U_JG_HETH,
-    U_JG_KAF,
-    U_JG_KAPH,
-    U_JG_KNOTTED_HEH,
-    U_JG_LAM,
-    U_JG_LAMADH,
-    U_JG_MEEM,
-    U_JG_MIM,
-    U_JG_NOON,
-    U_JG_NUN,
-    U_JG_PE,
-    U_JG_QAF,
-    U_JG_QAPH,
-    U_JG_REH,
-    U_JG_REVERSED_PE,
-    U_JG_SAD,
-    U_JG_SADHE,
-    U_JG_SEEN,
-    U_JG_SEMKATH,
-    U_JG_SHIN,
-    U_JG_SWASH_KAF,
-    U_JG_SYRIAC_WAW,
-    U_JG_TAH,
-    U_JG_TAW,
-    U_JG_TEH_MARBUTA,
-    U_JG_TETH,
-    U_JG_WAW,
-    U_JG_YEH,
-    U_JG_YEH_BARREE,
-    U_JG_YEH_WITH_TAIL,
-    U_JG_YUDH,
-    U_JG_YUDH_HE,
-    U_JG_ZAIN,
-    U_JG_FE,        /**< @stable ICU 2.6 */
-    U_JG_KHAPH,     /**< @stable ICU 2.6 */
-    U_JG_ZHAIN,     /**< @stable ICU 2.6 */
-    U_JG_BURUSHASKI_YEH_BARREE, /**< @draft ICU 4.0 */
-    U_JG_COUNT
-} UJoiningGroup;
-
-/**
- * Grapheme Cluster Break constants.
- *
- * @see UCHAR_GRAPHEME_CLUSTER_BREAK
- * @stable ICU 3.4
- */
-typedef enum UGraphemeClusterBreak {
-    U_GCB_OTHER = 0,            /*[XX]*/ /*See note !!*/
-    U_GCB_CONTROL = 1,          /*[CN]*/
-    U_GCB_CR = 2,               /*[CR]*/
-    U_GCB_EXTEND = 3,           /*[EX]*/
-    U_GCB_L = 4,                /*[L]*/
-    U_GCB_LF = 5,               /*[LF]*/
-    U_GCB_LV = 6,               /*[LV]*/
-    U_GCB_LVT = 7,              /*[LVT]*/
-    U_GCB_T = 8,                /*[T]*/
-    U_GCB_V = 9,                /*[V]*/
-    U_GCB_SPACING_MARK = 10,    /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
-    U_GCB_PREPEND = 11,         /*[PP]*/
-    U_GCB_COUNT = 12
-} UGraphemeClusterBreak;
-
-/**
- * Word Break constants.
- * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)
- *
- * @see UCHAR_WORD_BREAK
- * @stable ICU 3.4
- */
-typedef enum UWordBreakValues {
-    U_WB_OTHER = 0,             /*[XX]*/ /*See note !!*/
-    U_WB_ALETTER = 1,           /*[LE]*/
-    U_WB_FORMAT = 2,            /*[FO]*/
-    U_WB_KATAKANA = 3,          /*[KA]*/
-    U_WB_MIDLETTER = 4,         /*[ML]*/
-    U_WB_MIDNUM = 5,            /*[MN]*/
-    U_WB_NUMERIC = 6,           /*[NU]*/
-    U_WB_EXTENDNUMLET = 7,      /*[EX]*/
-    U_WB_CR = 8,                /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
-    U_WB_EXTEND = 9,            /*[Extend]*/
-    U_WB_LF = 10,               /*[LF]*/
-    U_WB_MIDNUMLET =11,         /*[MB]*/
-    U_WB_NEWLINE =12,           /*[NL]*/
-    U_WB_COUNT = 13
-} UWordBreakValues;
-
-/**
- * Sentence Break constants.
- *
- * @see UCHAR_SENTENCE_BREAK
- * @stable ICU 3.4
- */
-typedef enum USentenceBreak {
-    U_SB_OTHER = 0,             /*[XX]*/ /*See note !!*/
-    U_SB_ATERM = 1,             /*[AT]*/
-    U_SB_CLOSE = 2,             /*[CL]*/
-    U_SB_FORMAT = 3,            /*[FO]*/
-    U_SB_LOWER = 4,             /*[LO]*/
-    U_SB_NUMERIC = 5,           /*[NU]*/
-    U_SB_OLETTER = 6,           /*[LE]*/
-    U_SB_SEP = 7,               /*[SE]*/
-    U_SB_SP = 8,                /*[SP]*/
-    U_SB_STERM = 9,             /*[ST]*/
-    U_SB_UPPER = 10,            /*[UP]*/
-    U_SB_CR = 11,               /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
-    U_SB_EXTEND = 12,           /*[EX]*/
-    U_SB_LF = 13,               /*[LF]*/
-    U_SB_SCONTINUE = 14,        /*[SC]*/
-    U_SB_COUNT = 15
-} USentenceBreak;
-
-/**
- * Line Break constants.
- *
- * @see UCHAR_LINE_BREAK
- * @stable ICU 2.2
- */
-typedef enum ULineBreak {
-    U_LB_UNKNOWN = 0,           /*[XX]*/ /*See note !!*/
-    U_LB_AMBIGUOUS = 1,         /*[AI]*/
-    U_LB_ALPHABETIC = 2,        /*[AL]*/
-    U_LB_BREAK_BOTH = 3,        /*[B2]*/
-    U_LB_BREAK_AFTER = 4,       /*[BA]*/
-    U_LB_BREAK_BEFORE = 5,      /*[BB]*/
-    U_LB_MANDATORY_BREAK = 6,   /*[BK]*/
-    U_LB_CONTINGENT_BREAK = 7,  /*[CB]*/
-    U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
-    U_LB_COMBINING_MARK = 9,    /*[CM]*/
-    U_LB_CARRIAGE_RETURN = 10,   /*[CR]*/
-    U_LB_EXCLAMATION = 11,       /*[EX]*/
-    U_LB_GLUE = 12,              /*[GL]*/
-    U_LB_HYPHEN = 13,            /*[HY]*/
-    U_LB_IDEOGRAPHIC = 14,       /*[ID]*/
-    U_LB_INSEPERABLE = 15,
-    /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */
-    U_LB_INSEPARABLE=U_LB_INSEPERABLE,/*[IN]*/
-    U_LB_INFIX_NUMERIC = 16,     /*[IS]*/
-    U_LB_LINE_FEED = 17,         /*[LF]*/
-    U_LB_NONSTARTER = 18,        /*[NS]*/
-    U_LB_NUMERIC = 19,           /*[NU]*/
-    U_LB_OPEN_PUNCTUATION = 20,  /*[OP]*/
-    U_LB_POSTFIX_NUMERIC = 21,   /*[PO]*/
-    U_LB_PREFIX_NUMERIC = 22,    /*[PR]*/
-    U_LB_QUOTATION = 23,         /*[QU]*/
-    U_LB_COMPLEX_CONTEXT = 24,   /*[SA]*/
-    U_LB_SURROGATE = 25,         /*[SG]*/
-    U_LB_SPACE = 26,             /*[SP]*/
-    U_LB_BREAK_SYMBOLS = 27,     /*[SY]*/
-    U_LB_ZWSPACE = 28,           /*[ZW]*/
-    U_LB_NEXT_LINE = 29,         /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
-    U_LB_WORD_JOINER = 30,       /*[WJ]*/
-    U_LB_H2 = 31,                /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
-    U_LB_H3 = 32,                /*[H3]*/
-    U_LB_JL = 33,                /*[JL]*/
-    U_LB_JT = 34,                /*[JT]*/
-    U_LB_JV = 35,                /*[JV]*/
-    U_LB_COUNT = 36
-} ULineBreak;
-
-/**
- * Numeric Type constants.
- *
- * @see UCHAR_NUMERIC_TYPE
- * @stable ICU 2.2
- */
-typedef enum UNumericType {
-    U_NT_NONE,              /*[None]*/ /*See note !!*/
-    U_NT_DECIMAL,           /*[de]*/
-    U_NT_DIGIT,             /*[di]*/
-    U_NT_NUMERIC,           /*[nu]*/
-    U_NT_COUNT
-} UNumericType;
-
-/**
- * Hangul Syllable Type constants.
- *
- * @see UCHAR_HANGUL_SYLLABLE_TYPE
- * @stable ICU 2.6
- */
-typedef enum UHangulSyllableType {
-    U_HST_NOT_APPLICABLE,   /*[NA]*/ /*See note !!*/
-    U_HST_LEADING_JAMO,     /*[L]*/
-    U_HST_VOWEL_JAMO,       /*[V]*/
-    U_HST_TRAILING_JAMO,    /*[T]*/
-    U_HST_LV_SYLLABLE,      /*[LV]*/
-    U_HST_LVT_SYLLABLE,     /*[LVT]*/
-    U_HST_COUNT
-} UHangulSyllableType;
-
-/**
- * Check a binary Unicode property for a code point.
- *
- * Unicode, especially in version 3.2, defines many more properties than the
- * original set in UnicodeData.txt.
- *
- * The properties APIs are intended to reflect Unicode properties as defined
- * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
- * For details about the properties see http://www.unicode.org/ucd/ .
- * For names of Unicode properties see the UCD file PropertyAliases.txt.
- *
- * Important: If ICU is built with UCD files from Unicode versions below 3.2,
- * then properties marked with "new in Unicode 3.2" are not or not fully available.
- *
- * @param c Code point to test.
- * @param which UProperty selector constant, identifies which binary property to check.
- *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT.
- * @return TRUE or FALSE according to the binary Unicode property value for c.
- *         Also FALSE if 'which' is out of bounds or if the Unicode version
- *         does not have data for the property at all, or not for this code point.
- *
- * @see UProperty
- * @see u_getIntPropertyValue
- * @see u_getUnicodeVersion
- * @stable ICU 2.1
- */
-U_STABLE UBool U_EXPORT2
-u_hasBinaryProperty(UChar32 c, UProperty which);
-
-/**
- * Check if a code point has the Alphabetic Unicode property.
- * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
- * This is different from u_isalpha!
- * @param c Code point to test
- * @return true if the code point has the Alphabetic Unicode property, false otherwise
- *
- * @see UCHAR_ALPHABETIC
- * @see u_isalpha
- * @see u_hasBinaryProperty
- * @stable ICU 2.1
- */
-U_STABLE UBool U_EXPORT2
-u_isUAlphabetic(UChar32 c);
-
-/**
- * Check if a code point has the Lowercase Unicode property.
- * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE).
- * This is different from u_islower!
- * @param c Code point to test
- * @return true if the code point has the Lowercase Unicode property, false otherwise
- *
- * @see UCHAR_LOWERCASE
- * @see u_islower
- * @see u_hasBinaryProperty
- * @stable ICU 2.1
- */
-U_STABLE UBool U_EXPORT2
-u_isULowercase(UChar32 c);
-
-/**
- * Check if a code point has the Uppercase Unicode property.
- * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE).
- * This is different from u_isupper!
- * @param c Code point to test
- * @return true if the code point has the Uppercase Unicode property, false otherwise
- *
- * @see UCHAR_UPPERCASE
- * @see u_isupper
- * @see u_hasBinaryProperty
- * @stable ICU 2.1
- */
-U_STABLE UBool U_EXPORT2
-u_isUUppercase(UChar32 c);
-
-/**
- * Check if a code point has the White_Space Unicode property.
- * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE).
- * This is different from both u_isspace and u_isWhitespace!
- *
- * Note: There are several ICU whitespace functions; please see the uchar.h
- * file documentation for a detailed comparison.
- *
- * @param c Code point to test
- * @return true if the code point has the White_Space Unicode property, false otherwise.
- *
- * @see UCHAR_WHITE_SPACE
- * @see u_isWhitespace
- * @see u_isspace
- * @see u_isJavaSpaceChar
- * @see u_hasBinaryProperty
- * @stable ICU 2.1
- */
-U_STABLE UBool U_EXPORT2
-u_isUWhiteSpace(UChar32 c);
-
-/**
- * Get the property value for an enumerated or integer Unicode property for a code point.
- * Also returns binary and mask property values.
- *
- * Unicode, especially in version 3.2, defines many more properties than the
- * original set in UnicodeData.txt.
- *
- * The properties APIs are intended to reflect Unicode properties as defined
- * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
- * For details about the properties see http://www.unicode.org/ .
- * For names of Unicode properties see the UCD file PropertyAliases.txt.
- *
- * Sample usage:
- * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);
- * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
- *
- * @param c Code point to test.
- * @param which UProperty selector constant, identifies which property to check.
- *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
- *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
- *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
- * @return Numeric value that is directly the property value or,
- *         for enumerated properties, corresponds to the numeric value of the enumerated
- *         constant of the respective property value enumeration type
- *         (cast to enum type if necessary).
- *         Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties.
- *         Returns a bit-mask for mask properties.
- *         Returns 0 if 'which' is out of bounds or if the Unicode version
- *         does not have data for the property at all, or not for this code point.
- *
- * @see UProperty
- * @see u_hasBinaryProperty
- * @see u_getIntPropertyMinValue
- * @see u_getIntPropertyMaxValue
- * @see u_getUnicodeVersion
- * @stable ICU 2.2
- */
-U_STABLE int32_t U_EXPORT2
-u_getIntPropertyValue(UChar32 c, UProperty which);
-
-/**
- * Get the minimum value for an enumerated/integer/binary Unicode property.
- * Can be used together with u_getIntPropertyMaxValue
- * to allocate arrays of UnicodeSet or similar.
- *
- * @param which UProperty selector constant, identifies which binary property to check.
- *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
- *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
- * @return Minimum value returned by u_getIntPropertyValue for a Unicode property.
- *         0 if the property selector is out of range.
- *
- * @see UProperty
- * @see u_hasBinaryProperty
- * @see u_getUnicodeVersion
- * @see u_getIntPropertyMaxValue
- * @see u_getIntPropertyValue
- * @stable ICU 2.2
- */
-U_STABLE int32_t U_EXPORT2
-u_getIntPropertyMinValue(UProperty which);
-
-/**
- * Get the maximum value for an enumerated/integer/binary Unicode property.
- * Can be used together with u_getIntPropertyMinValue
- * to allocate arrays of UnicodeSet or similar.
- *
- * Examples for min/max values (for Unicode 3.2):
- *
- * - UCHAR_BIDI_CLASS:    0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
- * - UCHAR_SCRIPT:        0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
- * - UCHAR_IDEOGRAPHIC:   0/1  (FALSE/TRUE)
- *
- * For undefined UProperty constant values, min/max values will be 0/-1.
- *
- * @param which UProperty selector constant, identifies which binary property to check.
- *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
- *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
- * @return Maximum value returned by u_getIntPropertyValue for a Unicode property.
- *         <=0 if the property selector is out of range.
- *
- * @see UProperty
- * @see u_hasBinaryProperty
- * @see u_getUnicodeVersion
- * @see u_getIntPropertyMaxValue
- * @see u_getIntPropertyValue
- * @stable ICU 2.2
- */
-U_STABLE int32_t U_EXPORT2
-u_getIntPropertyMaxValue(UProperty which);
-
-/**
- * Get the numeric value for a Unicode code point as defined in the
- * Unicode Character Database.
- *
- * A "double" return type is necessary because
- * some numeric values are fractions, negative, or too large for int32_t.
- *
- * For characters without any numeric values in the Unicode Character Database,
- * this function will return U_NO_NUMERIC_VALUE.
- *
- * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()
- * also supports negative values, large values, and fractions,
- * while Java's getNumericValue() returns values 10..35 for ASCII letters.
- *
- * @param c Code point to get the numeric value for.
- * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
- *
- * @see U_NO_NUMERIC_VALUE
- * @stable ICU 2.2
- */
-U_STABLE double U_EXPORT2
-u_getNumericValue(UChar32 c);
-
-/**
- * Special value that is returned by u_getNumericValue when
- * no numeric value is defined for a code point.
- *
- * @see u_getNumericValue
- * @stable ICU 2.2
- */
-#define U_NO_NUMERIC_VALUE ((double)-123456789.)
-
-/**
- * Determines whether the specified code point has the general category "Ll"
- * (lowercase letter).
- *
- * Same as java.lang.Character.isLowerCase().
- *
- * This misses some characters that are also lowercase but
- * have a different general category value.
- * In order to include those, use UCHAR_LOWERCASE.
- *
- * In addition to being equivalent to a Java function, this also serves
- * as a C/POSIX migration function.
- * See the comments about C/POSIX character classification functions in the
- * documentation at the top of this header file.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is an Ll lowercase letter
- *
- * @see UCHAR_LOWERCASE
- * @see u_isupper
- * @see u_istitle
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_islower(UChar32 c);
-
-/**
- * Determines whether the specified code point has the general category "Lu"
- * (uppercase letter).
- *
- * Same as java.lang.Character.isUpperCase().
- *
- * This misses some characters that are also uppercase but
- * have a different general category value.
- * In order to include those, use UCHAR_UPPERCASE.
- *
- * In addition to being equivalent to a Java function, this also serves
- * as a C/POSIX migration function.
- * See the comments about C/POSIX character classification functions in the
- * documentation at the top of this header file.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is an Lu uppercase letter
- *
- * @see UCHAR_UPPERCASE
- * @see u_islower
- * @see u_istitle
- * @see u_tolower
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isupper(UChar32 c);
-
-/**
- * Determines whether the specified code point is a titlecase letter.
- * True for general category "Lt" (titlecase letter).
- *
- * Same as java.lang.Character.isTitleCase().
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is an Lt titlecase letter
- *
- * @see u_isupper
- * @see u_islower
- * @see u_totitle
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_istitle(UChar32 c);
-
-/**
- * Determines whether the specified code point is a digit character according to Java.
- * True for characters with general category "Nd" (decimal digit numbers).
- * Beginning with Unicode 4, this is the same as
- * testing for the Numeric_Type of Decimal.
- *
- * Same as java.lang.Character.isDigit().
- *
- * In addition to being equivalent to a Java function, this also serves
- * as a C/POSIX migration function.
- * See the comments about C/POSIX character classification functions in the
- * documentation at the top of this header file.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is a digit character according to Character.isDigit()
- *
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isdigit(UChar32 c);
-
-/**
- * Determines whether the specified code point is a letter character.
- * True for general categories "L" (letters).
- *
- * Same as java.lang.Character.isLetter().
- *
- * In addition to being equivalent to a Java function, this also serves
- * as a C/POSIX migration function.
- * See the comments about C/POSIX character classification functions in the
- * documentation at the top of this header file.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is a letter character
- *
- * @see u_isdigit
- * @see u_isalnum
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isalpha(UChar32 c);
-
-/**
- * Determines whether the specified code point is an alphanumeric character
- * (letter or digit) according to Java.
- * True for characters with general categories
- * "L" (letters) and "Nd" (decimal digit numbers).
- *
- * Same as java.lang.Character.isLetterOrDigit().
- *
- * In addition to being equivalent to a Java function, this also serves
- * as a C/POSIX migration function.
- * See the comments about C/POSIX character classification functions in the
- * documentation at the top of this header file.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit()
- *
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isalnum(UChar32 c);
-
-/**
- * Determines whether the specified code point is a hexadecimal digit.
- * This is equivalent to u_digit(c, 16)>=0.
- * True for characters with general category "Nd" (decimal digit numbers)
- * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
- * (That is, for letters with code points
- * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
- *
- * In order to narrow the definition of hexadecimal digits to only ASCII
- * characters, use (c<=0x7f && u_isxdigit(c)).
- *
- * This is a C/POSIX migration function.
- * See the comments about C/POSIX character classification functions in the
- * documentation at the top of this header file.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is a hexadecimal digit
- *
- * @stable ICU 2.6
- */
-U_STABLE UBool U_EXPORT2
-u_isxdigit(UChar32 c);
-
-/**
- * Determines whether the specified code point is a punctuation character.
- * True for characters with general categories "P" (punctuation).
- *
- * This is a C/POSIX migration function.
- * See the comments about C/POSIX character classification functions in the
- * documentation at the top of this header file.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is a punctuation character
- *
- * @stable ICU 2.6
- */
-U_STABLE UBool U_EXPORT2
-u_ispunct(UChar32 c);
-
-/**
- * Determines whether the specified code point is a "graphic" character
- * (printable, excluding spaces).
- * TRUE for all characters except those with general categories
- * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates),
- * "Cn" (unassigned), and "Z" (separators).
- *
- * This is a C/POSIX migration function.
- * See the comments about C/POSIX character classification functions in the
- * documentation at the top of this header file.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is a "graphic" character
- *
- * @stable ICU 2.6
- */
-U_STABLE UBool U_EXPORT2
-u_isgraph(UChar32 c);
-
-/**
- * Determines whether the specified code point is a "blank" or "horizontal space",
- * a character that visibly separates words on a line.
- * The following are equivalent definitions:
- *
- * TRUE for Unicode White_Space characters except for "vertical space controls"
- * where "vertical space controls" are the following characters:
- * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)
- *
- * same as
- *
- * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators)
- * except Zero Width Space (ZWSP, U+200B).
- *
- * Note: There are several ICU whitespace functions; please see the uchar.h
- * file documentation for a detailed comparison.
- *
- * This is a C/POSIX migration function.
- * See the comments about C/POSIX character classification functions in the
- * documentation at the top of this header file.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is a "blank"
- *
- * @stable ICU 2.6
- */
-U_STABLE UBool U_EXPORT2
-u_isblank(UChar32 c);
-
-/**
- * Determines whether the specified code point is "defined",
- * which usually means that it is assigned a character.
- * True for general categories other than "Cn" (other, not assigned),
- * i.e., true for all code points mentioned in UnicodeData.txt.
- *
- * Note that non-character code points (e.g., U+FDD0) are not "defined"
- * (they are Cn), but surrogate code points are "defined" (Cs).
- *
- * Same as java.lang.Character.isDefined().
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is assigned a character
- *
- * @see u_isdigit
- * @see u_isalpha
- * @see u_isalnum
- * @see u_isupper
- * @see u_islower
- * @see u_istitle
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isdefined(UChar32 c);
-
-/**
- * Determines if the specified character is a space character or not.
- *
- * Note: There are several ICU whitespace functions; please see the uchar.h
- * file documentation for a detailed comparison.
- *
- * This is a C/POSIX migration function.
- * See the comments about C/POSIX character classification functions in the
- * documentation at the top of this header file.
- *
- * @param c    the character to be tested
- * @return  true if the character is a space character; false otherwise.
- *
- * @see u_isJavaSpaceChar
- * @see u_isWhitespace
- * @see u_isUWhiteSpace
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isspace(UChar32 c);
-
-/**
- * Determine if the specified code point is a space character according to Java.
- * True for characters with general categories "Z" (separators),
- * which does not include control codes (e.g., TAB or Line Feed).
- *
- * Same as java.lang.Character.isSpaceChar().
- *
- * Note: There are several ICU whitespace functions; please see the uchar.h
- * file documentation for a detailed comparison.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is a space character according to Character.isSpaceChar()
- *
- * @see u_isspace
- * @see u_isWhitespace
- * @see u_isUWhiteSpace
- * @stable ICU 2.6
- */
-U_STABLE UBool U_EXPORT2
-u_isJavaSpaceChar(UChar32 c);
-
-/**
- * Determines if the specified code point is a whitespace character according to Java/ICU.
- * A character is considered to be a Java whitespace character if and only
- * if it satisfies one of the following criteria:
- *
- * - It is a Unicode separator (categories "Z"), but is not
- *      a no-break space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
- * - It is U+0009 HORIZONTAL TABULATION.
- * - It is U+000A LINE FEED.
- * - It is U+000B VERTICAL TABULATION.
- * - It is U+000C FORM FEED.
- * - It is U+000D CARRIAGE RETURN.
- * - It is U+001C FILE SEPARATOR.
- * - It is U+001D GROUP SEPARATOR.
- * - It is U+001E RECORD SEPARATOR.
- * - It is U+001F UNIT SEPARATOR.
- * - It is U+0085 NEXT LINE.
- *
- * Same as java.lang.Character.isWhitespace() except that Java omits U+0085.
- *
- * Note: There are several ICU whitespace functions; please see the uchar.h
- * file documentation for a detailed comparison.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is a whitespace character according to Java/ICU
- *
- * @see u_isspace
- * @see u_isJavaSpaceChar
- * @see u_isUWhiteSpace
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isWhitespace(UChar32 c);
-
-/**
- * Determines whether the specified code point is a control character
- * (as defined by this function).
- * A control character is one of the following:
- * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
- * - U_CONTROL_CHAR (Cc)
- * - U_FORMAT_CHAR (Cf)
- * - U_LINE_SEPARATOR (Zl)
- * - U_PARAGRAPH_SEPARATOR (Zp)
- *
- * This is a C/POSIX migration function.
- * See the comments about C/POSIX character classification functions in the
- * documentation at the top of this header file.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is a control character
- *
- * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
- * @see u_isprint
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_iscntrl(UChar32 c);
-
-/**
- * Determines whether the specified code point is an ISO control code.
- * True for U+0000..U+001f and U+007f..U+009f (general category "Cc").
- *
- * Same as java.lang.Character.isISOControl().
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is an ISO control code
- *
- * @see u_iscntrl
- * @stable ICU 2.6
- */
-U_STABLE UBool U_EXPORT2
-u_isISOControl(UChar32 c);
-
-/**
- * Determines whether the specified code point is a printable character.
- * True for general categories <em>other</em> than "C" (controls).
- *
- * This is a C/POSIX migration function.
- * See the comments about C/POSIX character classification functions in the
- * documentation at the top of this header file.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is a printable character
- *
- * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
- * @see u_iscntrl
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isprint(UChar32 c);
-
-/**
- * Determines whether the specified code point is a base character.
- * True for general categories "L" (letters), "N" (numbers),
- * "Mc" (spacing combining marks), and "Me" (enclosing marks).
- *
- * Note that this is different from the Unicode definition in
- * chapter 3.5, conformance clause D13,
- * which defines base characters to be all characters (not Cn)
- * that do not graphically combine with preceding characters (M)
- * and that are neither control (Cc) or format (Cf) characters.
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is a base character according to this function
- *
- * @see u_isalpha
- * @see u_isdigit
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isbase(UChar32 c);
-
-/**
- * Returns the bidirectional category value for the code point,
- * which is used in the Unicode bidirectional algorithm
- * (UAX #9 http://www.unicode.org/reports/tr9/).
- * Note that some <em>unassigned</em> code points have bidi values
- * of R or AL because they are in blocks that are reserved
- * for Right-To-Left scripts.
- *
- * Same as java.lang.Character.getDirectionality()
- *
- * @param c the code point to be tested
- * @return the bidirectional category (UCharDirection) value
- *
- * @see UCharDirection
- * @stable ICU 2.0
- */
-U_STABLE UCharDirection U_EXPORT2
-u_charDirection(UChar32 c);
-
-/**
- * Determines whether the code point has the Bidi_Mirrored property.
- * This property is set for characters that are commonly used in
- * Right-To-Left contexts and need to be displayed with a "mirrored"
- * glyph.
- *
- * Same as java.lang.Character.isMirrored().
- * Same as UCHAR_BIDI_MIRRORED
- *
- * @param c the code point to be tested
- * @return TRUE if the character has the Bidi_Mirrored property
- *
- * @see UCHAR_BIDI_MIRRORED
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isMirrored(UChar32 c);
-
-/**
- * Maps the specified character to a "mirror-image" character.
- * For characters with the Bidi_Mirrored property, implementations
- * sometimes need a "poor man's" mapping to another Unicode
- * character (code point) such that the default glyph may serve
- * as the mirror-image of the default glyph of the specified
- * character. This is useful for text conversion to and from
- * codepages with visual order, and for displays without glyph
- * selecetion capabilities.
- *
- * @param c the code point to be mapped
- * @return another Unicode code point that may serve as a mirror-image
- *         substitute, or c itself if there is no such mapping or c
- *         does not have the Bidi_Mirrored property
- *
- * @see UCHAR_BIDI_MIRRORED
- * @see u_isMirrored
- * @stable ICU 2.0
- */
-U_STABLE UChar32 U_EXPORT2
-u_charMirror(UChar32 c);
-
-/**
- * Returns the general category value for the code point.
- *
- * Same as java.lang.Character.getType().
- *
- * @param c the code point to be tested
- * @return the general category (UCharCategory) value
- *
- * @see UCharCategory
- * @stable ICU 2.0
- */
-U_STABLE int8_t U_EXPORT2
-u_charType(UChar32 c);
-
-/**
- * Get a single-bit bit set for the general category of a character.
- * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc.
- * Same as U_MASK(u_charType(c)).
- *
- * @param c the code point to be tested
- * @return a single-bit mask corresponding to the general category (UCharCategory) value
- *
- * @see u_charType
- * @see UCharCategory
- * @see U_GC_CN_MASK
- * @stable ICU 2.1
- */
-#define U_GET_GC_MASK(c) U_MASK(u_charType(c))
-
-/**
- * Callback from u_enumCharTypes(), is called for each contiguous range
- * of code points c (where start<=c<limit)
- * with the same Unicode general category ("character type").
- *
- * The callback function can stop the enumeration by returning FALSE.
- *
- * @param context an opaque pointer, as passed into utrie_enum()
- * @param start the first code point in a contiguous range with value
- * @param limit one past the last code point in a contiguous range with value
- * @param type the general category for all code points in [start..limit[
- * @return FALSE to stop the enumeration
- *
- * @stable ICU 2.1
- * @see UCharCategory
- * @see u_enumCharTypes
- */
-typedef UBool U_CALLCONV
-UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
-
-/**
- * Enumerate efficiently all code points with their Unicode general categories.
- *
- * This is useful for building data structures (e.g., UnicodeSet's),
- * for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
- *
- * For each contiguous range of code points with a given general category ("character type"),
- * the UCharEnumTypeRange function is called.
- * Adjacent ranges have different types.
- * The Unicode Standard guarantees that the numeric value of the type is 0..31.
- *
- * @param enumRange a pointer to a function that is called for each contiguous range
- *                  of code points with the same general category
- * @param context an opaque pointer that is passed on to the callback function
- *
- * @stable ICU 2.1
- * @see UCharCategory
- * @see UCharEnumTypeRange
- */
-U_STABLE void U_EXPORT2
-u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
-
-#if !UCONFIG_NO_NORMALIZATION
-
-/**
- * Returns the combining class of the code point as specified in UnicodeData.txt.
- *
- * @param c the code point of the character
- * @return the combining class of the character
- * @stable ICU 2.0
- */
-U_STABLE uint8_t U_EXPORT2
-u_getCombiningClass(UChar32 c);
-
-#endif
-
-/**
- * Returns the decimal digit value of a decimal digit character.
- * Such characters have the general category "Nd" (decimal digit numbers)
- * and a Numeric_Type of Decimal.
- *
- * Unlike ICU releases before 2.6, no digit values are returned for any
- * Han characters because Han number characters are often used with a special
- * Chinese-style number format (with characters for powers of 10 in between)
- * instead of in decimal-positional notation.
- * Unicode 4 explicitly assigns Han number characters the Numeric_Type
- * Numeric instead of Decimal.
- * See Jitterbug 1483 for more details.
- *
- * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue()
- * for complete numeric Unicode properties.
- *
- * @param c the code point for which to get the decimal digit value
- * @return the decimal digit value of c,
- *         or -1 if c is not a decimal digit character
- *
- * @see u_getNumericValue
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_charDigitValue(UChar32 c);
-
-/**
- * Returns the Unicode allocation block that contains the character.
- *
- * @param c the code point to be tested
- * @return the block value (UBlockCode) for c
- *
- * @see UBlockCode
- * @stable ICU 2.0
- */
-U_STABLE UBlockCode U_EXPORT2
-ublock_getCode(UChar32 c);
-
-/**
- * Retrieve the name of a Unicode character.
- * Depending on <code>nameChoice</code>, the character name written
- * into the buffer is the "modern" name or the name that was defined
- * in Unicode version 1.0.
- * The name contains only "invariant" characters
- * like A-Z, 0-9, space, and '-'.
- * Unicode 1.0 names are only retrieved if they are different from the modern
- * names and if the data file contains the data for them. gennames may or may
- * not be called with a command line option to include 1.0 names in unames.dat.
- *
- * @param code The character (code point) for which to get the name.
- *             It must be <code>0<=code<=0x10ffff</code>.
- * @param nameChoice Selector for which name to get.
- * @param buffer Destination address for copying the name.
- *               The name will always be zero-terminated.
- *               If there is no name, then the buffer will be set to the empty string.
- * @param bufferLength <code>==sizeof(buffer)</code>
- * @param pErrorCode Pointer to a UErrorCode variable;
- *        check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
- *        returns.
- * @return The length of the name, or 0 if there is no name for this character.
- *         If the bufferLength is less than or equal to the length, then the buffer
- *         contains the truncated name and the returned length indicates the full
- *         length of the name.
- *         The length does not include the zero-termination.
- *
- * @see UCharNameChoice
- * @see u_charFromName
- * @see u_enumCharNames
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_charName(UChar32 code, UCharNameChoice nameChoice,
-           char *buffer, int32_t bufferLength,
-           UErrorCode *pErrorCode);
-
-/**
- * Get the ISO 10646 comment for a character.
- * The ISO 10646 comment is an informative field in the Unicode Character
- * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
- *
- * @param c The character (code point) for which to get the ISO comment.
- *             It must be <code>0<=c<=0x10ffff</code>.
- * @param dest Destination address for copying the comment.
- *             The comment will be zero-terminated if possible.
- *             If there is no comment, then the buffer will be set to the empty string.
- * @param destCapacity <code>==sizeof(dest)</code>
- * @param pErrorCode Pointer to a UErrorCode variable;
- *        check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
- *        returns.
- * @return The length of the comment, or 0 if there is no comment for this character.
- *         If the destCapacity is less than or equal to the length, then the buffer
- *         contains the truncated name and the returned length indicates the full
- *         length of the name.
- *         The length does not include the zero-termination.
- *
- * @stable ICU 2.2
- */
-U_STABLE int32_t U_EXPORT2
-u_getISOComment(UChar32 c,
-                char *dest, int32_t destCapacity,
-                UErrorCode *pErrorCode);
-
-/**
- * Find a Unicode character by its name and return its code point value.
- * The name is matched exactly and completely.
- * If the name does not correspond to a code point, <i>pErrorCode</i>
- * is set to <code>U_INVALID_CHAR_FOUND</code>.
- * A Unicode 1.0 name is matched only if it differs from the modern name.
- * Unicode names are all uppercase. Extended names are lowercase followed
- * by an uppercase hexadecimal number, and within angle brackets.
- *
- * @param nameChoice Selector for which name to match.
- * @param name The name to match.
- * @param pErrorCode Pointer to a UErrorCode variable
- * @return The Unicode value of the code point with the given name,
- *         or an undefined value if there is no such code point.
- *
- * @see UCharNameChoice
- * @see u_charName
- * @see u_enumCharNames
- * @stable ICU 1.7
- */
-U_STABLE UChar32 U_EXPORT2
-u_charFromName(UCharNameChoice nameChoice,
-               const char *name,
-               UErrorCode *pErrorCode);
-
-/**
- * Type of a callback function for u_enumCharNames() that gets called
- * for each Unicode character with the code point value and
- * the character name.
- * If such a function returns FALSE, then the enumeration is stopped.
- *
- * @param context The context pointer that was passed to u_enumCharNames().
- * @param code The Unicode code point for the character with this name.
- * @param nameChoice Selector for which kind of names is enumerated.
- * @param name The character's name, zero-terminated.
- * @param length The length of the name.
- * @return TRUE if the enumeration should continue, FALSE to stop it.
- *
- * @see UCharNameChoice
- * @see u_enumCharNames
- * @stable ICU 1.7
- */
-typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
-                               UChar32 code,
-                               UCharNameChoice nameChoice,
-                               const char *name,
-                               int32_t length);
-
-/**
- * Enumerate all assigned Unicode characters between the start and limit
- * code points (start inclusive, limit exclusive) and call a function
- * for each, passing the code point value and the character name.
- * For Unicode 1.0 names, only those are enumerated that differ from the
- * modern names.
- *
- * @param start The first code point in the enumeration range.
- * @param limit One more than the last code point in the enumeration range
- *              (the first one after the range).
- * @param fn The function that is to be called for each character name.
- * @param context An arbitrary pointer that is passed to the function.
- * @param nameChoice Selector for which kind of names to enumerate.
- * @param pErrorCode Pointer to a UErrorCode variable
- *
- * @see UCharNameChoice
- * @see UEnumCharNamesFn
- * @see u_charName
- * @see u_charFromName
- * @stable ICU 1.7
- */
-U_STABLE void U_EXPORT2
-u_enumCharNames(UChar32 start, UChar32 limit,
-                UEnumCharNamesFn *fn,
-                void *context,
-                UCharNameChoice nameChoice,
-                UErrorCode *pErrorCode);
-
-/**
- * Return the Unicode name for a given property, as given in the
- * Unicode database file PropertyAliases.txt.
- *
- * In addition, this function maps the property
- * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
- * "General_Category_Mask".  These names are not in
- * PropertyAliases.txt.
- *
- * @param property UProperty selector other than UCHAR_INVALID_CODE.
- *         If out of range, NULL is returned.
- *
- * @param nameChoice selector for which name to get.  If out of range,
- *         NULL is returned.  All properties have a long name.  Most
- *         have a short name, but some do not.  Unicode allows for
- *         additional names; if present these will be returned by
- *         U_LONG_PROPERTY_NAME + i, where i=1, 2,...
- *
- * @return a pointer to the name, or NULL if either the
- *         property or the nameChoice is out of range.  If a given
- *         nameChoice returns NULL, then all larger values of
- *         nameChoice will return NULL, with one exception: if NULL is
- *         returned for U_SHORT_PROPERTY_NAME, then
- *         U_LONG_PROPERTY_NAME (and higher) may still return a
- *         non-NULL value.  The returned pointer is valid until
- *         u_cleanup() is called.
- *
- * @see UProperty
- * @see UPropertyNameChoice
- * @stable ICU 2.4
- */
-U_STABLE const char* U_EXPORT2
-u_getPropertyName(UProperty property,
-                  UPropertyNameChoice nameChoice);
-
-/**
- * Return the UProperty enum for a given property name, as specified
- * in the Unicode database file PropertyAliases.txt.  Short, long, and
- * any other variants are recognized.
- *
- * In addition, this function maps the synthetic names "gcm" /
- * "General_Category_Mask" to the property
- * UCHAR_GENERAL_CATEGORY_MASK.  These names are not in
- * PropertyAliases.txt.
- *
- * @param alias the property name to be matched.  The name is compared
- *         using "loose matching" as described in PropertyAliases.txt.
- *
- * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
- *         does not match any property.
- *
- * @see UProperty
- * @stable ICU 2.4
- */
-U_STABLE UProperty U_EXPORT2
-u_getPropertyEnum(const char* alias);
-
-/**
- * Return the Unicode name for a given property value, as given in the
- * Unicode database file PropertyValueAliases.txt.
- *
- * Note: Some of the names in PropertyValueAliases.txt can only be
- * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
- * UCHAR_GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
- * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
- * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
- *
- * @param property UProperty selector constant.
- *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
- *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
- *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
- *        If out of range, NULL is returned.
- *
- * @param value selector for a value for the given property.  If out
- *         of range, NULL is returned.  In general, valid values range
- *         from 0 up to some maximum.  There are a few exceptions:
- *         (1.) UCHAR_BLOCK values begin at the non-zero value
- *         UBLOCK_BASIC_LATIN.  (2.)  UCHAR_CANONICAL_COMBINING_CLASS
- *         values are not contiguous and range from 0..240.  (3.)
- *         UCHAR_GENERAL_CATEGORY_MASK values are not values of
- *         UCharCategory, but rather mask values produced by
- *         U_GET_GC_MASK().  This allows grouped categories such as
- *         [:L:] to be represented.  Mask values range
- *         non-contiguously from 1..U_GC_P_MASK.
- *
- * @param nameChoice selector for which name to get.  If out of range,
- *         NULL is returned.  All values have a long name.  Most have
- *         a short name, but some do not.  Unicode allows for
- *         additional names; if present these will be returned by
- *         U_LONG_PROPERTY_NAME + i, where i=1, 2,...
-
- * @return a pointer to the name, or NULL if either the
- *         property or the nameChoice is out of range.  If a given
- *         nameChoice returns NULL, then all larger values of
- *         nameChoice will return NULL, with one exception: if NULL is
- *         returned for U_SHORT_PROPERTY_NAME, then
- *         U_LONG_PROPERTY_NAME (and higher) may still return a
- *         non-NULL value.  The returned pointer is valid until
- *         u_cleanup() is called.
- *
- * @see UProperty
- * @see UPropertyNameChoice
- * @stable ICU 2.4
- */
-U_STABLE const char* U_EXPORT2
-u_getPropertyValueName(UProperty property,
-                       int32_t value,
-                       UPropertyNameChoice nameChoice);
-
-/**
- * Return the property value integer for a given value name, as
- * specified in the Unicode database file PropertyValueAliases.txt.
- * Short, long, and any other variants are recognized.
- *
- * Note: Some of the names in PropertyValueAliases.txt will only be
- * recognized with UCHAR_GENERAL_CATEGORY_MASK, not
- * UCHAR_GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
- * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
- * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
- *
- * @param property UProperty selector constant.
- *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
- *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
- *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
- *        If out of range, UCHAR_INVALID_CODE is returned.
- *
- * @param alias the value name to be matched.  The name is compared
- *         using "loose matching" as described in
- *         PropertyValueAliases.txt.
- *
- * @return a value integer or UCHAR_INVALID_CODE if the given name
- *         does not match any value of the given property, or if the
- *         property is invalid.  Note: U CHAR_GENERAL_CATEGORY values
- *         are not values of UCharCategory, but rather mask values
- *         produced by U_GET_GC_MASK().  This allows grouped
- *         categories such as [:L:] to be represented.
- *
- * @see UProperty
- * @stable ICU 2.4
- */
-U_STABLE int32_t U_EXPORT2
-u_getPropertyValueEnum(UProperty property,
-                       const char* alias);
-
-/**
- * Determines if the specified character is permissible as the
- * first character in an identifier according to Unicode
- * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
- * True for characters with general categories "L" (letters) and "Nl" (letter numbers).
- *
- * Same as java.lang.Character.isUnicodeIdentifierStart().
- * Same as UCHAR_ID_START
- *
- * @param c the code point to be tested
- * @return TRUE if the code point may start an identifier
- *
- * @see UCHAR_ID_START
- * @see u_isalpha
- * @see u_isIDPart
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isIDStart(UChar32 c);
-
-/**
- * Determines if the specified character is permissible
- * in an identifier according to Java.
- * True for characters with general categories "L" (letters),
- * "Nl" (letter numbers), "Nd" (decimal digits),
- * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and
- * u_isIDIgnorable(c).
- *
- * Same as java.lang.Character.isUnicodeIdentifierPart().
- * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE)
- * except that Unicode recommends to ignore Cf which is less than
- * u_isIDIgnorable(c).
- *
- * @param c the code point to be tested
- * @return TRUE if the code point may occur in an identifier according to Java
- *
- * @see UCHAR_ID_CONTINUE
- * @see u_isIDStart
- * @see u_isIDIgnorable
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isIDPart(UChar32 c);
-
-/**
- * Determines if the specified character should be regarded
- * as an ignorable character in an identifier,
- * according to Java.
- * True for characters with general category "Cf" (format controls) as well as
- * non-whitespace ISO controls
- * (U+0000..U+0008, U+000E..U+001B, U+007F..U+0084, U+0086..U+009F).
- *
- * Same as java.lang.Character.isIdentifierIgnorable()
- * except that Java also returns TRUE for U+0085 Next Line
- * (it omits U+0085 from whitespace ISO controls).
- *
- * Note that Unicode just recommends to ignore Cf (format controls).
- *
- * @param c the code point to be tested
- * @return TRUE if the code point is ignorable in identifiers according to Java
- *
- * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
- * @see u_isIDStart
- * @see u_isIDPart
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isIDIgnorable(UChar32 c);
-
-/**
- * Determines if the specified character is permissible as the
- * first character in a Java identifier.
- * In addition to u_isIDStart(c), true for characters with
- * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).
- *
- * Same as java.lang.Character.isJavaIdentifierStart().
- *
- * @param c the code point to be tested
- * @return TRUE if the code point may start a Java identifier
- *
- * @see     u_isJavaIDPart
- * @see     u_isalpha
- * @see     u_isIDStart
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isJavaIDStart(UChar32 c);
-
-/**
- * Determines if the specified character is permissible
- * in a Java identifier.
- * In addition to u_isIDPart(c), true for characters with
- * general category "Sc" (currency symbols).
- *
- * Same as java.lang.Character.isJavaIdentifierPart().
- *
- * @param c the code point to be tested
- * @return TRUE if the code point may occur in a Java identifier
- *
- * @see     u_isIDIgnorable
- * @see     u_isJavaIDStart
- * @see     u_isalpha
- * @see     u_isdigit
- * @see     u_isIDPart
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-u_isJavaIDPart(UChar32 c);
-
-/**
- * The given character is mapped to its lowercase equivalent according to
- * UnicodeData.txt; if the character has no lowercase equivalent, the character
- * itself is returned.
- *
- * Same as java.lang.Character.toLowerCase().
- *
- * This function only returns the simple, single-code point case mapping.
- * Full case mappings should be used whenever possible because they produce
- * better results by working on whole strings.
- * They take into account the string context and the language and can map
- * to a result string with a different length as appropriate.
- * Full case mappings are applied by the string case mapping functions,
- * see ustring.h and the UnicodeString class.
- * See also the User Guide chapter on C/POSIX migration:
- * http://icu-project.org/userguide/posix.html#case_mappings
- *
- * @param c the code point to be mapped
- * @return the Simple_Lowercase_Mapping of the code point, if any;
- *         otherwise the code point itself.
- * @stable ICU 2.0
- */
-U_STABLE UChar32 U_EXPORT2
-u_tolower(UChar32 c);
-
-/**
- * The given character is mapped to its uppercase equivalent according to UnicodeData.txt;
- * if the character has no uppercase equivalent, the character itself is
- * returned.
- *
- * Same as java.lang.Character.toUpperCase().
- *
- * This function only returns the simple, single-code point case mapping.
- * Full case mappings should be used whenever possible because they produce
- * better results by working on whole strings.
- * They take into account the string context and the language and can map
- * to a result string with a different length as appropriate.
- * Full case mappings are applied by the string case mapping functions,
- * see ustring.h and the UnicodeString class.
- * See also the User Guide chapter on C/POSIX migration:
- * http://icu-project.org/userguide/posix.html#case_mappings
- *
- * @param c the code point to be mapped
- * @return the Simple_Uppercase_Mapping of the code point, if any;
- *         otherwise the code point itself.
- * @stable ICU 2.0
- */
-U_STABLE UChar32 U_EXPORT2
-u_toupper(UChar32 c);
-
-/**
- * The given character is mapped to its titlecase equivalent
- * according to UnicodeData.txt;
- * if none is defined, the character itself is returned.
- *
- * Same as java.lang.Character.toTitleCase().
- *
- * This function only returns the simple, single-code point case mapping.
- * Full case mappings should be used whenever possible because they produce
- * better results by working on whole strings.
- * They take into account the string context and the language and can map
- * to a result string with a different length as appropriate.
- * Full case mappings are applied by the string case mapping functions,
- * see ustring.h and the UnicodeString class.
- * See also the User Guide chapter on C/POSIX migration:
- * http://icu-project.org/userguide/posix.html#case_mappings
- *
- * @param c the code point to be mapped
- * @return the Simple_Titlecase_Mapping of the code point, if any;
- *         otherwise the code point itself.
- * @stable ICU 2.0
- */
-U_STABLE UChar32 U_EXPORT2
-u_totitle(UChar32 c);
-
-/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
-#define U_FOLD_CASE_DEFAULT 0
-
-/**
- * Option value for case folding:
- *
- * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
- * and dotless i appropriately for Turkic languages (tr, az).
- *
- * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
- * are to be included for default mappings and
- * excluded for the Turkic-specific mappings.
- *
- * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
- * are to be excluded for default mappings and
- * included for the Turkic-specific mappings.
- *
- * @stable ICU 2.0
- */
-#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
-
-/**
- * The given character is mapped to its case folding equivalent according to
- * UnicodeData.txt and CaseFolding.txt;
- * if the character has no case folding equivalent, the character
- * itself is returned.
- *
- * This function only returns the simple, single-code point case mapping.
- * Full case mappings should be used whenever possible because they produce
- * better results by working on whole strings.
- * They take into account the string context and the language and can map
- * to a result string with a different length as appropriate.
- * Full case mappings are applied by the string case mapping functions,
- * see ustring.h and the UnicodeString class.
- * See also the User Guide chapter on C/POSIX migration:
- * http://icu-project.org/userguide/posix.html#case_mappings
- *
- * @param c the code point to be mapped
- * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
- * @return the Simple_Case_Folding of the code point, if any;
- *         otherwise the code point itself.
- * @stable ICU 2.0
- */
-U_STABLE UChar32 U_EXPORT2
-u_foldCase(UChar32 c, uint32_t options);
-
-/**
- * Returns the decimal digit value of the code point in the
- * specified radix.
- *
- * If the radix is not in the range <code>2<=radix<=36</code> or if the
- * value of <code>c</code> is not a valid digit in the specified
- * radix, <code>-1</code> is returned. A character is a valid digit
- * if at least one of the following is true:
- * <ul>
- * <li>The character has a decimal digit value.
- *     Such characters have the general category "Nd" (decimal digit numbers)
- *     and a Numeric_Type of Decimal.
- *     In this case the value is the character's decimal digit value.</li>
- * <li>The character is one of the uppercase Latin letters
- *     <code>'A'</code> through <code>'Z'</code>.
- *     In this case the value is <code>c-'A'+10</code>.</li>
- * <li>The character is one of the lowercase Latin letters
- *     <code>'a'</code> through <code>'z'</code>.
- *     In this case the value is <code>ch-'a'+10</code>.</li>
- * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A)
- *     as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A)
- *     are recognized.</li>
- * </ul>
- *
- * Same as java.lang.Character.digit().
- *
- * @param   ch      the code point to be tested.
- * @param   radix   the radix.
- * @return  the numeric value represented by the character in the
- *          specified radix,
- *          or -1 if there is no value or if the value exceeds the radix.
- *
- * @see     UCHAR_NUMERIC_TYPE
- * @see     u_forDigit
- * @see     u_charDigitValue
- * @see     u_isdigit
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_digit(UChar32 ch, int8_t radix);
-
-/**
- * Determines the character representation for a specific digit in
- * the specified radix. If the value of <code>radix</code> is not a
- * valid radix, or the value of <code>digit</code> is not a valid
- * digit in the specified radix, the null character
- * (<code>U+0000</code>) is returned.
- * <p>
- * The <code>radix</code> argument is valid if it is greater than or
- * equal to 2 and less than or equal to 36.
- * The <code>digit</code> argument is valid if
- * <code>0 <= digit < radix</code>.
- * <p>
- * If the digit is less than 10, then
- * <code>'0' + digit</code> is returned. Otherwise, the value
- * <code>'a' + digit - 10</code> is returned.
- *
- * Same as java.lang.Character.forDigit().
- *
- * @param   digit   the number to convert to a character.
- * @param   radix   the radix.
- * @return  the <code>char</code> representation of the specified digit
- *          in the specified radix.
- *
- * @see     u_digit
- * @see     u_charDigitValue
- * @see     u_isdigit
- * @stable ICU 2.0
- */
-U_STABLE UChar32 U_EXPORT2
-u_forDigit(int32_t digit, int8_t radix);
-
-/**
- * Get the "age" of the code point.
- * The "age" is the Unicode version when the code point was first
- * designated (as a non-character or for Private Use)
- * or assigned a character.
- * This can be useful to avoid emitting code points to receiving
- * processes that do not accept newer characters.
- * The data is from the UCD file DerivedAge.txt.
- *
- * @param c The code point.
- * @param versionArray The Unicode version number array, to be filled in.
- *
- * @stable ICU 2.1
- */
-U_STABLE void U_EXPORT2
-u_charAge(UChar32 c, UVersionInfo versionArray);
-
-/**
- * Gets the Unicode version information.
- * The version array is filled in with the version information
- * for the Unicode standard that is currently used by ICU.
- * For example, Unicode version 3.1.1 is represented as an array with
- * the values { 3, 1, 1, 0 }.
- *
- * @param versionArray an output array that will be filled in with
- *                     the Unicode version number
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-u_getUnicodeVersion(UVersionInfo versionArray);
-
-/**
- * Get the FC_NFKC_Closure property string for a character.
- * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
- * or for "FNC": http://www.unicode.org/reports/tr15/
- *
- * @param c The character (code point) for which to get the FC_NFKC_Closure string.
- *             It must be <code>0<=c<=0x10ffff</code>.
- * @param dest Destination address for copying the string.
- *             The string will be zero-terminated if possible.
- *             If there is no FC_NFKC_Closure string,
- *             then the buffer will be set to the empty string.
- * @param destCapacity <code>==sizeof(dest)</code>
- * @param pErrorCode Pointer to a UErrorCode variable.
- * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
- *         If the destCapacity is less than or equal to the length, then the buffer
- *         contains the truncated name and the returned length indicates the full
- *         length of the name.
- *         The length does not include the zero-termination.
- *
- * @stable ICU 2.2
- */
-U_STABLE int32_t U_EXPORT2
-u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
-
-U_CDECL_END
-
-#endif /*_UCHAR*/
-/*eof*/

Copied: MacRuby/trunk/icu-1060/unicode/uchar.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uchar.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uchar.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uchar.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,3062 @@
+/*
+**********************************************************************
+*   Copyright (C) 1997-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+* File UCHAR.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   04/02/97    aliu        Creation.
+*   03/29/99    helena      Updated for C APIs.
+*   4/15/99     Madhu       Updated for C Implementation and Javadoc
+*   5/20/99     Madhu       Added the function u_getVersion()
+*   8/19/1999   srl         Upgraded scripts to Unicode 3.0
+*   8/27/1999   schererm    UCharDirection constants: U_...
+*   11/11/1999  weiv        added u_isalnum(), cleaned comments
+*   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion().
+******************************************************************************
+*/
+
+#ifndef UCHAR_H
+#define UCHAR_H
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/*==========================================================================*/
+/* Unicode version number                                                   */
+/*==========================================================================*/
+/**
+ * Unicode version number, default for the current ICU version.
+ * The actual Unicode Character Database (UCD) data is stored in uprops.dat
+ * and may be generated from UCD files from a different Unicode version.
+ * Call u_getUnicodeVersion to get the actual Unicode version of the data.
+ *
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.0
+ */
+#define U_UNICODE_VERSION "5.1"
+
+/**
+ * \file
+ * \brief C API: Unicode Properties
+ *
+ * This C API provides low-level access to the Unicode Character Database.
+ * In addition to raw property values, some convenience functions calculate
+ * derived properties, for example for Java-style programming.
+ *
+ * Unicode assigns each code point (not just assigned character) values for
+ * many properties.
+ * Most of them are simple boolean flags, or constants from a small enumerated list.
+ * For some properties, values are strings or other relatively more complex types.
+ *
+ * For more information see
+ * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
+ * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).
+ *
+ * Many functions are designed to match java.lang.Character functions.
+ * See the individual function documentation,
+ * and see the JDK 1.4 java.lang.Character documentation
+ * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html
+ *
+ * There are also functions that provide easy migration from C/POSIX functions
+ * like isblank(). Their use is generally discouraged because the C/POSIX
+ * standards do not define their semantics beyond the ASCII range, which means
+ * that different implementations exhibit very different behavior.
+ * Instead, Unicode properties should be used directly.
+ *
+ * There are also only a few, broad C/POSIX character classes, and they tend
+ * to be used for conflicting purposes. For example, the "isalpha()" class
+ * is sometimes used to determine word boundaries, while a more sophisticated
+ * approach would at least distinguish initial letters from continuation
+ * characters (the latter including combining marks).
+ * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
+ * Another example: There is no "istitle()" class for titlecase characters.
+ *
+ * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
+ * ICU implements them according to the Standard Recommendations in
+ * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
+ * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
+ *
+ * API access for C/POSIX character classes is as follows:
+ * - alpha:     u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
+ * - lower:     u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
+ * - upper:     u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
+ * - punct:     u_ispunct(c)
+ * - digit:     u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER
+ * - xdigit:    u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
+ * - alnum:     u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
+ * - space:     u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
+ * - blank:     u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
+ * - cntrl:     u_charType(c)==U_CONTROL_CHAR
+ * - graph:     u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
+ * - print:     u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
+ *
+ * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
+ * the Standard Recommendations in UTS #18. Instead, they match Java
+ * functions according to their API documentation.
+ *
+ * \htmlonly
+ * The C/POSIX character classes are also available in UnicodeSet patterns,
+ * using patterns like [:graph:] or \p{graph}.
+ * \endhtmlonly
+ *
+ * Note: There are several ICU whitespace functions.
+ * Comparison:
+ * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
+ *       most of general categories "Z" (separators) + most whitespace ISO controls
+ *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
+ * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
+ * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
+ * - u_isspace: Z + whitespace ISO controls (including no-break spaces)
+ * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP
+ */
+
+/**
+ * Constants.
+ */
+
+/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
+#define UCHAR_MIN_VALUE 0
+
+/**
+ * The highest Unicode code point value (scalar value) according to
+ * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
+ * For a single character, UChar32 is a simple type that can hold any code point value.
+ *
+ * @see UChar32
+ * @stable ICU 2.0
+ */
+#define UCHAR_MAX_VALUE 0x10ffff
+
+/**
+ * Get a single-bit bit set (a flag) from a bit number 0..31.
+ * @stable ICU 2.1
+ */
+#define U_MASK(x) ((uint32_t)1<<(x))
+
+/*
+ * !! Note: Several comments in this file are machine-read by the
+ * genpname tool.  These comments describe the correspondence between
+ * icu enum constants and UCD entities.  Do not delete them.  Update
+ * these comments as needed.
+ *
+ * Any comment of the form "/ *[name]* /" (spaces added) is such
+ * a comment.
+ *
+ * The U_JG_* and U_GC_*_MASK constants are matched by their symbolic
+ * name, which must match PropertyValueAliases.txt.
+ */
+
+/**
+ * Selection constants for Unicode properties.
+ * These constants are used in functions like u_hasBinaryProperty to select
+ * one of the Unicode properties.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ucd/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ * Check u_getUnicodeVersion to be sure.
+ *
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+typedef enum UProperty {
+    /*  See note !!.  Comments of the form "Binary property Dash",
+        "Enumerated property Script", "Double property Numeric_Value",
+        and "String property Age" are read by genpname. */
+
+    /*  Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
+    debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
+    rather than UCHAR_BINARY_START.  Likewise for other *_START
+    identifiers. */
+
+    /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
+        Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
+    UCHAR_ALPHABETIC=0,
+    /** First constant for binary Unicode properties. @stable ICU 2.1 */
+    UCHAR_BINARY_START=UCHAR_ALPHABETIC,
+    /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
+    UCHAR_ASCII_HEX_DIGIT=1,
+    /** Binary property Bidi_Control.
+        Format controls which have specific functions
+        in the Bidi Algorithm. @stable ICU 2.1 */
+    UCHAR_BIDI_CONTROL=2,
+    /** Binary property Bidi_Mirrored.
+        Characters that may change display in RTL text.
+        Same as u_isMirrored.
+        See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
+    UCHAR_BIDI_MIRRORED=3,
+    /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
+    UCHAR_DASH=4,
+    /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
+        Ignorable in most processing.
+        <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
+    UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5,
+    /** Binary property Deprecated (new in Unicode 3.2).
+        The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
+    UCHAR_DEPRECATED=6,
+    /** Binary property Diacritic. Characters that linguistically modify
+        the meaning of another character to which they apply. @stable ICU 2.1 */
+    UCHAR_DIACRITIC=7,
+    /** Binary property Extender.
+        Extend the value or shape of a preceding alphabetic character,
+        e.g., length and iteration marks. @stable ICU 2.1 */
+    UCHAR_EXTENDER=8,
+    /** Binary property Full_Composition_Exclusion.
+        CompositionExclusions.txt+Singleton Decompositions+
+        Non-Starter Decompositions. @stable ICU 2.1 */
+    UCHAR_FULL_COMPOSITION_EXCLUSION=9,
+    /** Binary property Grapheme_Base (new in Unicode 3.2).
+        For programmatic determination of grapheme cluster boundaries.
+        [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
+    UCHAR_GRAPHEME_BASE=10,
+    /** Binary property Grapheme_Extend (new in Unicode 3.2).
+        For programmatic determination of grapheme cluster boundaries.
+        Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
+    UCHAR_GRAPHEME_EXTEND=11,
+    /** Binary property Grapheme_Link (new in Unicode 3.2).
+        For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
+    UCHAR_GRAPHEME_LINK=12,
+    /** Binary property Hex_Digit.
+        Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
+    UCHAR_HEX_DIGIT=13,
+    /** Binary property Hyphen. Dashes used to mark connections
+        between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
+    UCHAR_HYPHEN=14,
+    /** Binary property ID_Continue.
+        Characters that can continue an identifier.
+        DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
+        ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
+    UCHAR_ID_CONTINUE=15,
+    /** Binary property ID_Start.
+        Characters that can start an identifier.
+        Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
+    UCHAR_ID_START=16,
+    /** Binary property Ideographic.
+        CJKV ideographs. @stable ICU 2.1 */
+    UCHAR_IDEOGRAPHIC=17,
+    /** Binary property IDS_Binary_Operator (new in Unicode 3.2).
+        For programmatic determination of
+        Ideographic Description Sequences. @stable ICU 2.1 */
+    UCHAR_IDS_BINARY_OPERATOR=18,
+    /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
+        For programmatic determination of
+        Ideographic Description Sequences. @stable ICU 2.1 */
+    UCHAR_IDS_TRINARY_OPERATOR=19,
+    /** Binary property Join_Control.
+        Format controls for cursive joining and ligation. @stable ICU 2.1 */
+    UCHAR_JOIN_CONTROL=20,
+    /** Binary property Logical_Order_Exception (new in Unicode 3.2).
+        Characters that do not use logical order and
+        require special handling in most processing. @stable ICU 2.1 */
+    UCHAR_LOGICAL_ORDER_EXCEPTION=21,
+    /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
+        Ll+Other_Lowercase @stable ICU 2.1 */
+    UCHAR_LOWERCASE=22,
+    /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
+    UCHAR_MATH=23,
+    /** Binary property Noncharacter_Code_Point.
+        Code points that are explicitly defined as illegal
+        for the encoding of characters. @stable ICU 2.1 */
+    UCHAR_NONCHARACTER_CODE_POINT=24,
+    /** Binary property Quotation_Mark. @stable ICU 2.1 */
+    UCHAR_QUOTATION_MARK=25,
+    /** Binary property Radical (new in Unicode 3.2).
+        For programmatic determination of
+        Ideographic Description Sequences. @stable ICU 2.1 */
+    UCHAR_RADICAL=26,
+    /** Binary property Soft_Dotted (new in Unicode 3.2).
+        Characters with a "soft dot", like i or j.
+        An accent placed on these characters causes
+        the dot to disappear. @stable ICU 2.1 */
+    UCHAR_SOFT_DOTTED=27,
+    /** Binary property Terminal_Punctuation.
+        Punctuation characters that generally mark
+        the end of textual units. @stable ICU 2.1 */
+    UCHAR_TERMINAL_PUNCTUATION=28,
+    /** Binary property Unified_Ideograph (new in Unicode 3.2).
+        For programmatic determination of
+        Ideographic Description Sequences. @stable ICU 2.1 */
+    UCHAR_UNIFIED_IDEOGRAPH=29,
+    /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
+        Lu+Other_Uppercase @stable ICU 2.1 */
+    UCHAR_UPPERCASE=30,
+    /** Binary property White_Space.
+        Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
+        Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
+    UCHAR_WHITE_SPACE=31,
+    /** Binary property XID_Continue.
+        ID_Continue modified to allow closure under
+        normalization forms NFKC and NFKD. @stable ICU 2.1 */
+    UCHAR_XID_CONTINUE=32,
+    /** Binary property XID_Start. ID_Start modified to allow
+        closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
+    UCHAR_XID_START=33,
+    /** Binary property Case_Sensitive. Either the source of a case
+        mapping or _in_ the target of a case mapping. Not the same as
+        the general category Cased_Letter. @stable ICU 2.6 */
+   UCHAR_CASE_SENSITIVE=34,
+    /** Binary property STerm (new in Unicode 4.0.1).
+        Sentence Terminal. Used in UAX #29: Text Boundaries
+        (http://www.unicode.org/reports/tr29/)
+        @stable ICU 3.0 */
+    UCHAR_S_TERM=35,
+    /** Binary property Variation_Selector (new in Unicode 4.0.1).
+        Indicates all those characters that qualify as Variation Selectors.
+        For details on the behavior of these characters,
+        see StandardizedVariants.html and 15.6 Variation Selectors.
+        @stable ICU 3.0 */
+    UCHAR_VARIATION_SELECTOR=36,
+    /** Binary property NFD_Inert.
+        ICU-specific property for characters that are inert under NFD,
+        i.e., they do not interact with adjacent characters.
+        Used for example in normalizing transforms in incremental mode
+        to find the boundary of safely normalizable text despite possible
+        text additions.
+
+        There is one such property per normalization form.
+        These properties are computed as follows - an inert character is:
+        a) unassigned, or ALL of the following:
+        b) of combining class 0.
+        c) not decomposed by this normalization form.
+        AND if NFC or NFKC,
+        d) can never compose with a previous character.
+        e) can never compose with a following character.
+        f) can never change if another character is added.
+           Example: a-breve might satisfy all but f, but if you
+           add an ogonek it changes to a-ogonek + breve
+
+        See also com.ibm.text.UCD.NFSkippable in the ICU4J repository,
+        and icu/source/common/unormimp.h .
+        @stable ICU 3.0 */
+    UCHAR_NFD_INERT=37,
+    /** Binary property NFKD_Inert.
+        ICU-specific property for characters that are inert under NFKD,
+        i.e., they do not interact with adjacent characters.
+        Used for example in normalizing transforms in incremental mode
+        to find the boundary of safely normalizable text despite possible
+        text additions.
+        @see UCHAR_NFD_INERT
+        @stable ICU 3.0 */
+    UCHAR_NFKD_INERT=38,
+    /** Binary property NFC_Inert.
+        ICU-specific property for characters that are inert under NFC,
+        i.e., they do not interact with adjacent characters.
+        Used for example in normalizing transforms in incremental mode
+        to find the boundary of safely normalizable text despite possible
+        text additions.
+        @see UCHAR_NFD_INERT
+        @stable ICU 3.0 */
+    UCHAR_NFC_INERT=39,
+    /** Binary property NFKC_Inert.
+        ICU-specific property for characters that are inert under NFKC,
+        i.e., they do not interact with adjacent characters.
+        Used for example in normalizing transforms in incremental mode
+        to find the boundary of safely normalizable text despite possible
+        text additions.
+        @see UCHAR_NFD_INERT
+        @stable ICU 3.0 */
+    UCHAR_NFKC_INERT=40,
+    /** Binary Property Segment_Starter.
+        ICU-specific property for characters that are starters in terms of
+        Unicode normalization and combining character sequences.
+        They have ccc=0 and do not occur in non-initial position of the
+        canonical decomposition of any character
+        (like " in NFD(a-umlaut) and a Jamo T in an NFD(Hangul LVT)).
+        ICU uses this property for segmenting a string for generating a set of
+        canonically equivalent strings, e.g. for canonical closure while
+        processing collation tailoring rules.
+        @stable ICU 3.0 */
+    UCHAR_SEGMENT_STARTER=41,
+    /** Binary property Pattern_Syntax (new in Unicode 4.1).
+        See UAX #31 Identifier and Pattern Syntax
+        (http://www.unicode.org/reports/tr31/)
+        @stable ICU 3.4 */
+    UCHAR_PATTERN_SYNTAX=42,
+    /** Binary property Pattern_White_Space (new in Unicode 4.1).
+        See UAX #31 Identifier and Pattern Syntax
+        (http://www.unicode.org/reports/tr31/)
+        @stable ICU 3.4 */
+    UCHAR_PATTERN_WHITE_SPACE=43,
+    /** Binary property alnum (a C/POSIX character class).
+        Implemented according to the UTS #18 Annex C Standard Recommendation.
+        See the uchar.h file documentation.
+        @stable ICU 3.4 */
+    UCHAR_POSIX_ALNUM=44,
+    /** Binary property blank (a C/POSIX character class).
+        Implemented according to the UTS #18 Annex C Standard Recommendation.
+        See the uchar.h file documentation.
+        @stable ICU 3.4 */
+    UCHAR_POSIX_BLANK=45,
+    /** Binary property graph (a C/POSIX character class).
+        Implemented according to the UTS #18 Annex C Standard Recommendation.
+        See the uchar.h file documentation.
+        @stable ICU 3.4 */
+    UCHAR_POSIX_GRAPH=46,
+    /** Binary property print (a C/POSIX character class).
+        Implemented according to the UTS #18 Annex C Standard Recommendation.
+        See the uchar.h file documentation.
+        @stable ICU 3.4 */
+    UCHAR_POSIX_PRINT=47,
+    /** Binary property xdigit (a C/POSIX character class).
+        Implemented according to the UTS #18 Annex C Standard Recommendation.
+        See the uchar.h file documentation.
+        @stable ICU 3.4 */
+    UCHAR_POSIX_XDIGIT=48,
+    /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
+    UCHAR_BINARY_LIMIT=49,
+
+    /** Enumerated property Bidi_Class.
+        Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
+    UCHAR_BIDI_CLASS=0x1000,
+    /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
+    UCHAR_INT_START=UCHAR_BIDI_CLASS,
+    /** Enumerated property Block.
+        Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
+    UCHAR_BLOCK=0x1001,
+    /** Enumerated property Canonical_Combining_Class.
+        Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
+    UCHAR_CANONICAL_COMBINING_CLASS=0x1002,
+    /** Enumerated property Decomposition_Type.
+        Returns UDecompositionType values. @stable ICU 2.2 */
+    UCHAR_DECOMPOSITION_TYPE=0x1003,
+    /** Enumerated property East_Asian_Width.
+        See http://www.unicode.org/reports/tr11/
+        Returns UEastAsianWidth values. @stable ICU 2.2 */
+    UCHAR_EAST_ASIAN_WIDTH=0x1004,
+    /** Enumerated property General_Category.
+        Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
+    UCHAR_GENERAL_CATEGORY=0x1005,
+    /** Enumerated property Joining_Group.
+        Returns UJoiningGroup values. @stable ICU 2.2 */
+    UCHAR_JOINING_GROUP=0x1006,
+    /** Enumerated property Joining_Type.
+        Returns UJoiningType values. @stable ICU 2.2 */
+    UCHAR_JOINING_TYPE=0x1007,
+    /** Enumerated property Line_Break.
+        Returns ULineBreak values. @stable ICU 2.2 */
+    UCHAR_LINE_BREAK=0x1008,
+    /** Enumerated property Numeric_Type.
+        Returns UNumericType values. @stable ICU 2.2 */
+    UCHAR_NUMERIC_TYPE=0x1009,
+    /** Enumerated property Script.
+        Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
+    UCHAR_SCRIPT=0x100A,
+    /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
+        Returns UHangulSyllableType values. @stable ICU 2.6 */
+    UCHAR_HANGUL_SYLLABLE_TYPE=0x100B,
+    /** Enumerated property NFD_Quick_Check.
+        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+    UCHAR_NFD_QUICK_CHECK=0x100C,
+    /** Enumerated property NFKD_Quick_Check.
+        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+    UCHAR_NFKD_QUICK_CHECK=0x100D,
+    /** Enumerated property NFC_Quick_Check.
+        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+    UCHAR_NFC_QUICK_CHECK=0x100E,
+    /** Enumerated property NFKC_Quick_Check.
+        Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+    UCHAR_NFKC_QUICK_CHECK=0x100F,
+    /** Enumerated property Lead_Canonical_Combining_Class.
+        ICU-specific property for the ccc of the first code point
+        of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
+        Useful for checking for canonically ordered text;
+        see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+        Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
+    UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010,
+    /** Enumerated property Trail_Canonical_Combining_Class.
+        ICU-specific property for the ccc of the last code point
+        of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
+        Useful for checking for canonically ordered text;
+        see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+        Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
+    UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011,
+    /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
+        Used in UAX #29: Text Boundaries
+        (http://www.unicode.org/reports/tr29/)
+        Returns UGraphemeClusterBreak values. @stable ICU 3.4 */
+    UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012,
+    /** Enumerated property Sentence_Break (new in Unicode 4.1).
+        Used in UAX #29: Text Boundaries
+        (http://www.unicode.org/reports/tr29/)
+        Returns USentenceBreak values. @stable ICU 3.4 */
+    UCHAR_SENTENCE_BREAK=0x1013,
+    /** Enumerated property Word_Break (new in Unicode 4.1).
+        Used in UAX #29: Text Boundaries
+        (http://www.unicode.org/reports/tr29/)
+        Returns UWordBreakValues values. @stable ICU 3.4 */
+    UCHAR_WORD_BREAK=0x1014,
+    /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
+    UCHAR_INT_LIMIT=0x1015,
+
+    /** Bitmask property General_Category_Mask.
+        This is the General_Category property returned as a bit mask.
+        When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
+        returns bit masks for UCharCategory values where exactly one bit is set.
+        When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
+        a multi-bit mask is used for sets of categories like "Letters".
+        Mask values should be cast to uint32_t.
+        @stable ICU 2.4 */
+    UCHAR_GENERAL_CATEGORY_MASK=0x2000,
+    /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */
+    UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
+    /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */
+    UCHAR_MASK_LIMIT=0x2001,
+
+    /** Double property Numeric_Value.
+        Corresponds to u_getNumericValue. @stable ICU 2.4 */
+    UCHAR_NUMERIC_VALUE=0x3000,
+    /** First constant for double Unicode properties. @stable ICU 2.4 */
+    UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
+    /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */
+    UCHAR_DOUBLE_LIMIT=0x3001,
+
+    /** String property Age.
+        Corresponds to u_charAge. @stable ICU 2.4 */
+    UCHAR_AGE=0x4000,
+    /** First constant for string Unicode properties. @stable ICU 2.4 */
+    UCHAR_STRING_START=UCHAR_AGE,
+    /** String property Bidi_Mirroring_Glyph.
+        Corresponds to u_charMirror. @stable ICU 2.4 */
+    UCHAR_BIDI_MIRRORING_GLYPH=0x4001,
+    /** String property Case_Folding.
+        Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
+    UCHAR_CASE_FOLDING=0x4002,
+    /** String property ISO_Comment.
+        Corresponds to u_getISOComment. @stable ICU 2.4 */
+    UCHAR_ISO_COMMENT=0x4003,
+    /** String property Lowercase_Mapping.
+        Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
+    UCHAR_LOWERCASE_MAPPING=0x4004,
+    /** String property Name.
+        Corresponds to u_charName. @stable ICU 2.4 */
+    UCHAR_NAME=0x4005,
+    /** String property Simple_Case_Folding.
+        Corresponds to u_foldCase. @stable ICU 2.4 */
+    UCHAR_SIMPLE_CASE_FOLDING=0x4006,
+    /** String property Simple_Lowercase_Mapping.
+        Corresponds to u_tolower. @stable ICU 2.4 */
+    UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007,
+    /** String property Simple_Titlecase_Mapping.
+        Corresponds to u_totitle. @stable ICU 2.4 */
+    UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008,
+    /** String property Simple_Uppercase_Mapping.
+        Corresponds to u_toupper. @stable ICU 2.4 */
+    UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009,
+    /** String property Titlecase_Mapping.
+        Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
+    UCHAR_TITLECASE_MAPPING=0x400A,
+    /** String property Unicode_1_Name.
+        Corresponds to u_charName. @stable ICU 2.4 */
+    UCHAR_UNICODE_1_NAME=0x400B,
+    /** String property Uppercase_Mapping.
+        Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
+    UCHAR_UPPERCASE_MAPPING=0x400C,
+    /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */
+    UCHAR_STRING_LIMIT=0x400D,
+
+    /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
+    UCHAR_INVALID_CODE = -1
+} UProperty;
+
+/**
+ * Data for enumerated Unicode general category types.
+ * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
+ * @stable ICU 2.0
+ */
+typedef enum UCharCategory
+{
+    /** See note !!.  Comments of the form "Cn" are read by genpname. */
+
+    /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
+    U_UNASSIGNED              = 0,
+    /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
+    U_GENERAL_OTHER_TYPES     = 0,
+    /** Lu @stable ICU 2.0 */
+    U_UPPERCASE_LETTER        = 1,
+    /** Ll @stable ICU 2.0 */
+    U_LOWERCASE_LETTER        = 2,
+    /** Lt @stable ICU 2.0 */
+    U_TITLECASE_LETTER        = 3,
+    /** Lm @stable ICU 2.0 */
+    U_MODIFIER_LETTER         = 4,
+    /** Lo @stable ICU 2.0 */
+    U_OTHER_LETTER            = 5,
+    /** Mn @stable ICU 2.0 */
+    U_NON_SPACING_MARK        = 6,
+    /** Me @stable ICU 2.0 */
+    U_ENCLOSING_MARK          = 7,
+    /** Mc @stable ICU 2.0 */
+    U_COMBINING_SPACING_MARK  = 8,
+    /** Nd @stable ICU 2.0 */
+    U_DECIMAL_DIGIT_NUMBER    = 9,
+    /** Nl @stable ICU 2.0 */
+    U_LETTER_NUMBER           = 10,
+    /** No @stable ICU 2.0 */
+    U_OTHER_NUMBER            = 11,
+    /** Zs @stable ICU 2.0 */
+    U_SPACE_SEPARATOR         = 12,
+    /** Zl @stable ICU 2.0 */
+    U_LINE_SEPARATOR          = 13,
+    /** Zp @stable ICU 2.0 */
+    U_PARAGRAPH_SEPARATOR     = 14,
+    /** Cc @stable ICU 2.0 */
+    U_CONTROL_CHAR            = 15,
+    /** Cf @stable ICU 2.0 */
+    U_FORMAT_CHAR             = 16,
+    /** Co @stable ICU 2.0 */
+    U_PRIVATE_USE_CHAR        = 17,
+    /** Cs @stable ICU 2.0 */
+    U_SURROGATE               = 18,
+    /** Pd @stable ICU 2.0 */
+    U_DASH_PUNCTUATION        = 19,
+    /** Ps @stable ICU 2.0 */
+    U_START_PUNCTUATION       = 20,
+    /** Pe @stable ICU 2.0 */
+    U_END_PUNCTUATION         = 21,
+    /** Pc @stable ICU 2.0 */
+    U_CONNECTOR_PUNCTUATION   = 22,
+    /** Po @stable ICU 2.0 */
+    U_OTHER_PUNCTUATION       = 23,
+    /** Sm @stable ICU 2.0 */
+    U_MATH_SYMBOL             = 24,
+    /** Sc @stable ICU 2.0 */
+    U_CURRENCY_SYMBOL         = 25,
+    /** Sk @stable ICU 2.0 */
+    U_MODIFIER_SYMBOL         = 26,
+    /** So @stable ICU 2.0 */
+    U_OTHER_SYMBOL            = 27,
+    /** Pi @stable ICU 2.0 */
+    U_INITIAL_PUNCTUATION     = 28,
+    /** Pf @stable ICU 2.0 */
+    U_FINAL_PUNCTUATION       = 29,
+    /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */
+    U_CHAR_CATEGORY_COUNT
+} UCharCategory;
+
+/**
+ * U_GC_XX_MASK constants are bit flags corresponding to Unicode
+ * general category values.
+ * For each category, the nth bit is set if the numeric value of the
+ * corresponding UCharCategory constant is n.
+ *
+ * There are also some U_GC_Y_MASK constants for groups of general categories
+ * like L for all letter categories.
+ *
+ * @see u_charType
+ * @see U_GET_GC_MASK
+ * @see UCharCategory
+ * @stable ICU 2.1
+ */
+#define U_GC_CN_MASK    U_MASK(U_GENERAL_OTHER_TYPES)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LU_MASK    U_MASK(U_UPPERCASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LL_MASK    U_MASK(U_LOWERCASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LT_MASK    U_MASK(U_TITLECASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LM_MASK    U_MASK(U_MODIFIER_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LO_MASK    U_MASK(U_OTHER_LETTER)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_MN_MASK    U_MASK(U_NON_SPACING_MARK)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ME_MASK    U_MASK(U_ENCLOSING_MARK)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_MC_MASK    U_MASK(U_COMBINING_SPACING_MARK)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ND_MASK    U_MASK(U_DECIMAL_DIGIT_NUMBER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_NL_MASK    U_MASK(U_LETTER_NUMBER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_NO_MASK    U_MASK(U_OTHER_NUMBER)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZS_MASK    U_MASK(U_SPACE_SEPARATOR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZL_MASK    U_MASK(U_LINE_SEPARATOR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZP_MASK    U_MASK(U_PARAGRAPH_SEPARATOR)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CC_MASK    U_MASK(U_CONTROL_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CF_MASK    U_MASK(U_FORMAT_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CO_MASK    U_MASK(U_PRIVATE_USE_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CS_MASK    U_MASK(U_SURROGATE)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PD_MASK    U_MASK(U_DASH_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PS_MASK    U_MASK(U_START_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PE_MASK    U_MASK(U_END_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PC_MASK    U_MASK(U_CONNECTOR_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PO_MASK    U_MASK(U_OTHER_PUNCTUATION)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SM_MASK    U_MASK(U_MATH_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SC_MASK    U_MASK(U_CURRENCY_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SK_MASK    U_MASK(U_MODIFIER_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SO_MASK    U_MASK(U_OTHER_SYMBOL)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PI_MASK    U_MASK(U_INITIAL_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PF_MASK    U_MASK(U_FINAL_PUNCTUATION)
+
+
+/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */
+#define U_GC_L_MASK \
+            (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */
+#define U_GC_LC_MASK \
+            (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
+
+/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */
+#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
+
+/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */
+#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */
+#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
+
+/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */
+#define U_GC_C_MASK \
+            (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
+
+/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */
+#define U_GC_P_MASK \
+            (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
+             U_GC_PI_MASK|U_GC_PF_MASK)
+
+/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */
+#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
+
+/**
+ * This specifies the language directional property of a character set.
+ * @stable ICU 2.0
+ */
+typedef enum UCharDirection {
+    /** See note !!.  Comments of the form "EN" are read by genpname. */
+
+    /** L @stable ICU 2.0 */
+    U_LEFT_TO_RIGHT               = 0,
+    /** R @stable ICU 2.0 */
+    U_RIGHT_TO_LEFT               = 1,
+    /** EN @stable ICU 2.0 */
+    U_EUROPEAN_NUMBER             = 2,
+    /** ES @stable ICU 2.0 */
+    U_EUROPEAN_NUMBER_SEPARATOR   = 3,
+    /** ET @stable ICU 2.0 */
+    U_EUROPEAN_NUMBER_TERMINATOR  = 4,
+    /** AN @stable ICU 2.0 */
+    U_ARABIC_NUMBER               = 5,
+    /** CS @stable ICU 2.0 */
+    U_COMMON_NUMBER_SEPARATOR     = 6,
+    /** B @stable ICU 2.0 */
+    U_BLOCK_SEPARATOR             = 7,
+    /** S @stable ICU 2.0 */
+    U_SEGMENT_SEPARATOR           = 8,
+    /** WS @stable ICU 2.0 */
+    U_WHITE_SPACE_NEUTRAL         = 9,
+    /** ON @stable ICU 2.0 */
+    U_OTHER_NEUTRAL               = 10,
+    /** LRE @stable ICU 2.0 */
+    U_LEFT_TO_RIGHT_EMBEDDING     = 11,
+    /** LRO @stable ICU 2.0 */
+    U_LEFT_TO_RIGHT_OVERRIDE      = 12,
+    /** AL @stable ICU 2.0 */
+    U_RIGHT_TO_LEFT_ARABIC        = 13,
+    /** RLE @stable ICU 2.0 */
+    U_RIGHT_TO_LEFT_EMBEDDING     = 14,
+    /** RLO @stable ICU 2.0 */
+    U_RIGHT_TO_LEFT_OVERRIDE      = 15,
+    /** PDF @stable ICU 2.0 */
+    U_POP_DIRECTIONAL_FORMAT      = 16,
+    /** NSM @stable ICU 2.0 */
+    U_DIR_NON_SPACING_MARK        = 17,
+    /** BN @stable ICU 2.0 */
+    U_BOUNDARY_NEUTRAL            = 18,
+    /** @stable ICU 2.0 */
+    U_CHAR_DIRECTION_COUNT
+} UCharDirection;
+
+/**
+ * Constants for Unicode blocks, see the Unicode Data file Blocks.txt
+ * @stable ICU 2.0
+ */
+enum UBlockCode {
+
+    /** New No_Block value in Unicode 4. @stable ICU 2.6 */
+    UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BASIC_LATIN = 1, /*[0000]*/ /*See note !!*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/
+
+    /**
+     * Unicode 3.2 renames this block to "Greek and Coptic".
+     * @stable ICU 2.0
+     */
+    UBLOCK_GREEK =8, /*[0370]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CYRILLIC =9, /*[0400]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ARMENIAN =10, /*[0530]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HEBREW =11, /*[0590]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ARABIC =12, /*[0600]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_SYRIAC =13, /*[0700]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_THAANA =14, /*[0780]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_DEVANAGARI =15, /*[0900]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BENGALI =16, /*[0980]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_GURMUKHI =17, /*[0A00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_GUJARATI =18, /*[0A80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ORIYA =19, /*[0B00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_TAMIL =20, /*[0B80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_TELUGU =21, /*[0C00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_KANNADA =22, /*[0C80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_MALAYALAM =23, /*[0D00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_SINHALA =24, /*[0D80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_THAI =25, /*[0E00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LAO =26, /*[0E80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_TIBETAN =27, /*[0F00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_MYANMAR =28, /*[1000]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_GEORGIAN =29, /*[10A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HANGUL_JAMO =30, /*[1100]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ETHIOPIC =31, /*[1200]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CHEROKEE =32, /*[13A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_OGHAM =34, /*[1680]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_RUNIC =35, /*[16A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_KHMER =36, /*[1780]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_MONGOLIAN =37, /*[1800]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
+
+    /**
+     * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
+     * @stable ICU 2.0
+     */
+    UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_NUMBER_FORMS =45, /*[2150]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ARROWS =46, /*[2190]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BOX_DRAWING =52, /*[2500]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_DINGBATS =56, /*[2700]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HIRAGANA =62, /*[3040]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_KATAKANA =63, /*[30A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BOPOMOFO =64, /*[3100]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_KANBUN =66, /*[3190]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_YI_SYLLABLES =72, /*[A000]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_YI_RADICALS =73, /*[A490]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
+
+    /**
+     * Same as UBLOCK_PRIVATE_USE_AREA.
+     * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+     * and multiple code point ranges had this block.
+     * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+     * adds separate blocks for the supplementary PUAs.
+     *
+     * @stable ICU 2.0
+     */
+    UBLOCK_PRIVATE_USE = 78,
+    /**
+     * Same as UBLOCK_PRIVATE_USE.
+     * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+     * and multiple code point ranges had this block.
+     * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+     * adds separate blocks for the supplementary PUAs.
+     *
+     * @stable ICU 2.0
+     */
+    UBLOCK_PRIVATE_USE_AREA =UBLOCK_PRIVATE_USE, /*[E000]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_SPECIALS =86, /*[FFF0]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/
+
+    /* New blocks in Unicode 3.1 */
+
+    /** @stable ICU 2.0 */
+    UBLOCK_OLD_ITALIC = 88  , /*[10300]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_GOTHIC = 89 , /*[10330]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_DESERET = 90 , /*[10400]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91 , /*[1D000]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_MUSICAL_SYMBOLS = 92 , /*[1D100]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93  , /*[1D400]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B  = 94 , /*[20000]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95 , /*[2F800]*/
+    /** @stable ICU 2.0 */
+    UBLOCK_TAGS = 96, /*[E0000]*/
+
+    /* New blocks in Unicode 3.2 */
+
+    /**
+     * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
+     * @stable ICU 2.2
+     */
+    UBLOCK_CYRILLIC_SUPPLEMENTARY = 97, 
+    /** @stable ICU 3.0  */
+    UBLOCK_CYRILLIC_SUPPLEMENT = UBLOCK_CYRILLIC_SUPPLEMENTARY, /*[0500]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_TAGALOG = 98, /*[1700]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_HANUNOO = 99, /*[1720]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_BUHID = 100, /*[1740]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_TAGBANWA = 101, /*[1760]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
+    /** @stable ICU 2.2 */
+    UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/
+
+    /* New blocks in Unicode 4 */
+
+    /** @stable ICU 2.6 */
+    UBLOCK_LIMBU = 111, /*[1900]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_TAI_LE = 112, /*[1950]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_UGARITIC = 120, /*[10380]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_SHAVIAN = 121, /*[10450]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_OSMANYA = 122, /*[10480]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
+    /** @stable ICU 2.6 */
+    UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/
+
+    /* New blocks in Unicode 4.1 */
+
+    /** @stable ICU 3.4 */
+    UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_BUGINESE = 129, /*[1A00]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_CJK_STROKES = 130, /*[31C0]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_COPTIC = 132, /*[2C80]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_TIFINAGH = 144, /*[2D30]*/
+    /** @stable ICU 3.4 */
+    UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/
+
+    /* New blocks in Unicode 5.0 */
+
+    /** @stable ICU 3.6 */
+    UBLOCK_NKO = 146, /*[07C0]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_BALINESE = 147, /*[1B00]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_PHAGS_PA = 150, /*[A840]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_PHOENICIAN = 151, /*[10900]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_CUNEIFORM = 152, /*[12000]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/
+    /** @stable ICU 3.6 */
+    UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/
+
+    /* New blocks in Unicode 5.1 */
+
+    /** @draft ICU 4.0 */
+    UBLOCK_SUNDANESE = 155, /*[1B80]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_LEPCHA = 156, /*[1C00]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_OL_CHIKI = 157, /*[1C50]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_VAI = 159, /*[A500]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_SAURASHTRA = 161, /*[A880]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_KAYAH_LI = 162, /*[A900]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_REJANG = 163, /*[A930]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_CHAM = 164, /*[AA00]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_LYCIAN = 167, /*[10280]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_CARIAN = 168, /*[102A0]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_LYDIAN = 169, /*[10920]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/
+    /** @draft ICU 4.0 */
+    UBLOCK_DOMINO_TILES = 171, /*[1F030]*/
+
+    /** @stable ICU 2.0 */
+    UBLOCK_COUNT = 172,
+
+    /** @stable ICU 2.0 */
+    UBLOCK_INVALID_CODE=-1
+};
+
+/** @stable ICU 2.0 */
+typedef enum UBlockCode UBlockCode;
+
+/**
+ * East Asian Width constants.
+ *
+ * @see UCHAR_EAST_ASIAN_WIDTH
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+typedef enum UEastAsianWidth {
+    U_EA_NEUTRAL,   /*[N]*/ /*See note !!*/
+    U_EA_AMBIGUOUS, /*[A]*/
+    U_EA_HALFWIDTH, /*[H]*/
+    U_EA_FULLWIDTH, /*[F]*/
+    U_EA_NARROW,    /*[Na]*/
+    U_EA_WIDE,      /*[W]*/
+    U_EA_COUNT
+} UEastAsianWidth;
+/*
+ * Implementation note:
+ * Keep UEastAsianWidth constant values in sync with names list in genprops/props2.c.
+ */
+
+/**
+ * Selector constants for u_charName().
+ * u_charName() returns the "modern" name of a
+ * Unicode character; or the name that was defined in
+ * Unicode version 1.0, before the Unicode standard merged
+ * with ISO-10646; or an "extended" name that gives each
+ * Unicode code point a unique name.
+ *
+ * @see u_charName
+ * @stable ICU 2.0
+ */
+typedef enum UCharNameChoice {
+    U_UNICODE_CHAR_NAME,
+    U_UNICODE_10_CHAR_NAME,
+    U_EXTENDED_CHAR_NAME,
+    U_CHAR_NAME_CHOICE_COUNT
+} UCharNameChoice;
+
+/**
+ * Selector constants for u_getPropertyName() and
+ * u_getPropertyValueName().  These selectors are used to choose which
+ * name is returned for a given property or value.  All properties and
+ * values have a long name.  Most have a short name, but some do not.
+ * Unicode allows for additional names, beyond the long and short
+ * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
+ * i=1, 2,...
+ *
+ * @see u_getPropertyName()
+ * @see u_getPropertyValueName()
+ * @stable ICU 2.4
+ */
+typedef enum UPropertyNameChoice {
+    U_SHORT_PROPERTY_NAME,
+    U_LONG_PROPERTY_NAME,
+    U_PROPERTY_NAME_CHOICE_COUNT
+} UPropertyNameChoice;
+
+/**
+ * Decomposition Type constants.
+ *
+ * @see UCHAR_DECOMPOSITION_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UDecompositionType {
+    U_DT_NONE,              /*[none]*/ /*See note !!*/
+    U_DT_CANONICAL,         /*[can]*/
+    U_DT_COMPAT,            /*[com]*/
+    U_DT_CIRCLE,            /*[enc]*/
+    U_DT_FINAL,             /*[fin]*/
+    U_DT_FONT,              /*[font]*/
+    U_DT_FRACTION,          /*[fra]*/
+    U_DT_INITIAL,           /*[init]*/
+    U_DT_ISOLATED,          /*[iso]*/
+    U_DT_MEDIAL,            /*[med]*/
+    U_DT_NARROW,            /*[nar]*/
+    U_DT_NOBREAK,           /*[nb]*/
+    U_DT_SMALL,             /*[sml]*/
+    U_DT_SQUARE,            /*[sqr]*/
+    U_DT_SUB,               /*[sub]*/
+    U_DT_SUPER,             /*[sup]*/
+    U_DT_VERTICAL,          /*[vert]*/
+    U_DT_WIDE,              /*[wide]*/
+    U_DT_COUNT /* 18 */
+} UDecompositionType;
+
+/**
+ * Joining Type constants.
+ *
+ * @see UCHAR_JOINING_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningType {
+    U_JT_NON_JOINING,       /*[U]*/ /*See note !!*/
+    U_JT_JOIN_CAUSING,      /*[C]*/
+    U_JT_DUAL_JOINING,      /*[D]*/
+    U_JT_LEFT_JOINING,      /*[L]*/
+    U_JT_RIGHT_JOINING,     /*[R]*/
+    U_JT_TRANSPARENT,       /*[T]*/
+    U_JT_COUNT /* 6 */
+} UJoiningType;
+
+/**
+ * Joining Group constants.
+ *
+ * @see UCHAR_JOINING_GROUP
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningGroup {
+    U_JG_NO_JOINING_GROUP,
+    U_JG_AIN,
+    U_JG_ALAPH,
+    U_JG_ALEF,
+    U_JG_BEH,
+    U_JG_BETH,
+    U_JG_DAL,
+    U_JG_DALATH_RISH,
+    U_JG_E,
+    U_JG_FEH,
+    U_JG_FINAL_SEMKATH,
+    U_JG_GAF,
+    U_JG_GAMAL,
+    U_JG_HAH,
+    U_JG_HAMZA_ON_HEH_GOAL,
+    U_JG_HE,
+    U_JG_HEH,
+    U_JG_HEH_GOAL,
+    U_JG_HETH,
+    U_JG_KAF,
+    U_JG_KAPH,
+    U_JG_KNOTTED_HEH,
+    U_JG_LAM,
+    U_JG_LAMADH,
+    U_JG_MEEM,
+    U_JG_MIM,
+    U_JG_NOON,
+    U_JG_NUN,
+    U_JG_PE,
+    U_JG_QAF,
+    U_JG_QAPH,
+    U_JG_REH,
+    U_JG_REVERSED_PE,
+    U_JG_SAD,
+    U_JG_SADHE,
+    U_JG_SEEN,
+    U_JG_SEMKATH,
+    U_JG_SHIN,
+    U_JG_SWASH_KAF,
+    U_JG_SYRIAC_WAW,
+    U_JG_TAH,
+    U_JG_TAW,
+    U_JG_TEH_MARBUTA,
+    U_JG_TETH,
+    U_JG_WAW,
+    U_JG_YEH,
+    U_JG_YEH_BARREE,
+    U_JG_YEH_WITH_TAIL,
+    U_JG_YUDH,
+    U_JG_YUDH_HE,
+    U_JG_ZAIN,
+    U_JG_FE,        /**< @stable ICU 2.6 */
+    U_JG_KHAPH,     /**< @stable ICU 2.6 */
+    U_JG_ZHAIN,     /**< @stable ICU 2.6 */
+    U_JG_BURUSHASKI_YEH_BARREE, /**< @draft ICU 4.0 */
+    U_JG_COUNT
+} UJoiningGroup;
+
+/**
+ * Grapheme Cluster Break constants.
+ *
+ * @see UCHAR_GRAPHEME_CLUSTER_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum UGraphemeClusterBreak {
+    U_GCB_OTHER = 0,            /*[XX]*/ /*See note !!*/
+    U_GCB_CONTROL = 1,          /*[CN]*/
+    U_GCB_CR = 2,               /*[CR]*/
+    U_GCB_EXTEND = 3,           /*[EX]*/
+    U_GCB_L = 4,                /*[L]*/
+    U_GCB_LF = 5,               /*[LF]*/
+    U_GCB_LV = 6,               /*[LV]*/
+    U_GCB_LVT = 7,              /*[LVT]*/
+    U_GCB_T = 8,                /*[T]*/
+    U_GCB_V = 9,                /*[V]*/
+    U_GCB_SPACING_MARK = 10,    /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+    U_GCB_PREPEND = 11,         /*[PP]*/
+    U_GCB_COUNT = 12
+} UGraphemeClusterBreak;
+
+/**
+ * Word Break constants.
+ * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)
+ *
+ * @see UCHAR_WORD_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum UWordBreakValues {
+    U_WB_OTHER = 0,             /*[XX]*/ /*See note !!*/
+    U_WB_ALETTER = 1,           /*[LE]*/
+    U_WB_FORMAT = 2,            /*[FO]*/
+    U_WB_KATAKANA = 3,          /*[KA]*/
+    U_WB_MIDLETTER = 4,         /*[ML]*/
+    U_WB_MIDNUM = 5,            /*[MN]*/
+    U_WB_NUMERIC = 6,           /*[NU]*/
+    U_WB_EXTENDNUMLET = 7,      /*[EX]*/
+    U_WB_CR = 8,                /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+    U_WB_EXTEND = 9,            /*[Extend]*/
+    U_WB_LF = 10,               /*[LF]*/
+    U_WB_MIDNUMLET =11,         /*[MB]*/
+    U_WB_NEWLINE =12,           /*[NL]*/
+    U_WB_COUNT = 13
+} UWordBreakValues;
+
+/**
+ * Sentence Break constants.
+ *
+ * @see UCHAR_SENTENCE_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum USentenceBreak {
+    U_SB_OTHER = 0,             /*[XX]*/ /*See note !!*/
+    U_SB_ATERM = 1,             /*[AT]*/
+    U_SB_CLOSE = 2,             /*[CL]*/
+    U_SB_FORMAT = 3,            /*[FO]*/
+    U_SB_LOWER = 4,             /*[LO]*/
+    U_SB_NUMERIC = 5,           /*[NU]*/
+    U_SB_OLETTER = 6,           /*[LE]*/
+    U_SB_SEP = 7,               /*[SE]*/
+    U_SB_SP = 8,                /*[SP]*/
+    U_SB_STERM = 9,             /*[ST]*/
+    U_SB_UPPER = 10,            /*[UP]*/
+    U_SB_CR = 11,               /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+    U_SB_EXTEND = 12,           /*[EX]*/
+    U_SB_LF = 13,               /*[LF]*/
+    U_SB_SCONTINUE = 14,        /*[SC]*/
+    U_SB_COUNT = 15
+} USentenceBreak;
+
+/**
+ * Line Break constants.
+ *
+ * @see UCHAR_LINE_BREAK
+ * @stable ICU 2.2
+ */
+typedef enum ULineBreak {
+    U_LB_UNKNOWN = 0,           /*[XX]*/ /*See note !!*/
+    U_LB_AMBIGUOUS = 1,         /*[AI]*/
+    U_LB_ALPHABETIC = 2,        /*[AL]*/
+    U_LB_BREAK_BOTH = 3,        /*[B2]*/
+    U_LB_BREAK_AFTER = 4,       /*[BA]*/
+    U_LB_BREAK_BEFORE = 5,      /*[BB]*/
+    U_LB_MANDATORY_BREAK = 6,   /*[BK]*/
+    U_LB_CONTINGENT_BREAK = 7,  /*[CB]*/
+    U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
+    U_LB_COMBINING_MARK = 9,    /*[CM]*/
+    U_LB_CARRIAGE_RETURN = 10,   /*[CR]*/
+    U_LB_EXCLAMATION = 11,       /*[EX]*/
+    U_LB_GLUE = 12,              /*[GL]*/
+    U_LB_HYPHEN = 13,            /*[HY]*/
+    U_LB_IDEOGRAPHIC = 14,       /*[ID]*/
+    U_LB_INSEPERABLE = 15,
+    /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */
+    U_LB_INSEPARABLE=U_LB_INSEPERABLE,/*[IN]*/
+    U_LB_INFIX_NUMERIC = 16,     /*[IS]*/
+    U_LB_LINE_FEED = 17,         /*[LF]*/
+    U_LB_NONSTARTER = 18,        /*[NS]*/
+    U_LB_NUMERIC = 19,           /*[NU]*/
+    U_LB_OPEN_PUNCTUATION = 20,  /*[OP]*/
+    U_LB_POSTFIX_NUMERIC = 21,   /*[PO]*/
+    U_LB_PREFIX_NUMERIC = 22,    /*[PR]*/
+    U_LB_QUOTATION = 23,         /*[QU]*/
+    U_LB_COMPLEX_CONTEXT = 24,   /*[SA]*/
+    U_LB_SURROGATE = 25,         /*[SG]*/
+    U_LB_SPACE = 26,             /*[SP]*/
+    U_LB_BREAK_SYMBOLS = 27,     /*[SY]*/
+    U_LB_ZWSPACE = 28,           /*[ZW]*/
+    U_LB_NEXT_LINE = 29,         /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
+    U_LB_WORD_JOINER = 30,       /*[WJ]*/
+    U_LB_H2 = 31,                /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
+    U_LB_H3 = 32,                /*[H3]*/
+    U_LB_JL = 33,                /*[JL]*/
+    U_LB_JT = 34,                /*[JT]*/
+    U_LB_JV = 35,                /*[JV]*/
+    U_LB_COUNT = 36
+} ULineBreak;
+
+/**
+ * Numeric Type constants.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UNumericType {
+    U_NT_NONE,              /*[None]*/ /*See note !!*/
+    U_NT_DECIMAL,           /*[de]*/
+    U_NT_DIGIT,             /*[di]*/
+    U_NT_NUMERIC,           /*[nu]*/
+    U_NT_COUNT
+} UNumericType;
+
+/**
+ * Hangul Syllable Type constants.
+ *
+ * @see UCHAR_HANGUL_SYLLABLE_TYPE
+ * @stable ICU 2.6
+ */
+typedef enum UHangulSyllableType {
+    U_HST_NOT_APPLICABLE,   /*[NA]*/ /*See note !!*/
+    U_HST_LEADING_JAMO,     /*[L]*/
+    U_HST_VOWEL_JAMO,       /*[V]*/
+    U_HST_TRAILING_JAMO,    /*[T]*/
+    U_HST_LV_SYLLABLE,      /*[LV]*/
+    U_HST_LVT_SYLLABLE,     /*[LVT]*/
+    U_HST_COUNT
+} UHangulSyllableType;
+
+/**
+ * Check a binary Unicode property for a code point.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ucd/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which binary property to check.
+ *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT.
+ * @return TRUE or FALSE according to the binary Unicode property value for c.
+ *         Also FALSE if 'which' is out of bounds or if the Unicode version
+ *         does not have data for the property at all, or not for this code point.
+ *
+ * @see UProperty
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_hasBinaryProperty(UChar32 c, UProperty which);
+
+/**
+ * Check if a code point has the Alphabetic Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
+ * This is different from u_isalpha!
+ * @param c Code point to test
+ * @return true if the code point has the Alphabetic Unicode property, false otherwise
+ *
+ * @see UCHAR_ALPHABETIC
+ * @see u_isalpha
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isUAlphabetic(UChar32 c);
+
+/**
+ * Check if a code point has the Lowercase Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE).
+ * This is different from u_islower!
+ * @param c Code point to test
+ * @return true if the code point has the Lowercase Unicode property, false otherwise
+ *
+ * @see UCHAR_LOWERCASE
+ * @see u_islower
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isULowercase(UChar32 c);
+
+/**
+ * Check if a code point has the Uppercase Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE).
+ * This is different from u_isupper!
+ * @param c Code point to test
+ * @return true if the code point has the Uppercase Unicode property, false otherwise
+ *
+ * @see UCHAR_UPPERCASE
+ * @see u_isupper
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isUUppercase(UChar32 c);
+
+/**
+ * Check if a code point has the White_Space Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE).
+ * This is different from both u_isspace and u_isWhitespace!
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c Code point to test
+ * @return true if the code point has the White_Space Unicode property, false otherwise.
+ *
+ * @see UCHAR_WHITE_SPACE
+ * @see u_isWhitespace
+ * @see u_isspace
+ * @see u_isJavaSpaceChar
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_STABLE UBool U_EXPORT2
+u_isUWhiteSpace(UChar32 c);
+
+/**
+ * Get the property value for an enumerated or integer Unicode property for a code point.
+ * Also returns binary and mask property values.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Sample usage:
+ * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);
+ * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which property to check.
+ *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ * @return Numeric value that is directly the property value or,
+ *         for enumerated properties, corresponds to the numeric value of the enumerated
+ *         constant of the respective property value enumeration type
+ *         (cast to enum type if necessary).
+ *         Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties.
+ *         Returns a bit-mask for mask properties.
+ *         Returns 0 if 'which' is out of bounds or if the Unicode version
+ *         does not have data for the property at all, or not for this code point.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyMinValue
+ * @see u_getIntPropertyMaxValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getIntPropertyValue(UChar32 c, UProperty which);
+
+/**
+ * Get the minimum value for an enumerated/integer/binary Unicode property.
+ * Can be used together with u_getIntPropertyMaxValue
+ * to allocate arrays of UnicodeSet or similar.
+ *
+ * @param which UProperty selector constant, identifies which binary property to check.
+ *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
+ * @return Minimum value returned by u_getIntPropertyValue for a Unicode property.
+ *         0 if the property selector is out of range.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getUnicodeVersion
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getIntPropertyMinValue(UProperty which);
+
+/**
+ * Get the maximum value for an enumerated/integer/binary Unicode property.
+ * Can be used together with u_getIntPropertyMinValue
+ * to allocate arrays of UnicodeSet or similar.
+ *
+ * Examples for min/max values (for Unicode 3.2):
+ *
+ * - UCHAR_BIDI_CLASS:    0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
+ * - UCHAR_SCRIPT:        0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
+ * - UCHAR_IDEOGRAPHIC:   0/1  (FALSE/TRUE)
+ *
+ * For undefined UProperty constant values, min/max values will be 0/-1.
+ *
+ * @param which UProperty selector constant, identifies which binary property to check.
+ *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
+ * @return Maximum value returned by u_getIntPropertyValue for a Unicode property.
+ *         <=0 if the property selector is out of range.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getUnicodeVersion
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getIntPropertyMaxValue(UProperty which);
+
+/**
+ * Get the numeric value for a Unicode code point as defined in the
+ * Unicode Character Database.
+ *
+ * A "double" return type is necessary because
+ * some numeric values are fractions, negative, or too large for int32_t.
+ *
+ * For characters without any numeric values in the Unicode Character Database,
+ * this function will return U_NO_NUMERIC_VALUE.
+ *
+ * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()
+ * also supports negative values, large values, and fractions,
+ * while Java's getNumericValue() returns values 10..35 for ASCII letters.
+ *
+ * @param c Code point to get the numeric value for.
+ * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
+ *
+ * @see U_NO_NUMERIC_VALUE
+ * @stable ICU 2.2
+ */
+U_STABLE double U_EXPORT2
+u_getNumericValue(UChar32 c);
+
+/**
+ * Special value that is returned by u_getNumericValue when
+ * no numeric value is defined for a code point.
+ *
+ * @see u_getNumericValue
+ * @stable ICU 2.2
+ */
+#define U_NO_NUMERIC_VALUE ((double)-123456789.)
+
+/**
+ * Determines whether the specified code point has the general category "Ll"
+ * (lowercase letter).
+ *
+ * Same as java.lang.Character.isLowerCase().
+ *
+ * This misses some characters that are also lowercase but
+ * have a different general category value.
+ * In order to include those, use UCHAR_LOWERCASE.
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an Ll lowercase letter
+ *
+ * @see UCHAR_LOWERCASE
+ * @see u_isupper
+ * @see u_istitle
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_islower(UChar32 c);
+
+/**
+ * Determines whether the specified code point has the general category "Lu"
+ * (uppercase letter).
+ *
+ * Same as java.lang.Character.isUpperCase().
+ *
+ * This misses some characters that are also uppercase but
+ * have a different general category value.
+ * In order to include those, use UCHAR_UPPERCASE.
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an Lu uppercase letter
+ *
+ * @see UCHAR_UPPERCASE
+ * @see u_islower
+ * @see u_istitle
+ * @see u_tolower
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isupper(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a titlecase letter.
+ * True for general category "Lt" (titlecase letter).
+ *
+ * Same as java.lang.Character.isTitleCase().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an Lt titlecase letter
+ *
+ * @see u_isupper
+ * @see u_islower
+ * @see u_totitle
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_istitle(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a digit character according to Java.
+ * True for characters with general category "Nd" (decimal digit numbers).
+ * Beginning with Unicode 4, this is the same as
+ * testing for the Numeric_Type of Decimal.
+ *
+ * Same as java.lang.Character.isDigit().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a digit character according to Character.isDigit()
+ *
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isdigit(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a letter character.
+ * True for general categories "L" (letters).
+ *
+ * Same as java.lang.Character.isLetter().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a letter character
+ *
+ * @see u_isdigit
+ * @see u_isalnum
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isalpha(UChar32 c);
+
+/**
+ * Determines whether the specified code point is an alphanumeric character
+ * (letter or digit) according to Java.
+ * True for characters with general categories
+ * "L" (letters) and "Nd" (decimal digit numbers).
+ *
+ * Same as java.lang.Character.isLetterOrDigit().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit()
+ *
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isalnum(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a hexadecimal digit.
+ * This is equivalent to u_digit(c, 16)>=0.
+ * True for characters with general category "Nd" (decimal digit numbers)
+ * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
+ * (That is, for letters with code points
+ * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
+ *
+ * In order to narrow the definition of hexadecimal digits to only ASCII
+ * characters, use (c<=0x7f && u_isxdigit(c)).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a hexadecimal digit
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isxdigit(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a punctuation character.
+ * True for characters with general categories "P" (punctuation).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a punctuation character
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_ispunct(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a "graphic" character
+ * (printable, excluding spaces).
+ * TRUE for all characters except those with general categories
+ * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates),
+ * "Cn" (unassigned), and "Z" (separators).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a "graphic" character
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isgraph(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a "blank" or "horizontal space",
+ * a character that visibly separates words on a line.
+ * The following are equivalent definitions:
+ *
+ * TRUE for Unicode White_Space characters except for "vertical space controls"
+ * where "vertical space controls" are the following characters:
+ * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)
+ *
+ * same as
+ *
+ * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators)
+ * except Zero Width Space (ZWSP, U+200B).
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a "blank"
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isblank(UChar32 c);
+
+/**
+ * Determines whether the specified code point is "defined",
+ * which usually means that it is assigned a character.
+ * True for general categories other than "Cn" (other, not assigned),
+ * i.e., true for all code points mentioned in UnicodeData.txt.
+ *
+ * Note that non-character code points (e.g., U+FDD0) are not "defined"
+ * (they are Cn), but surrogate code points are "defined" (Cs).
+ *
+ * Same as java.lang.Character.isDefined().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is assigned a character
+ *
+ * @see u_isdigit
+ * @see u_isalpha
+ * @see u_isalnum
+ * @see u_isupper
+ * @see u_islower
+ * @see u_istitle
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isdefined(UChar32 c);
+
+/**
+ * Determines if the specified character is a space character or not.
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c    the character to be tested
+ * @return  true if the character is a space character; false otherwise.
+ *
+ * @see u_isJavaSpaceChar
+ * @see u_isWhitespace
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isspace(UChar32 c);
+
+/**
+ * Determine if the specified code point is a space character according to Java.
+ * True for characters with general categories "Z" (separators),
+ * which does not include control codes (e.g., TAB or Line Feed).
+ *
+ * Same as java.lang.Character.isSpaceChar().
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a space character according to Character.isSpaceChar()
+ *
+ * @see u_isspace
+ * @see u_isWhitespace
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isJavaSpaceChar(UChar32 c);
+
+/**
+ * Determines if the specified code point is a whitespace character according to Java/ICU.
+ * A character is considered to be a Java whitespace character if and only
+ * if it satisfies one of the following criteria:
+ *
+ * - It is a Unicode separator (categories "Z"), but is not
+ *      a no-break space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
+ * - It is U+0009 HORIZONTAL TABULATION.
+ * - It is U+000A LINE FEED.
+ * - It is U+000B VERTICAL TABULATION.
+ * - It is U+000C FORM FEED.
+ * - It is U+000D CARRIAGE RETURN.
+ * - It is U+001C FILE SEPARATOR.
+ * - It is U+001D GROUP SEPARATOR.
+ * - It is U+001E RECORD SEPARATOR.
+ * - It is U+001F UNIT SEPARATOR.
+ * - It is U+0085 NEXT LINE.
+ *
+ * Same as java.lang.Character.isWhitespace() except that Java omits U+0085.
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a whitespace character according to Java/ICU
+ *
+ * @see u_isspace
+ * @see u_isJavaSpaceChar
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isWhitespace(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a control character
+ * (as defined by this function).
+ * A control character is one of the following:
+ * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
+ * - U_CONTROL_CHAR (Cc)
+ * - U_FORMAT_CHAR (Cf)
+ * - U_LINE_SEPARATOR (Zl)
+ * - U_PARAGRAPH_SEPARATOR (Zp)
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a control character
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_isprint
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_iscntrl(UChar32 c);
+
+/**
+ * Determines whether the specified code point is an ISO control code.
+ * True for U+0000..U+001f and U+007f..U+009f (general category "Cc").
+ *
+ * Same as java.lang.Character.isISOControl().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is an ISO control code
+ *
+ * @see u_iscntrl
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+u_isISOControl(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a printable character.
+ * True for general categories <em>other</em> than "C" (controls).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a printable character
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_iscntrl
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isprint(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a base character.
+ * True for general categories "L" (letters), "N" (numbers),
+ * "Mc" (spacing combining marks), and "Me" (enclosing marks).
+ *
+ * Note that this is different from the Unicode definition in
+ * chapter 3.5, conformance clause D13,
+ * which defines base characters to be all characters (not Cn)
+ * that do not graphically combine with preceding characters (M)
+ * and that are neither control (Cc) or format (Cf) characters.
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is a base character according to this function
+ *
+ * @see u_isalpha
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isbase(UChar32 c);
+
+/**
+ * Returns the bidirectional category value for the code point,
+ * which is used in the Unicode bidirectional algorithm
+ * (UAX #9 http://www.unicode.org/reports/tr9/).
+ * Note that some <em>unassigned</em> code points have bidi values
+ * of R or AL because they are in blocks that are reserved
+ * for Right-To-Left scripts.
+ *
+ * Same as java.lang.Character.getDirectionality()
+ *
+ * @param c the code point to be tested
+ * @return the bidirectional category (UCharDirection) value
+ *
+ * @see UCharDirection
+ * @stable ICU 2.0
+ */
+U_STABLE UCharDirection U_EXPORT2
+u_charDirection(UChar32 c);
+
+/**
+ * Determines whether the code point has the Bidi_Mirrored property.
+ * This property is set for characters that are commonly used in
+ * Right-To-Left contexts and need to be displayed with a "mirrored"
+ * glyph.
+ *
+ * Same as java.lang.Character.isMirrored().
+ * Same as UCHAR_BIDI_MIRRORED
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the character has the Bidi_Mirrored property
+ *
+ * @see UCHAR_BIDI_MIRRORED
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isMirrored(UChar32 c);
+
+/**
+ * Maps the specified character to a "mirror-image" character.
+ * For characters with the Bidi_Mirrored property, implementations
+ * sometimes need a "poor man's" mapping to another Unicode
+ * character (code point) such that the default glyph may serve
+ * as the mirror-image of the default glyph of the specified
+ * character. This is useful for text conversion to and from
+ * codepages with visual order, and for displays without glyph
+ * selecetion capabilities.
+ *
+ * @param c the code point to be mapped
+ * @return another Unicode code point that may serve as a mirror-image
+ *         substitute, or c itself if there is no such mapping or c
+ *         does not have the Bidi_Mirrored property
+ *
+ * @see UCHAR_BIDI_MIRRORED
+ * @see u_isMirrored
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_charMirror(UChar32 c);
+
+/**
+ * Returns the general category value for the code point.
+ *
+ * Same as java.lang.Character.getType().
+ *
+ * @param c the code point to be tested
+ * @return the general category (UCharCategory) value
+ *
+ * @see UCharCategory
+ * @stable ICU 2.0
+ */
+U_STABLE int8_t U_EXPORT2
+u_charType(UChar32 c);
+
+/**
+ * Get a single-bit bit set for the general category of a character.
+ * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc.
+ * Same as U_MASK(u_charType(c)).
+ *
+ * @param c the code point to be tested
+ * @return a single-bit mask corresponding to the general category (UCharCategory) value
+ *
+ * @see u_charType
+ * @see UCharCategory
+ * @see U_GC_CN_MASK
+ * @stable ICU 2.1
+ */
+#define U_GET_GC_MASK(c) U_MASK(u_charType(c))
+
+/**
+ * Callback from u_enumCharTypes(), is called for each contiguous range
+ * of code points c (where start<=c<limit)
+ * with the same Unicode general category ("character type").
+ *
+ * The callback function can stop the enumeration by returning FALSE.
+ *
+ * @param context an opaque pointer, as passed into utrie_enum()
+ * @param start the first code point in a contiguous range with value
+ * @param limit one past the last code point in a contiguous range with value
+ * @param type the general category for all code points in [start..limit[
+ * @return FALSE to stop the enumeration
+ *
+ * @stable ICU 2.1
+ * @see UCharCategory
+ * @see u_enumCharTypes
+ */
+typedef UBool U_CALLCONV
+UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
+
+/**
+ * Enumerate efficiently all code points with their Unicode general categories.
+ *
+ * This is useful for building data structures (e.g., UnicodeSet's),
+ * for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
+ *
+ * For each contiguous range of code points with a given general category ("character type"),
+ * the UCharEnumTypeRange function is called.
+ * Adjacent ranges have different types.
+ * The Unicode Standard guarantees that the numeric value of the type is 0..31.
+ *
+ * @param enumRange a pointer to a function that is called for each contiguous range
+ *                  of code points with the same general category
+ * @param context an opaque pointer that is passed on to the callback function
+ *
+ * @stable ICU 2.1
+ * @see UCharCategory
+ * @see UCharEnumTypeRange
+ */
+U_STABLE void U_EXPORT2
+u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Returns the combining class of the code point as specified in UnicodeData.txt.
+ *
+ * @param c the code point of the character
+ * @return the combining class of the character
+ * @stable ICU 2.0
+ */
+U_STABLE uint8_t U_EXPORT2
+u_getCombiningClass(UChar32 c);
+
+#endif
+
+/**
+ * Returns the decimal digit value of a decimal digit character.
+ * Such characters have the general category "Nd" (decimal digit numbers)
+ * and a Numeric_Type of Decimal.
+ *
+ * Unlike ICU releases before 2.6, no digit values are returned for any
+ * Han characters because Han number characters are often used with a special
+ * Chinese-style number format (with characters for powers of 10 in between)
+ * instead of in decimal-positional notation.
+ * Unicode 4 explicitly assigns Han number characters the Numeric_Type
+ * Numeric instead of Decimal.
+ * See Jitterbug 1483 for more details.
+ *
+ * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue()
+ * for complete numeric Unicode properties.
+ *
+ * @param c the code point for which to get the decimal digit value
+ * @return the decimal digit value of c,
+ *         or -1 if c is not a decimal digit character
+ *
+ * @see u_getNumericValue
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_charDigitValue(UChar32 c);
+
+/**
+ * Returns the Unicode allocation block that contains the character.
+ *
+ * @param c the code point to be tested
+ * @return the block value (UBlockCode) for c
+ *
+ * @see UBlockCode
+ * @stable ICU 2.0
+ */
+U_STABLE UBlockCode U_EXPORT2
+ublock_getCode(UChar32 c);
+
+/**
+ * Retrieve the name of a Unicode character.
+ * Depending on <code>nameChoice</code>, the character name written
+ * into the buffer is the "modern" name or the name that was defined
+ * in Unicode version 1.0.
+ * The name contains only "invariant" characters
+ * like A-Z, 0-9, space, and '-'.
+ * Unicode 1.0 names are only retrieved if they are different from the modern
+ * names and if the data file contains the data for them. gennames may or may
+ * not be called with a command line option to include 1.0 names in unames.dat.
+ *
+ * @param code The character (code point) for which to get the name.
+ *             It must be <code>0<=code<=0x10ffff</code>.
+ * @param nameChoice Selector for which name to get.
+ * @param buffer Destination address for copying the name.
+ *               The name will always be zero-terminated.
+ *               If there is no name, then the buffer will be set to the empty string.
+ * @param bufferLength <code>==sizeof(buffer)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ *        check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
+ *        returns.
+ * @return The length of the name, or 0 if there is no name for this character.
+ *         If the bufferLength is less than or equal to the length, then the buffer
+ *         contains the truncated name and the returned length indicates the full
+ *         length of the name.
+ *         The length does not include the zero-termination.
+ *
+ * @see UCharNameChoice
+ * @see u_charFromName
+ * @see u_enumCharNames
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_charName(UChar32 code, UCharNameChoice nameChoice,
+           char *buffer, int32_t bufferLength,
+           UErrorCode *pErrorCode);
+
+/**
+ * Get the ISO 10646 comment for a character.
+ * The ISO 10646 comment is an informative field in the Unicode Character
+ * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
+ *
+ * @param c The character (code point) for which to get the ISO comment.
+ *             It must be <code>0<=c<=0x10ffff</code>.
+ * @param dest Destination address for copying the comment.
+ *             The comment will be zero-terminated if possible.
+ *             If there is no comment, then the buffer will be set to the empty string.
+ * @param destCapacity <code>==sizeof(dest)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ *        check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
+ *        returns.
+ * @return The length of the comment, or 0 if there is no comment for this character.
+ *         If the destCapacity is less than or equal to the length, then the buffer
+ *         contains the truncated name and the returned length indicates the full
+ *         length of the name.
+ *         The length does not include the zero-termination.
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getISOComment(UChar32 c,
+                char *dest, int32_t destCapacity,
+                UErrorCode *pErrorCode);
+
+/**
+ * Find a Unicode character by its name and return its code point value.
+ * The name is matched exactly and completely.
+ * If the name does not correspond to a code point, <i>pErrorCode</i>
+ * is set to <code>U_INVALID_CHAR_FOUND</code>.
+ * A Unicode 1.0 name is matched only if it differs from the modern name.
+ * Unicode names are all uppercase. Extended names are lowercase followed
+ * by an uppercase hexadecimal number, and within angle brackets.
+ *
+ * @param nameChoice Selector for which name to match.
+ * @param name The name to match.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ * @return The Unicode value of the code point with the given name,
+ *         or an undefined value if there is no such code point.
+ *
+ * @see UCharNameChoice
+ * @see u_charName
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+U_STABLE UChar32 U_EXPORT2
+u_charFromName(UCharNameChoice nameChoice,
+               const char *name,
+               UErrorCode *pErrorCode);
+
+/**
+ * Type of a callback function for u_enumCharNames() that gets called
+ * for each Unicode character with the code point value and
+ * the character name.
+ * If such a function returns FALSE, then the enumeration is stopped.
+ *
+ * @param context The context pointer that was passed to u_enumCharNames().
+ * @param code The Unicode code point for the character with this name.
+ * @param nameChoice Selector for which kind of names is enumerated.
+ * @param name The character's name, zero-terminated.
+ * @param length The length of the name.
+ * @return TRUE if the enumeration should continue, FALSE to stop it.
+ *
+ * @see UCharNameChoice
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
+                               UChar32 code,
+                               UCharNameChoice nameChoice,
+                               const char *name,
+                               int32_t length);
+
+/**
+ * Enumerate all assigned Unicode characters between the start and limit
+ * code points (start inclusive, limit exclusive) and call a function
+ * for each, passing the code point value and the character name.
+ * For Unicode 1.0 names, only those are enumerated that differ from the
+ * modern names.
+ *
+ * @param start The first code point in the enumeration range.
+ * @param limit One more than the last code point in the enumeration range
+ *              (the first one after the range).
+ * @param fn The function that is to be called for each character name.
+ * @param context An arbitrary pointer that is passed to the function.
+ * @param nameChoice Selector for which kind of names to enumerate.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ *
+ * @see UCharNameChoice
+ * @see UEnumCharNamesFn
+ * @see u_charName
+ * @see u_charFromName
+ * @stable ICU 1.7
+ */
+U_STABLE void U_EXPORT2
+u_enumCharNames(UChar32 start, UChar32 limit,
+                UEnumCharNamesFn *fn,
+                void *context,
+                UCharNameChoice nameChoice,
+                UErrorCode *pErrorCode);
+
+/**
+ * Return the Unicode name for a given property, as given in the
+ * Unicode database file PropertyAliases.txt.
+ *
+ * In addition, this function maps the property
+ * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
+ * "General_Category_Mask".  These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param property UProperty selector other than UCHAR_INVALID_CODE.
+ *         If out of range, NULL is returned.
+ *
+ * @param nameChoice selector for which name to get.  If out of range,
+ *         NULL is returned.  All properties have a long name.  Most
+ *         have a short name, but some do not.  Unicode allows for
+ *         additional names; if present these will be returned by
+ *         U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+ *
+ * @return a pointer to the name, or NULL if either the
+ *         property or the nameChoice is out of range.  If a given
+ *         nameChoice returns NULL, then all larger values of
+ *         nameChoice will return NULL, with one exception: if NULL is
+ *         returned for U_SHORT_PROPERTY_NAME, then
+ *         U_LONG_PROPERTY_NAME (and higher) may still return a
+ *         non-NULL value.  The returned pointer is valid until
+ *         u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+u_getPropertyName(UProperty property,
+                  UPropertyNameChoice nameChoice);
+
+/**
+ * Return the UProperty enum for a given property name, as specified
+ * in the Unicode database file PropertyAliases.txt.  Short, long, and
+ * any other variants are recognized.
+ *
+ * In addition, this function maps the synthetic names "gcm" /
+ * "General_Category_Mask" to the property
+ * UCHAR_GENERAL_CATEGORY_MASK.  These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param alias the property name to be matched.  The name is compared
+ *         using "loose matching" as described in PropertyAliases.txt.
+ *
+ * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
+ *         does not match any property.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_STABLE UProperty U_EXPORT2
+u_getPropertyEnum(const char* alias);
+
+/**
+ * Return the Unicode name for a given property value, as given in the
+ * Unicode database file PropertyValueAliases.txt.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt can only be
+ * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
+ *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ *        If out of range, NULL is returned.
+ *
+ * @param value selector for a value for the given property.  If out
+ *         of range, NULL is returned.  In general, valid values range
+ *         from 0 up to some maximum.  There are a few exceptions:
+ *         (1.) UCHAR_BLOCK values begin at the non-zero value
+ *         UBLOCK_BASIC_LATIN.  (2.)  UCHAR_CANONICAL_COMBINING_CLASS
+ *         values are not contiguous and range from 0..240.  (3.)
+ *         UCHAR_GENERAL_CATEGORY_MASK values are not values of
+ *         UCharCategory, but rather mask values produced by
+ *         U_GET_GC_MASK().  This allows grouped categories such as
+ *         [:L:] to be represented.  Mask values range
+ *         non-contiguously from 1..U_GC_P_MASK.
+ *
+ * @param nameChoice selector for which name to get.  If out of range,
+ *         NULL is returned.  All values have a long name.  Most have
+ *         a short name, but some do not.  Unicode allows for
+ *         additional names; if present these will be returned by
+ *         U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+
+ * @return a pointer to the name, or NULL if either the
+ *         property or the nameChoice is out of range.  If a given
+ *         nameChoice returns NULL, then all larger values of
+ *         nameChoice will return NULL, with one exception: if NULL is
+ *         returned for U_SHORT_PROPERTY_NAME, then
+ *         U_LONG_PROPERTY_NAME (and higher) may still return a
+ *         non-NULL value.  The returned pointer is valid until
+ *         u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+u_getPropertyValueName(UProperty property,
+                       int32_t value,
+                       UPropertyNameChoice nameChoice);
+
+/**
+ * Return the property value integer for a given value name, as
+ * specified in the Unicode database file PropertyValueAliases.txt.
+ * Short, long, and any other variants are recognized.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt will only be
+ * recognized with UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
+ *        Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ *        or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ *        or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ *        If out of range, UCHAR_INVALID_CODE is returned.
+ *
+ * @param alias the value name to be matched.  The name is compared
+ *         using "loose matching" as described in
+ *         PropertyValueAliases.txt.
+ *
+ * @return a value integer or UCHAR_INVALID_CODE if the given name
+ *         does not match any value of the given property, or if the
+ *         property is invalid.  Note: U CHAR_GENERAL_CATEGORY values
+ *         are not values of UCharCategory, but rather mask values
+ *         produced by U_GET_GC_MASK().  This allows grouped
+ *         categories such as [:L:] to be represented.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+u_getPropertyValueEnum(UProperty property,
+                       const char* alias);
+
+/**
+ * Determines if the specified character is permissible as the
+ * first character in an identifier according to Unicode
+ * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
+ * True for characters with general categories "L" (letters) and "Nl" (letter numbers).
+ *
+ * Same as java.lang.Character.isUnicodeIdentifierStart().
+ * Same as UCHAR_ID_START
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may start an identifier
+ *
+ * @see UCHAR_ID_START
+ * @see u_isalpha
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isIDStart(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible
+ * in an identifier according to Java.
+ * True for characters with general categories "L" (letters),
+ * "Nl" (letter numbers), "Nd" (decimal digits),
+ * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and
+ * u_isIDIgnorable(c).
+ *
+ * Same as java.lang.Character.isUnicodeIdentifierPart().
+ * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE)
+ * except that Unicode recommends to ignore Cf which is less than
+ * u_isIDIgnorable(c).
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may occur in an identifier according to Java
+ *
+ * @see UCHAR_ID_CONTINUE
+ * @see u_isIDStart
+ * @see u_isIDIgnorable
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isIDPart(UChar32 c);
+
+/**
+ * Determines if the specified character should be regarded
+ * as an ignorable character in an identifier,
+ * according to Java.
+ * True for characters with general category "Cf" (format controls) as well as
+ * non-whitespace ISO controls
+ * (U+0000..U+0008, U+000E..U+001B, U+007F..U+0084, U+0086..U+009F).
+ *
+ * Same as java.lang.Character.isIdentifierIgnorable()
+ * except that Java also returns TRUE for U+0085 Next Line
+ * (it omits U+0085 from whitespace ISO controls).
+ *
+ * Note that Unicode just recommends to ignore Cf (format controls).
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point is ignorable in identifiers according to Java
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_isIDStart
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isIDIgnorable(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible as the
+ * first character in a Java identifier.
+ * In addition to u_isIDStart(c), true for characters with
+ * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).
+ *
+ * Same as java.lang.Character.isJavaIdentifierStart().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may start a Java identifier
+ *
+ * @see     u_isJavaIDPart
+ * @see     u_isalpha
+ * @see     u_isIDStart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isJavaIDStart(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible
+ * in a Java identifier.
+ * In addition to u_isIDPart(c), true for characters with
+ * general category "Sc" (currency symbols).
+ *
+ * Same as java.lang.Character.isJavaIdentifierPart().
+ *
+ * @param c the code point to be tested
+ * @return TRUE if the code point may occur in a Java identifier
+ *
+ * @see     u_isIDIgnorable
+ * @see     u_isJavaIDStart
+ * @see     u_isalpha
+ * @see     u_isdigit
+ * @see     u_isIDPart
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+u_isJavaIDPart(UChar32 c);
+
+/**
+ * The given character is mapped to its lowercase equivalent according to
+ * UnicodeData.txt; if the character has no lowercase equivalent, the character
+ * itself is returned.
+ *
+ * Same as java.lang.Character.toLowerCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Lowercase_Mapping of the code point, if any;
+ *         otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_tolower(UChar32 c);
+
+/**
+ * The given character is mapped to its uppercase equivalent according to UnicodeData.txt;
+ * if the character has no uppercase equivalent, the character itself is
+ * returned.
+ *
+ * Same as java.lang.Character.toUpperCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Uppercase_Mapping of the code point, if any;
+ *         otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_toupper(UChar32 c);
+
+/**
+ * The given character is mapped to its titlecase equivalent
+ * according to UnicodeData.txt;
+ * if none is defined, the character itself is returned.
+ *
+ * Same as java.lang.Character.toTitleCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Titlecase_Mapping of the code point, if any;
+ *         otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_totitle(UChar32 c);
+
+/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
+#define U_FOLD_CASE_DEFAULT 0
+
+/**
+ * Option value for case folding:
+ *
+ * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
+ * and dotless i appropriately for Turkic languages (tr, az).
+ *
+ * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
+ * are to be included for default mappings and
+ * excluded for the Turkic-specific mappings.
+ *
+ * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
+ * are to be excluded for default mappings and
+ * included for the Turkic-specific mappings.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
+
+/**
+ * The given character is mapped to its case folding equivalent according to
+ * UnicodeData.txt and CaseFolding.txt;
+ * if the character has no case folding equivalent, the character
+ * itself is returned.
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * http://icu-project.org/userguide/posix.html#case_mappings
+ *
+ * @param c the code point to be mapped
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return the Simple_Case_Folding of the code point, if any;
+ *         otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_foldCase(UChar32 c, uint32_t options);
+
+/**
+ * Returns the decimal digit value of the code point in the
+ * specified radix.
+ *
+ * If the radix is not in the range <code>2<=radix<=36</code> or if the
+ * value of <code>c</code> is not a valid digit in the specified
+ * radix, <code>-1</code> is returned. A character is a valid digit
+ * if at least one of the following is true:
+ * <ul>
+ * <li>The character has a decimal digit value.
+ *     Such characters have the general category "Nd" (decimal digit numbers)
+ *     and a Numeric_Type of Decimal.
+ *     In this case the value is the character's decimal digit value.</li>
+ * <li>The character is one of the uppercase Latin letters
+ *     <code>'A'</code> through <code>'Z'</code>.
+ *     In this case the value is <code>c-'A'+10</code>.</li>
+ * <li>The character is one of the lowercase Latin letters
+ *     <code>'a'</code> through <code>'z'</code>.
+ *     In this case the value is <code>ch-'a'+10</code>.</li>
+ * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A)
+ *     as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A)
+ *     are recognized.</li>
+ * </ul>
+ *
+ * Same as java.lang.Character.digit().
+ *
+ * @param   ch      the code point to be tested.
+ * @param   radix   the radix.
+ * @return  the numeric value represented by the character in the
+ *          specified radix,
+ *          or -1 if there is no value or if the value exceeds the radix.
+ *
+ * @see     UCHAR_NUMERIC_TYPE
+ * @see     u_forDigit
+ * @see     u_charDigitValue
+ * @see     u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_digit(UChar32 ch, int8_t radix);
+
+/**
+ * Determines the character representation for a specific digit in
+ * the specified radix. If the value of <code>radix</code> is not a
+ * valid radix, or the value of <code>digit</code> is not a valid
+ * digit in the specified radix, the null character
+ * (<code>U+0000</code>) is returned.
+ * <p>
+ * The <code>radix</code> argument is valid if it is greater than or
+ * equal to 2 and less than or equal to 36.
+ * The <code>digit</code> argument is valid if
+ * <code>0 <= digit < radix</code>.
+ * <p>
+ * If the digit is less than 10, then
+ * <code>'0' + digit</code> is returned. Otherwise, the value
+ * <code>'a' + digit - 10</code> is returned.
+ *
+ * Same as java.lang.Character.forDigit().
+ *
+ * @param   digit   the number to convert to a character.
+ * @param   radix   the radix.
+ * @return  the <code>char</code> representation of the specified digit
+ *          in the specified radix.
+ *
+ * @see     u_digit
+ * @see     u_charDigitValue
+ * @see     u_isdigit
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_forDigit(int32_t digit, int8_t radix);
+
+/**
+ * Get the "age" of the code point.
+ * The "age" is the Unicode version when the code point was first
+ * designated (as a non-character or for Private Use)
+ * or assigned a character.
+ * This can be useful to avoid emitting code points to receiving
+ * processes that do not accept newer characters.
+ * The data is from the UCD file DerivedAge.txt.
+ *
+ * @param c The code point.
+ * @param versionArray The Unicode version number array, to be filled in.
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+u_charAge(UChar32 c, UVersionInfo versionArray);
+
+/**
+ * Gets the Unicode version information.
+ * The version array is filled in with the version information
+ * for the Unicode standard that is currently used by ICU.
+ * For example, Unicode version 3.1.1 is represented as an array with
+ * the values { 3, 1, 1, 0 }.
+ *
+ * @param versionArray an output array that will be filled in with
+ *                     the Unicode version number
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_getUnicodeVersion(UVersionInfo versionArray);
+
+/**
+ * Get the FC_NFKC_Closure property string for a character.
+ * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
+ * or for "FNC": http://www.unicode.org/reports/tr15/
+ *
+ * @param c The character (code point) for which to get the FC_NFKC_Closure string.
+ *             It must be <code>0<=c<=0x10ffff</code>.
+ * @param dest Destination address for copying the string.
+ *             The string will be zero-terminated if possible.
+ *             If there is no FC_NFKC_Closure string,
+ *             then the buffer will be set to the empty string.
+ * @param destCapacity <code>==sizeof(dest)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable.
+ * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
+ *         If the destCapacity is less than or equal to the length, then the buffer
+ *         contains the truncated name and the returned length indicates the full
+ *         length of the name.
+ *         The length does not include the zero-termination.
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
+
+U_CDECL_END
+
+#endif /*_UCHAR*/
+/*eof*/

Deleted: MacRuby/trunk/icu-1060/unicode/uchriter.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uchriter.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uchriter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,381 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1998-2005, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*/
-
-#ifndef UCHRITER_H
-#define UCHRITER_H
-
-#include "unicode/utypes.h"
-#include "unicode/chariter.h"
-
-/**
- * \file 
- * \brief C++ API: UChar Character Iterator
- */
- 
-U_NAMESPACE_BEGIN
-
-/**
- * A concrete subclass of CharacterIterator that iterates over the
- * characters (code units or code points) in a UChar array.
- * It's possible not only to create an
- * iterator that iterates over an entire UChar array, but also to
- * create one that iterates over only a subrange of a UChar array
- * (iterators over different subranges of the same UChar array don't
- * compare equal).
- * @see CharacterIterator
- * @see ForwardCharacterIterator
- * @stable ICU 2.0
- */
-class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
-public:
-  /**
-   * Create an iterator over the UChar array referred to by "textPtr".
-   * The iteration range is 0 to <code>length-1</code>.
-   * text is only aliased, not adopted (the
-   * destructor will not delete it).
-   * @param textPtr The UChar array to be iterated over
-   * @param length The length of the UChar array
-   * @stable ICU 2.0
-   */
-  UCharCharacterIterator(const UChar* textPtr, int32_t length);
-
-  /**
-   * Create an iterator over the UChar array referred to by "textPtr".
-   * The iteration range is 0 to <code>length-1</code>.
-   * text is only aliased, not adopted (the
-   * destructor will not delete it).
-   * The starting
-   * position is specified by "position". If "position" is outside the valid
-   * iteration range, the behavior of this object is undefined.
-   * @param textPtr The UChar array to be iteratd over
-   * @param length The length of the UChar array
-   * @param position The starting position of the iteration
-   * @stable ICU 2.0
-   */
-  UCharCharacterIterator(const UChar* textPtr, int32_t length,
-                         int32_t position);
-
-  /**
-   * Create an iterator over the UChar array referred to by "textPtr".
-   * The iteration range is 0 to <code>end-1</code>.
-   * text is only aliased, not adopted (the
-   * destructor will not delete it).
-   * The starting
-   * position is specified by "position". If begin and end do not
-   * form a valid iteration range or "position" is outside the valid
-   * iteration range, the behavior of this object is undefined.
-   * @param textPtr The UChar array to be iterated over
-   * @param length The length of the UChar array
-   * @param textBegin  The begin position of the iteration range
-   * @param textEnd    The end position of the iteration range
-   * @param position    The starting position of the iteration
-   * @stable ICU 2.0
-   */
-  UCharCharacterIterator(const UChar* textPtr, int32_t length,
-                         int32_t textBegin,
-                         int32_t textEnd,
-                         int32_t position);
-
-  /**
-   * Copy constructor.  The new iterator iterates over the same range
-   * of the same string as "that", and its initial position is the
-   * same as "that"'s current position.
-   * @param that The UCharCharacterIterator to be copied
-   * @stable ICU 2.0
-   */
-  UCharCharacterIterator(const UCharCharacterIterator&  that);
-
-  /**
-   * Destructor.
-   * @stable ICU 2.0
-   */
-  virtual ~UCharCharacterIterator();
-
-  /**
-   * Assignment operator.  *this is altered to iterate over the sane
-   * range of the same string as "that", and refers to the same
-   * character within that string as "that" does.
-   * @param that The object to be copied
-   * @return the newly created object
-   * @stable ICU 2.0
-   */
-  UCharCharacterIterator&
-  operator=(const UCharCharacterIterator&    that);
-
-  /**
-   * Returns true if the iterators iterate over the same range of the
-   * same string and are pointing at the same character.
-   * @param that The ForwardCharacterIterator used to be compared for equality
-   * @return true if the iterators iterate over the same range of the
-   * same string and are pointing at the same character.
-   * @stable ICU 2.0
-   */
-  virtual UBool          operator==(const ForwardCharacterIterator& that) const;
-
-  /**
-   * Generates a hash code for this iterator.
-   * @return the hash code.
-   * @stable ICU 2.0
-   */
-  virtual int32_t         hashCode(void) const;
-
-  /**
-   * Returns a new UCharCharacterIterator referring to the same
-   * character in the same range of the same string as this one.  The
-   * caller must delete the new iterator.
-   * @return the CharacterIterator newly created
-   * @stable ICU 2.0
-   */
-  virtual CharacterIterator* clone(void) const;
-
-  /**
-   * Sets the iterator to refer to the first code unit in its
-   * iteration range, and returns that code unit.
-   * This can be used to begin an iteration with next().
-   * @return the first code unit in its iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UChar         first(void);
-
-  /**
-   * Sets the iterator to refer to the first code unit in its
-   * iteration range, returns that code unit, and moves the position
-   * to the second code unit. This is an alternative to setToStart()
-   * for forward iteration with nextPostInc().
-   * @return the first code unit in its iteration range
-   * @stable ICU 2.0
-   */
-  virtual UChar         firstPostInc(void);
-
-  /**
-   * Sets the iterator to refer to the first code point in its
-   * iteration range, and returns that code unit,
-   * This can be used to begin an iteration with next32().
-   * Note that an iteration with next32PostInc(), beginning with,
-   * e.g., setToStart() or firstPostInc(), is more efficient.
-   * @return the first code point in its iteration range
-   * @stable ICU 2.0
-   */
-  virtual UChar32       first32(void);
-
-  /**
-   * Sets the iterator to refer to the first code point in its
-   * iteration range, returns that code point, and moves the position
-   * to the second code point. This is an alternative to setToStart()
-   * for forward iteration with next32PostInc().
-   * @return the first code point in its iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       first32PostInc(void);
-
-  /**
-   * Sets the iterator to refer to the last code unit in its
-   * iteration range, and returns that code unit.
-   * This can be used to begin an iteration with previous().
-   * @return the last code unit in its iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UChar         last(void);
-
-  /**
-   * Sets the iterator to refer to the last code point in its
-   * iteration range, and returns that code unit.
-   * This can be used to begin an iteration with previous32().
-   * @return the last code point in its iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       last32(void);
-
-  /**
-   * Sets the iterator to refer to the "position"-th code unit
-   * in the text-storage object the iterator refers to, and
-   * returns that code unit.
-   * @param position the position within the text-storage object
-   * @return the code unit
-   * @stable ICU 2.0
-   */
-  virtual UChar         setIndex(int32_t position);
-
-  /**
-   * Sets the iterator to refer to the beginning of the code point
-   * that contains the "position"-th code unit
-   * in the text-storage object the iterator refers to, and
-   * returns that code point.
-   * The current position is adjusted to the beginning of the code point
-   * (its first code unit).
-   * @param position the position within the text-storage object
-   * @return the code unit
-   * @stable ICU 2.0
-   */
-  virtual UChar32       setIndex32(int32_t position);
-
-  /**
-   * Returns the code unit the iterator currently refers to.
-   * @return the code unit the iterator currently refers to.
-   * @stable ICU 2.0
-   */
-  virtual UChar         current(void) const;
-
-  /**
-   * Returns the code point the iterator currently refers to.
-   * @return the code point the iterator currently refers to.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       current32(void) const;
-
-  /**
-   * Advances to the next code unit in the iteration range (toward
-   * endIndex()), and returns that code unit.  If there are no more
-   * code units to return, returns DONE.
-   * @return the next code unit in the iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UChar         next(void);
-
-  /**
-   * Gets the current code unit for returning and advances to the next code unit
-   * in the iteration range
-   * (toward endIndex()).  If there are
-   * no more code units to return, returns DONE.
-   * @return the current code unit.
-   * @stable ICU 2.0
-   */
-  virtual UChar         nextPostInc(void);
-
-  /**
-   * Advances to the next code point in the iteration range (toward
-   * endIndex()), and returns that code point.  If there are no more
-   * code points to return, returns DONE.
-   * Note that iteration with "pre-increment" semantics is less
-   * efficient than iteration with "post-increment" semantics
-   * that is provided by next32PostInc().
-   * @return the next code point in the iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       next32(void);
-
-  /**
-   * Gets the current code point for returning and advances to the next code point
-   * in the iteration range
-   * (toward endIndex()).  If there are
-   * no more code points to return, returns DONE.
-   * @return the current point.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       next32PostInc(void);
-
-  /**
-   * Returns FALSE if there are no more code units or code points
-   * at or after the current position in the iteration range.
-   * This is used with nextPostInc() or next32PostInc() in forward
-   * iteration.
-   * @return FALSE if there are no more code units or code points
-   * at or after the current position in the iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UBool        hasNext();
-
-  /**
-   * Advances to the previous code unit in the iteration range (toward
-   * startIndex()), and returns that code unit.  If there are no more
-   * code units to return, returns DONE.
-   * @return the previous code unit in the iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UChar         previous(void);
-
-  /**
-   * Advances to the previous code point in the iteration range (toward
-   * startIndex()), and returns that code point.  If there are no more
-   * code points to return, returns DONE.
-   * @return the previous code point in the iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       previous32(void);
-
-  /**
-   * Returns FALSE if there are no more code units or code points
-   * before the current position in the iteration range.
-   * This is used with previous() or previous32() in backward
-   * iteration.
-   * @return FALSE if there are no more code units or code points
-   * before the current position in the iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UBool        hasPrevious();
-
-  /**
-   * Moves the current position relative to the start or end of the
-   * iteration range, or relative to the current position itself.
-   * The movement is expressed in numbers of code units forward
-   * or backward by specifying a positive or negative delta.
-   * @param delta the position relative to origin. A positive delta means forward;
-   * a negative delta means backward.
-   * @param origin Origin enumeration {kStart, kCurrent, kEnd}
-   * @return the new position
-   * @stable ICU 2.0
-   */
-  virtual int32_t      move(int32_t delta, EOrigin origin);
-
-  /**
-   * Moves the current position relative to the start or end of the
-   * iteration range, or relative to the current position itself.
-   * The movement is expressed in numbers of code points forward
-   * or backward by specifying a positive or negative delta.
-   * @param delta the position relative to origin. A positive delta means forward;
-   * a negative delta means backward.
-   * @param origin Origin enumeration {kStart, kCurrent, kEnd}
-   * @return the new position
-   * @stable ICU 2.0
-   */
-  virtual int32_t      move32(int32_t delta, EOrigin origin);
-
-  /**
-   * Sets the iterator to iterate over a new range of text
-   * @stable ICU 2.0
-   */
-  void setText(const UChar* newText, int32_t newTextLength);
-
-  /**
-   * Copies the UChar array under iteration into the UnicodeString
-   * referred to by "result".  Even if this iterator iterates across
-   * only a part of this string, the whole string is copied.
-   * @param result Receives a copy of the text under iteration.
-   * @stable ICU 2.0
-   */
-  virtual void            getText(UnicodeString& result);
-
-  /**
-   * Return a class ID for this class (not really public)
-   * @return a class ID for this class
-   * @stable ICU 2.0
-   */
-  static UClassID         U_EXPORT2 getStaticClassID(void);
-
-  /**
-   * Return a class ID for this object (not really public)
-   * @return a class ID for this object.
-   * @stable ICU 2.0
-   */
-  virtual UClassID        getDynamicClassID(void) const;
-
-protected:
-  /**
-   * Protected constructor
-   * @stable ICU 2.0
-   */
-  UCharCharacterIterator();
-  /**
-   * Protected member text
-   * @stable ICU 2.0
-   */
-  const UChar*            text;
-
-};
-
-U_NAMESPACE_END
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/uchriter.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uchriter.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uchriter.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uchriter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,381 @@
+/*
+**********************************************************************
+*   Copyright (C) 1998-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef UCHRITER_H
+#define UCHRITER_H
+
+#include "unicode/utypes.h"
+#include "unicode/chariter.h"
+
+/**
+ * \file 
+ * \brief C++ API: UChar Character Iterator
+ */
+ 
+U_NAMESPACE_BEGIN
+
+/**
+ * A concrete subclass of CharacterIterator that iterates over the
+ * characters (code units or code points) in a UChar array.
+ * It's possible not only to create an
+ * iterator that iterates over an entire UChar array, but also to
+ * create one that iterates over only a subrange of a UChar array
+ * (iterators over different subranges of the same UChar array don't
+ * compare equal).
+ * @see CharacterIterator
+ * @see ForwardCharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
+public:
+  /**
+   * Create an iterator over the UChar array referred to by "textPtr".
+   * The iteration range is 0 to <code>length-1</code>.
+   * text is only aliased, not adopted (the
+   * destructor will not delete it).
+   * @param textPtr The UChar array to be iterated over
+   * @param length The length of the UChar array
+   * @stable ICU 2.0
+   */
+  UCharCharacterIterator(const UChar* textPtr, int32_t length);
+
+  /**
+   * Create an iterator over the UChar array referred to by "textPtr".
+   * The iteration range is 0 to <code>length-1</code>.
+   * text is only aliased, not adopted (the
+   * destructor will not delete it).
+   * The starting
+   * position is specified by "position". If "position" is outside the valid
+   * iteration range, the behavior of this object is undefined.
+   * @param textPtr The UChar array to be iteratd over
+   * @param length The length of the UChar array
+   * @param position The starting position of the iteration
+   * @stable ICU 2.0
+   */
+  UCharCharacterIterator(const UChar* textPtr, int32_t length,
+                         int32_t position);
+
+  /**
+   * Create an iterator over the UChar array referred to by "textPtr".
+   * The iteration range is 0 to <code>end-1</code>.
+   * text is only aliased, not adopted (the
+   * destructor will not delete it).
+   * The starting
+   * position is specified by "position". If begin and end do not
+   * form a valid iteration range or "position" is outside the valid
+   * iteration range, the behavior of this object is undefined.
+   * @param textPtr The UChar array to be iterated over
+   * @param length The length of the UChar array
+   * @param textBegin  The begin position of the iteration range
+   * @param textEnd    The end position of the iteration range
+   * @param position    The starting position of the iteration
+   * @stable ICU 2.0
+   */
+  UCharCharacterIterator(const UChar* textPtr, int32_t length,
+                         int32_t textBegin,
+                         int32_t textEnd,
+                         int32_t position);
+
+  /**
+   * Copy constructor.  The new iterator iterates over the same range
+   * of the same string as "that", and its initial position is the
+   * same as "that"'s current position.
+   * @param that The UCharCharacterIterator to be copied
+   * @stable ICU 2.0
+   */
+  UCharCharacterIterator(const UCharCharacterIterator&  that);
+
+  /**
+   * Destructor.
+   * @stable ICU 2.0
+   */
+  virtual ~UCharCharacterIterator();
+
+  /**
+   * Assignment operator.  *this is altered to iterate over the sane
+   * range of the same string as "that", and refers to the same
+   * character within that string as "that" does.
+   * @param that The object to be copied
+   * @return the newly created object
+   * @stable ICU 2.0
+   */
+  UCharCharacterIterator&
+  operator=(const UCharCharacterIterator&    that);
+
+  /**
+   * Returns true if the iterators iterate over the same range of the
+   * same string and are pointing at the same character.
+   * @param that The ForwardCharacterIterator used to be compared for equality
+   * @return true if the iterators iterate over the same range of the
+   * same string and are pointing at the same character.
+   * @stable ICU 2.0
+   */
+  virtual UBool          operator==(const ForwardCharacterIterator& that) const;
+
+  /**
+   * Generates a hash code for this iterator.
+   * @return the hash code.
+   * @stable ICU 2.0
+   */
+  virtual int32_t         hashCode(void) const;
+
+  /**
+   * Returns a new UCharCharacterIterator referring to the same
+   * character in the same range of the same string as this one.  The
+   * caller must delete the new iterator.
+   * @return the CharacterIterator newly created
+   * @stable ICU 2.0
+   */
+  virtual CharacterIterator* clone(void) const;
+
+  /**
+   * Sets the iterator to refer to the first code unit in its
+   * iteration range, and returns that code unit.
+   * This can be used to begin an iteration with next().
+   * @return the first code unit in its iteration range.
+   * @stable ICU 2.0
+   */
+  virtual UChar         first(void);
+
+  /**
+   * Sets the iterator to refer to the first code unit in its
+   * iteration range, returns that code unit, and moves the position
+   * to the second code unit. This is an alternative to setToStart()
+   * for forward iteration with nextPostInc().
+   * @return the first code unit in its iteration range
+   * @stable ICU 2.0
+   */
+  virtual UChar         firstPostInc(void);
+
+  /**
+   * Sets the iterator to refer to the first code point in its
+   * iteration range, and returns that code unit,
+   * This can be used to begin an iteration with next32().
+   * Note that an iteration with next32PostInc(), beginning with,
+   * e.g., setToStart() or firstPostInc(), is more efficient.
+   * @return the first code point in its iteration range
+   * @stable ICU 2.0
+   */
+  virtual UChar32       first32(void);
+
+  /**
+   * Sets the iterator to refer to the first code point in its
+   * iteration range, returns that code point, and moves the position
+   * to the second code point. This is an alternative to setToStart()
+   * for forward iteration with next32PostInc().
+   * @return the first code point in its iteration range.
+   * @stable ICU 2.0
+   */
+  virtual UChar32       first32PostInc(void);
+
+  /**
+   * Sets the iterator to refer to the last code unit in its
+   * iteration range, and returns that code unit.
+   * This can be used to begin an iteration with previous().
+   * @return the last code unit in its iteration range.
+   * @stable ICU 2.0
+   */
+  virtual UChar         last(void);
+
+  /**
+   * Sets the iterator to refer to the last code point in its
+   * iteration range, and returns that code unit.
+   * This can be used to begin an iteration with previous32().
+   * @return the last code point in its iteration range.
+   * @stable ICU 2.0
+   */
+  virtual UChar32       last32(void);
+
+  /**
+   * Sets the iterator to refer to the "position"-th code unit
+   * in the text-storage object the iterator refers to, and
+   * returns that code unit.
+   * @param position the position within the text-storage object
+   * @return the code unit
+   * @stable ICU 2.0
+   */
+  virtual UChar         setIndex(int32_t position);
+
+  /**
+   * Sets the iterator to refer to the beginning of the code point
+   * that contains the "position"-th code unit
+   * in the text-storage object the iterator refers to, and
+   * returns that code point.
+   * The current position is adjusted to the beginning of the code point
+   * (its first code unit).
+   * @param position the position within the text-storage object
+   * @return the code unit
+   * @stable ICU 2.0
+   */
+  virtual UChar32       setIndex32(int32_t position);
+
+  /**
+   * Returns the code unit the iterator currently refers to.
+   * @return the code unit the iterator currently refers to.
+   * @stable ICU 2.0
+   */
+  virtual UChar         current(void) const;
+
+  /**
+   * Returns the code point the iterator currently refers to.
+   * @return the code point the iterator currently refers to.
+   * @stable ICU 2.0
+   */
+  virtual UChar32       current32(void) const;
+
+  /**
+   * Advances to the next code unit in the iteration range (toward
+   * endIndex()), and returns that code unit.  If there are no more
+   * code units to return, returns DONE.
+   * @return the next code unit in the iteration range.
+   * @stable ICU 2.0
+   */
+  virtual UChar         next(void);
+
+  /**
+   * Gets the current code unit for returning and advances to the next code unit
+   * in the iteration range
+   * (toward endIndex()).  If there are
+   * no more code units to return, returns DONE.
+   * @return the current code unit.
+   * @stable ICU 2.0
+   */
+  virtual UChar         nextPostInc(void);
+
+  /**
+   * Advances to the next code point in the iteration range (toward
+   * endIndex()), and returns that code point.  If there are no more
+   * code points to return, returns DONE.
+   * Note that iteration with "pre-increment" semantics is less
+   * efficient than iteration with "post-increment" semantics
+   * that is provided by next32PostInc().
+   * @return the next code point in the iteration range.
+   * @stable ICU 2.0
+   */
+  virtual UChar32       next32(void);
+
+  /**
+   * Gets the current code point for returning and advances to the next code point
+   * in the iteration range
+   * (toward endIndex()).  If there are
+   * no more code points to return, returns DONE.
+   * @return the current point.
+   * @stable ICU 2.0
+   */
+  virtual UChar32       next32PostInc(void);
+
+  /**
+   * Returns FALSE if there are no more code units or code points
+   * at or after the current position in the iteration range.
+   * This is used with nextPostInc() or next32PostInc() in forward
+   * iteration.
+   * @return FALSE if there are no more code units or code points
+   * at or after the current position in the iteration range.
+   * @stable ICU 2.0
+   */
+  virtual UBool        hasNext();
+
+  /**
+   * Advances to the previous code unit in the iteration range (toward
+   * startIndex()), and returns that code unit.  If there are no more
+   * code units to return, returns DONE.
+   * @return the previous code unit in the iteration range.
+   * @stable ICU 2.0
+   */
+  virtual UChar         previous(void);
+
+  /**
+   * Advances to the previous code point in the iteration range (toward
+   * startIndex()), and returns that code point.  If there are no more
+   * code points to return, returns DONE.
+   * @return the previous code point in the iteration range.
+   * @stable ICU 2.0
+   */
+  virtual UChar32       previous32(void);
+
+  /**
+   * Returns FALSE if there are no more code units or code points
+   * before the current position in the iteration range.
+   * This is used with previous() or previous32() in backward
+   * iteration.
+   * @return FALSE if there are no more code units or code points
+   * before the current position in the iteration range.
+   * @stable ICU 2.0
+   */
+  virtual UBool        hasPrevious();
+
+  /**
+   * Moves the current position relative to the start or end of the
+   * iteration range, or relative to the current position itself.
+   * The movement is expressed in numbers of code units forward
+   * or backward by specifying a positive or negative delta.
+   * @param delta the position relative to origin. A positive delta means forward;
+   * a negative delta means backward.
+   * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+   * @return the new position
+   * @stable ICU 2.0
+   */
+  virtual int32_t      move(int32_t delta, EOrigin origin);
+
+  /**
+   * Moves the current position relative to the start or end of the
+   * iteration range, or relative to the current position itself.
+   * The movement is expressed in numbers of code points forward
+   * or backward by specifying a positive or negative delta.
+   * @param delta the position relative to origin. A positive delta means forward;
+   * a negative delta means backward.
+   * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+   * @return the new position
+   * @stable ICU 2.0
+   */
+  virtual int32_t      move32(int32_t delta, EOrigin origin);
+
+  /**
+   * Sets the iterator to iterate over a new range of text
+   * @stable ICU 2.0
+   */
+  void setText(const UChar* newText, int32_t newTextLength);
+
+  /**
+   * Copies the UChar array under iteration into the UnicodeString
+   * referred to by "result".  Even if this iterator iterates across
+   * only a part of this string, the whole string is copied.
+   * @param result Receives a copy of the text under iteration.
+   * @stable ICU 2.0
+   */
+  virtual void            getText(UnicodeString& result);
+
+  /**
+   * Return a class ID for this class (not really public)
+   * @return a class ID for this class
+   * @stable ICU 2.0
+   */
+  static UClassID         U_EXPORT2 getStaticClassID(void);
+
+  /**
+   * Return a class ID for this object (not really public)
+   * @return a class ID for this object.
+   * @stable ICU 2.0
+   */
+  virtual UClassID        getDynamicClassID(void) const;
+
+protected:
+  /**
+   * Protected constructor
+   * @stable ICU 2.0
+   */
+  UCharCharacterIterator();
+  /**
+   * Protected member text
+   * @stable ICU 2.0
+   */
+  const UChar*            text;
+
+};
+
+U_NAMESPACE_END
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/uclean.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uclean.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uclean.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,267 +0,0 @@
-/*
-******************************************************************************
-*                                                                            *
-* Copyright (C) 2001-2005, International Business Machines                   *
-*                Corporation and others. All Rights Reserved.                *
-*                                                                            *
-******************************************************************************
-*   file name:  uclean.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2001July05
-*   created by: George Rhoten
-*/
-
-#ifndef __UCLEAN_H__
-#define __UCLEAN_H__
-
-#include "unicode/utypes.h"
-/**
- * \file
- * \brief C API: Initialize and clean up ICU
- */
- 
-/**
- *  Initialize ICU. The description further below applies to ICU 2.6 to ICU 3.4.
- *  Starting with ICU 3.4, u_init() needs not be called any more for
- *  ensuring thread safety, but it can give an indication for whether ICU
- *  can load its data. In ICU 3.4, it will try to load the converter alias table
- *  (cnvalias.icu) and give an error code if that fails.
- *  This may change in the future.
- *  <p>
- *  For ensuring the availability of necessary data, an application should
- *  open the service objects (converters, collators, etc.) that it will use
- *  and check for error codes there.
- *  <p>
- *  Documentation for ICU 2.6 to ICU 3.4:
- *  <p>
- *  This function loads and initializes data items
- *  that are required internally by various ICU functions.  Use of this explicit
- *  initialization is required in multi-threaded applications; in 
- *  single threaded apps, use is optional, but incurs little additional
- *  cost, and is thus recommended.
- *  <p>
- *  In multi-threaded applications, u_init() should be called  in the
- *  main thread before starting additional threads, or, alternatively
- *  it can be called in each individual thread once, before other ICU
- *  functions are called in that thread.  In this second scenario, the
- *  application must guarantee that the first call to u_init() happen
- *  without contention, in a single thread only.
- *  <p>
- *  If <code>u_setMemoryFunctions()</code> or 
- *  <code>u_setMutexFunctions</code> are needed (uncommon), they must be
- *  called _before_ <code>u_init()</code>.
- *  <p>
- *  Extra, repeated, or otherwise unneeded calls to u_init() do no harm,
- *  other than taking a small amount of time.
- *
- * @param status An ICU UErrorCode parameter. It must not be <code>NULL</code>.
- *    An Error will be returned if some required part of ICU data can not
- *    be loaded or initialized.
- *    The function returns immediately if the input error code indicates a
- *    failure, as usual.
- *
- * @stable ICU 2.6
- */  
-U_STABLE void U_EXPORT2 
-u_init(UErrorCode *status);
-
-/**
- * Clean up the system resources, such as allocated memory or open files,
- * used in all ICU libraries. This will free/delete all memory owned by the
- * ICU libraries, and return them to their original load state. All open ICU
- * items (collators, resource bundles, converters, etc.) must be closed before
- * calling this function, otherwise ICU may not free its allocated memory
- * (e.g. close your converters and resource bundles before calling this
- * function). Generally, this function should be called once just before
- * an application exits. For applications that dynamically load and unload
- * the ICU libraries (relatively uncommon), u_cleanup() should be called
- * just before the library unload.
- * <p>
- * u_cleanup() also clears any ICU heap functions, mutex functions or
- * trace functions that may have been set for the process.  
- * This has the effect of restoring ICU to its initial condition, before
- * any of these override functions were installed.  Refer to
- * u_setMemoryFunctions(), u_setMutexFunctions and 
- * utrace_setFunctions().  If ICU is to be reinitialized after after
- * calling u_cleanup(), these runtime override functions will need to
- * be set up again if they are still required.
- * <p>
- * u_cleanup() is not thread safe.  All other threads should stop using ICU
- * before calling this function.
- * <p>
- * Any open ICU items will be left in an undefined state by u_cleanup(),
- * and any subsequent attempt to use such an item will give unpredictable
- * results.
- * <p>
- * After calling u_cleanup(), an application may continue to use ICU by
- * calling u_init().  An application must invoke u_init() first from one single
- * thread before allowing other threads call u_init().  All threads existing
- * at the time of the first thread's call to u_init() must also call
- * u_init() themselves before continuing with other ICU operations.  
- * <p>
- * The use of u_cleanup() just before an application terminates is optional,
- * but it should be called only once for performance reasons. The primary
- * benefit is to eliminate reports of memory or resource leaks originating
- * in ICU code from the results generated by heap analysis tools.
- * <p>
- * <strong>Use this function with great care!</strong>
- * </p>
- *
- * @stable ICU 2.0
- * @system
- */
-U_STABLE void U_EXPORT2 
-u_cleanup(void);
-
-
-
-
-/**
-  * An opaque pointer type that represents an ICU mutex.
-  * For user-implemented mutexes, the value will typically point to a
-  *  struct or object that implements the mutex.
-  * @stable ICU 2.8
-  * @system
-  */
-typedef void *UMTX;
-
-/**
-  *  Function Pointer type for a user supplied mutex initialization function.
-  *  The user-supplied function will be called by ICU whenever ICU needs to create a
-  *  new mutex.  The function implementation should create a mutex, and store a pointer
-  *  to something that uniquely identifies the mutex into the UMTX that is supplied
-  *  as a paramter.
-  *  @param context user supplied value, obtained from from u_setMutexFunctions().
-  *  @param mutex   Receives a pointer that identifies the new mutex.
-  *                 The mutex init function must set the UMTX to a non-null value.   
-  *                 Subsequent calls by ICU to lock, unlock, or destroy a mutex will 
-  *                 identify the mutex by the UMTX value.
-  *  @param status  Error status.  Report errors back to ICU by setting this variable
-  *                 with an error code.
-  *  @stable ICU 2.8
-  *  @system
-  */
-typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX  *mutex, UErrorCode* status);
-
-
-/**
-  *  Function Pointer type for a user supplied mutex functions.
-  *  One of the  user-supplied functions with this signature will be called by ICU
-  *  whenever ICU needs to lock, unlock, or destroy a mutex.
-  *  @param context user supplied value, obtained from from u_setMutexFunctions().
-  *  @param mutex   specify the mutex on which to operate.
-  *  @stable ICU 2.8
-  *  @system
-  */
-typedef void U_CALLCONV UMtxFn   (const void *context, UMTX  *mutex);
-
-
-/**
-  *  Set the functions that ICU will use for mutex operations
-  *  Use of this function is optional; by default (without this function), ICU will
-  *  directly access system functions for mutex operations
-  *  This function can only be used when ICU is in an initial, unused state, before
-  *  u_init() has been called.
-  *  This function may be used even when ICU has been built without multi-threaded
-  *  support  (see ICU_USE_THREADS pre-processor variable, umutex.h)
-  *  @param context This pointer value will be saved, and then (later) passed as
-  *                 a parameter to the user-supplied mutex functions each time they
-  *                 are called. 
-  *  @param init    Pointer to a mutex initialization function.  Must be non-null.
-  *  @param destroy Pointer to the mutex destroy function.  Must be non-null.
-  *  @param lock    pointer to the mutex lock function.  Must be non-null.
-  *  @param unlock  Pointer to the mutex unlock function.  Must be non-null.
-  *  @param status  Receives error values.
-  *  @stable ICU 2.8
-  *  @system
-  */  
-U_STABLE void U_EXPORT2 
-u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock,
-                    UErrorCode *status);
-
-
-/**
-  *  Pointer type for a user supplied atomic increment or decrement function.
-  *  @param context user supplied value, obtained from from u_setAtomicIncDecFunctions().
-  *  @param p   Pointer to a 32 bit int to be incremented or decremented
-  *  @return    The value of the variable after the inc or dec operation.
-  *  @stable ICU 2.8
-  *  @system
-  */
-typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p);
-
-/**
- *  Set the functions that ICU will use for atomic increment and decrement of int32_t values.
- *  Use of this function is optional; by default (without this function), ICU will
- *  use its own internal implementation of atomic increment/decrement.
- *  This function can only be used when ICU is in an initial, unused state, before
- *  u_init() has been called.
- *  @param context This pointer value will be saved, and then (later) passed as
- *                 a parameter to the increment and decrement functions each time they
- *                 are called.  This function can only be called 
- *  @param inc     Pointer to a function to do an atomic increment operation.  Must be non-null.
- *  @param dec     Pointer to a function to do an atomic decrement operation.  Must be non-null.
- *  @param status  Receives error values.
- *  @stable ICU 2.8
- *  @system
- */  
-U_STABLE void U_EXPORT2 
-u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec,
-                    UErrorCode *status);
-
-
-
-/**
-  *  Pointer type for a user supplied memory allocation function.
-  *  @param context user supplied value, obtained from from u_setMemoryFunctions().
-  *  @param size    The number of bytes to be allocated
-  *  @return        Pointer to the newly allocated memory, or NULL if the allocation failed.
-  *  @stable ICU 2.8
-  *  @system
-  */
-typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
-/**
-  *  Pointer type for a user supplied memory re-allocation function.
-  *  @param context user supplied value, obtained from from u_setMemoryFunctions().
-  *  @param size    The number of bytes to be allocated
-  *  @return        Pointer to the newly allocated memory, or NULL if the allocation failed.
-  *  @stable ICU 2.8
-  *  @system
-  */
-typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size);
-/**
-  *  Pointer type for a user supplied memory free  function.  Behavior should be
-  *  similar the standard C library free().
-  *  @param context user supplied value, obtained from from u_setMemoryFunctions().
-  *  @param mem     Pointer to the memory block to be resized
-  *  @param size    The new size for the block
-  *  @return        Pointer to the resized memory block, or NULL if the resizing failed.
-  *  @stable ICU 2.8
-  *  @system
-  */
-typedef void  U_CALLCONV UMemFreeFn (const void *context, void *mem);
-
-/**
- *  Set the functions that ICU will use for memory allocation.
- *  Use of this function is optional; by default (without this function), ICU will
- *  use the standard C library malloc() and free() functions.
- *  This function can only be used when ICU is in an initial, unused state, before
- *  u_init() has been called.
- *  @param context This pointer value will be saved, and then (later) passed as
- *                 a parameter to the memory functions each time they
- *                 are called.
- *  @param a       Pointer to a user-supplied malloc function.
- *  @param r       Pointer to a user-supplied realloc function.
- *  @param f       Pointer to a user-supplied free function.
- *  @param status  Receives error values.
- *  @stable ICU 2.8
- *  @system
- */  
-U_STABLE void U_EXPORT2 
-u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, 
-                    UErrorCode *status);
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/uclean.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uclean.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uclean.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uclean.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,267 @@
+/*
+******************************************************************************
+*                                                                            *
+* Copyright (C) 2001-2005, International Business Machines                   *
+*                Corporation and others. All Rights Reserved.                *
+*                                                                            *
+******************************************************************************
+*   file name:  uclean.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001July05
+*   created by: George Rhoten
+*/
+
+#ifndef __UCLEAN_H__
+#define __UCLEAN_H__
+
+#include "unicode/utypes.h"
+/**
+ * \file
+ * \brief C API: Initialize and clean up ICU
+ */
+ 
+/**
+ *  Initialize ICU. The description further below applies to ICU 2.6 to ICU 3.4.
+ *  Starting with ICU 3.4, u_init() needs not be called any more for
+ *  ensuring thread safety, but it can give an indication for whether ICU
+ *  can load its data. In ICU 3.4, it will try to load the converter alias table
+ *  (cnvalias.icu) and give an error code if that fails.
+ *  This may change in the future.
+ *  <p>
+ *  For ensuring the availability of necessary data, an application should
+ *  open the service objects (converters, collators, etc.) that it will use
+ *  and check for error codes there.
+ *  <p>
+ *  Documentation for ICU 2.6 to ICU 3.4:
+ *  <p>
+ *  This function loads and initializes data items
+ *  that are required internally by various ICU functions.  Use of this explicit
+ *  initialization is required in multi-threaded applications; in 
+ *  single threaded apps, use is optional, but incurs little additional
+ *  cost, and is thus recommended.
+ *  <p>
+ *  In multi-threaded applications, u_init() should be called  in the
+ *  main thread before starting additional threads, or, alternatively
+ *  it can be called in each individual thread once, before other ICU
+ *  functions are called in that thread.  In this second scenario, the
+ *  application must guarantee that the first call to u_init() happen
+ *  without contention, in a single thread only.
+ *  <p>
+ *  If <code>u_setMemoryFunctions()</code> or 
+ *  <code>u_setMutexFunctions</code> are needed (uncommon), they must be
+ *  called _before_ <code>u_init()</code>.
+ *  <p>
+ *  Extra, repeated, or otherwise unneeded calls to u_init() do no harm,
+ *  other than taking a small amount of time.
+ *
+ * @param status An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ *    An Error will be returned if some required part of ICU data can not
+ *    be loaded or initialized.
+ *    The function returns immediately if the input error code indicates a
+ *    failure, as usual.
+ *
+ * @stable ICU 2.6
+ */  
+U_STABLE void U_EXPORT2 
+u_init(UErrorCode *status);
+
+/**
+ * Clean up the system resources, such as allocated memory or open files,
+ * used in all ICU libraries. This will free/delete all memory owned by the
+ * ICU libraries, and return them to their original load state. All open ICU
+ * items (collators, resource bundles, converters, etc.) must be closed before
+ * calling this function, otherwise ICU may not free its allocated memory
+ * (e.g. close your converters and resource bundles before calling this
+ * function). Generally, this function should be called once just before
+ * an application exits. For applications that dynamically load and unload
+ * the ICU libraries (relatively uncommon), u_cleanup() should be called
+ * just before the library unload.
+ * <p>
+ * u_cleanup() also clears any ICU heap functions, mutex functions or
+ * trace functions that may have been set for the process.  
+ * This has the effect of restoring ICU to its initial condition, before
+ * any of these override functions were installed.  Refer to
+ * u_setMemoryFunctions(), u_setMutexFunctions and 
+ * utrace_setFunctions().  If ICU is to be reinitialized after after
+ * calling u_cleanup(), these runtime override functions will need to
+ * be set up again if they are still required.
+ * <p>
+ * u_cleanup() is not thread safe.  All other threads should stop using ICU
+ * before calling this function.
+ * <p>
+ * Any open ICU items will be left in an undefined state by u_cleanup(),
+ * and any subsequent attempt to use such an item will give unpredictable
+ * results.
+ * <p>
+ * After calling u_cleanup(), an application may continue to use ICU by
+ * calling u_init().  An application must invoke u_init() first from one single
+ * thread before allowing other threads call u_init().  All threads existing
+ * at the time of the first thread's call to u_init() must also call
+ * u_init() themselves before continuing with other ICU operations.  
+ * <p>
+ * The use of u_cleanup() just before an application terminates is optional,
+ * but it should be called only once for performance reasons. The primary
+ * benefit is to eliminate reports of memory or resource leaks originating
+ * in ICU code from the results generated by heap analysis tools.
+ * <p>
+ * <strong>Use this function with great care!</strong>
+ * </p>
+ *
+ * @stable ICU 2.0
+ * @system
+ */
+U_STABLE void U_EXPORT2 
+u_cleanup(void);
+
+
+
+
+/**
+  * An opaque pointer type that represents an ICU mutex.
+  * For user-implemented mutexes, the value will typically point to a
+  *  struct or object that implements the mutex.
+  * @stable ICU 2.8
+  * @system
+  */
+typedef void *UMTX;
+
+/**
+  *  Function Pointer type for a user supplied mutex initialization function.
+  *  The user-supplied function will be called by ICU whenever ICU needs to create a
+  *  new mutex.  The function implementation should create a mutex, and store a pointer
+  *  to something that uniquely identifies the mutex into the UMTX that is supplied
+  *  as a paramter.
+  *  @param context user supplied value, obtained from from u_setMutexFunctions().
+  *  @param mutex   Receives a pointer that identifies the new mutex.
+  *                 The mutex init function must set the UMTX to a non-null value.   
+  *                 Subsequent calls by ICU to lock, unlock, or destroy a mutex will 
+  *                 identify the mutex by the UMTX value.
+  *  @param status  Error status.  Report errors back to ICU by setting this variable
+  *                 with an error code.
+  *  @stable ICU 2.8
+  *  @system
+  */
+typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX  *mutex, UErrorCode* status);
+
+
+/**
+  *  Function Pointer type for a user supplied mutex functions.
+  *  One of the  user-supplied functions with this signature will be called by ICU
+  *  whenever ICU needs to lock, unlock, or destroy a mutex.
+  *  @param context user supplied value, obtained from from u_setMutexFunctions().
+  *  @param mutex   specify the mutex on which to operate.
+  *  @stable ICU 2.8
+  *  @system
+  */
+typedef void U_CALLCONV UMtxFn   (const void *context, UMTX  *mutex);
+
+
+/**
+  *  Set the functions that ICU will use for mutex operations
+  *  Use of this function is optional; by default (without this function), ICU will
+  *  directly access system functions for mutex operations
+  *  This function can only be used when ICU is in an initial, unused state, before
+  *  u_init() has been called.
+  *  This function may be used even when ICU has been built without multi-threaded
+  *  support  (see ICU_USE_THREADS pre-processor variable, umutex.h)
+  *  @param context This pointer value will be saved, and then (later) passed as
+  *                 a parameter to the user-supplied mutex functions each time they
+  *                 are called. 
+  *  @param init    Pointer to a mutex initialization function.  Must be non-null.
+  *  @param destroy Pointer to the mutex destroy function.  Must be non-null.
+  *  @param lock    pointer to the mutex lock function.  Must be non-null.
+  *  @param unlock  Pointer to the mutex unlock function.  Must be non-null.
+  *  @param status  Receives error values.
+  *  @stable ICU 2.8
+  *  @system
+  */  
+U_STABLE void U_EXPORT2 
+u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock,
+                    UErrorCode *status);
+
+
+/**
+  *  Pointer type for a user supplied atomic increment or decrement function.
+  *  @param context user supplied value, obtained from from u_setAtomicIncDecFunctions().
+  *  @param p   Pointer to a 32 bit int to be incremented or decremented
+  *  @return    The value of the variable after the inc or dec operation.
+  *  @stable ICU 2.8
+  *  @system
+  */
+typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p);
+
+/**
+ *  Set the functions that ICU will use for atomic increment and decrement of int32_t values.
+ *  Use of this function is optional; by default (without this function), ICU will
+ *  use its own internal implementation of atomic increment/decrement.
+ *  This function can only be used when ICU is in an initial, unused state, before
+ *  u_init() has been called.
+ *  @param context This pointer value will be saved, and then (later) passed as
+ *                 a parameter to the increment and decrement functions each time they
+ *                 are called.  This function can only be called 
+ *  @param inc     Pointer to a function to do an atomic increment operation.  Must be non-null.
+ *  @param dec     Pointer to a function to do an atomic decrement operation.  Must be non-null.
+ *  @param status  Receives error values.
+ *  @stable ICU 2.8
+ *  @system
+ */  
+U_STABLE void U_EXPORT2 
+u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec,
+                    UErrorCode *status);
+
+
+
+/**
+  *  Pointer type for a user supplied memory allocation function.
+  *  @param context user supplied value, obtained from from u_setMemoryFunctions().
+  *  @param size    The number of bytes to be allocated
+  *  @return        Pointer to the newly allocated memory, or NULL if the allocation failed.
+  *  @stable ICU 2.8
+  *  @system
+  */
+typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
+/**
+  *  Pointer type for a user supplied memory re-allocation function.
+  *  @param context user supplied value, obtained from from u_setMemoryFunctions().
+  *  @param size    The number of bytes to be allocated
+  *  @return        Pointer to the newly allocated memory, or NULL if the allocation failed.
+  *  @stable ICU 2.8
+  *  @system
+  */
+typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size);
+/**
+  *  Pointer type for a user supplied memory free  function.  Behavior should be
+  *  similar the standard C library free().
+  *  @param context user supplied value, obtained from from u_setMemoryFunctions().
+  *  @param mem     Pointer to the memory block to be resized
+  *  @param size    The new size for the block
+  *  @return        Pointer to the resized memory block, or NULL if the resizing failed.
+  *  @stable ICU 2.8
+  *  @system
+  */
+typedef void  U_CALLCONV UMemFreeFn (const void *context, void *mem);
+
+/**
+ *  Set the functions that ICU will use for memory allocation.
+ *  Use of this function is optional; by default (without this function), ICU will
+ *  use the standard C library malloc() and free() functions.
+ *  This function can only be used when ICU is in an initial, unused state, before
+ *  u_init() has been called.
+ *  @param context This pointer value will be saved, and then (later) passed as
+ *                 a parameter to the memory functions each time they
+ *                 are called.
+ *  @param a       Pointer to a user-supplied malloc function.
+ *  @param r       Pointer to a user-supplied realloc function.
+ *  @param f       Pointer to a user-supplied free function.
+ *  @param status  Receives error values.
+ *  @stable ICU 2.8
+ *  @system
+ */  
+U_STABLE void U_EXPORT2 
+u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, 
+                    UErrorCode *status);
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/ucnv.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ucnv.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ucnv.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1967 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1999-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
- *  ucnv.h:
- *  External APIs for the ICU's codeset conversion library
- *  Bertrand A. Damiba
- *
- * Modification History:
- *
- *   Date        Name        Description
- *   04/04/99    helena      Fixed internal header inclusion.
- *   05/11/00    helena      Added setFallback and usesFallback APIs.
- *   06/29/2000  helena      Major rewrite of the callback APIs.
- *   12/07/2000  srl         Update of documentation
- */
-
-/**
- * \file
- * \brief C API: Character conversion 
- *
- * <h2>Character Conversion C API</h2>
- *
- * <p>This API is used to convert codepage or character encoded data to and
- * from UTF-16. You can open a converter with {@link ucnv_open() }. With that
- * converter, you can get its properties, set options, convert your data and
- * close the converter.</p>
- *
- * <p>Since many software programs recogize different converter names for
- * different types of converters, there are other functions in this API to
- * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() },
- * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the
- * more frequently used alias functions to get this information.</p>
- *
- * <p>When a converter encounters an illegal, irregular, invalid or unmappable character
- * its default behavior is to use a substitution character to replace the
- * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() }
- * or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines
- * many other callback actions that can be used instead of a character substitution.</p>
- *
- * <p>More information about this API can be found in our 
- * <a href="http://icu-project.org/userguide/conversion.html">User's
- * Guide</a>.</p>
- */
-
-#ifndef UCNV_H
-#define UCNV_H
-
-#include "unicode/ucnv_err.h"
-#include "unicode/uenum.h"
-
-#ifndef __USET_H__
-
-/**
- * USet is the C API type for Unicode sets.
- * It is forward-declared here to avoid including the header file if related
- * conversion APIs are not used.
- * See unicode/uset.h
- *
- * @see ucnv_getUnicodeSet
- * @stable ICU 2.6
- */
-struct USet;
-/** @stable ICU 2.6 */
-typedef struct USet USet;
-
-#endif
-
-#if !UCONFIG_NO_CONVERSION
-
-U_CDECL_BEGIN
-
-/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */
-#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
-/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */
-#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
-
-/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
-#define  UCNV_SI 0x0F
-/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
-#define  UCNV_SO 0x0E
-
-/**
- * Enum for specifying basic types of converters
- * @see ucnv_getType
- * @stable ICU 2.0
- */
-typedef enum {
-    UCNV_UNSUPPORTED_CONVERTER = -1,
-    UCNV_SBCS = 0,
-    UCNV_DBCS = 1,
-    UCNV_MBCS = 2,
-    UCNV_LATIN_1 = 3,
-    UCNV_UTF8 = 4,
-    UCNV_UTF16_BigEndian = 5,
-    UCNV_UTF16_LittleEndian = 6,
-    UCNV_UTF32_BigEndian = 7,
-    UCNV_UTF32_LittleEndian = 8,
-    UCNV_EBCDIC_STATEFUL = 9,
-    UCNV_ISO_2022 = 10,
-
-    UCNV_LMBCS_1 = 11,
-    UCNV_LMBCS_2, 
-    UCNV_LMBCS_3,
-    UCNV_LMBCS_4,
-    UCNV_LMBCS_5,
-    UCNV_LMBCS_6,
-    UCNV_LMBCS_8,
-    UCNV_LMBCS_11,
-    UCNV_LMBCS_16,
-    UCNV_LMBCS_17,
-    UCNV_LMBCS_18,
-    UCNV_LMBCS_19,
-    UCNV_LMBCS_LAST = UCNV_LMBCS_19,
-    UCNV_HZ,
-    UCNV_SCSU,
-    UCNV_ISCII,
-    UCNV_US_ASCII,
-    UCNV_UTF7,
-    UCNV_BOCU1,
-    UCNV_UTF16,
-    UCNV_UTF32,
-    UCNV_CESU8,
-    UCNV_IMAP_MAILBOX,
-
-    /* Number of converter types for which we have conversion routines. */
-    UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
-
-} UConverterType;
-
-/**
- * Enum for specifying which platform a converter ID refers to.
- * The use of platform/CCSID is not recommended. See ucnv_openCCSID().
- *
- * @see ucnv_getPlatform
- * @see ucnv_openCCSID
- * @see ucnv_getCCSID
- * @stable ICU 2.0
- */
-typedef enum {
-    UCNV_UNKNOWN = -1,
-    UCNV_IBM = 0
-} UConverterPlatform;
-
-/**
- * Function pointer for error callback in the codepage to unicode direction.
- * Called when an error has occured in conversion to unicode, or on open/close of the callback (see reason).
- * @param context Pointer to the callback's private data
- * @param args Information about the conversion in progress
- * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param reason Defines the reason the callback was invoked
- * @param pErrorCode    ICU error code in/out parameter.
- *                      For converter callback functions, set to a conversion error
- *                      before the call, and the callback may reset it to U_ZERO_ERROR.
- * @see ucnv_setToUCallBack
- * @see UConverterToUnicodeArgs
- * @stable ICU 2.0
- */
-typedef void (U_EXPORT2 *UConverterToUCallback) (
-                  const void* context,
-                  UConverterToUnicodeArgs *args,
-                  const char *codeUnits,
-                  int32_t length,
-                  UConverterCallbackReason reason,
-                  UErrorCode *pErrorCode);
-
-/**
- * Function pointer for error callback in the unicode to codepage direction.
- * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason).
- * @param context Pointer to the callback's private data
- * @param args Information about the conversion in progress
- * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
- * @param reason Defines the reason the callback was invoked
- * @param pErrorCode    ICU error code in/out parameter.
- *                      For converter callback functions, set to a conversion error
- *                      before the call, and the callback may reset it to U_ZERO_ERROR.
- * @see ucnv_setFromUCallBack
- * @stable ICU 2.0
- */
-typedef void (U_EXPORT2 *UConverterFromUCallback) (
-                    const void* context,
-                    UConverterFromUnicodeArgs *args,
-                    const UChar* codeUnits,
-                    int32_t length,
-                    UChar32 codePoint,
-                    UConverterCallbackReason reason,
-                    UErrorCode *pErrorCode);
-
-U_CDECL_END
-
-/**
- * Character that separates converter names from options and options from each other.
- * @see ucnv_open
- * @stable ICU 2.0
- */
-#define UCNV_OPTION_SEP_CHAR ','
-
-/**
- * String version of UCNV_OPTION_SEP_CHAR. 
- * @see ucnv_open
- * @stable ICU 2.0
- */
-#define UCNV_OPTION_SEP_STRING ","
-
-/**
- * Character that separates a converter option from its value.
- * @see ucnv_open
- * @stable ICU 2.0
- */
-#define UCNV_VALUE_SEP_CHAR '='
-
-/**
- * String version of UCNV_VALUE_SEP_CHAR. 
- * @see ucnv_open
- * @stable ICU 2.0
- */
-#define UCNV_VALUE_SEP_STRING "="
-
-/**
- * Converter option for specifying a locale.
- * For example, ucnv_open("SCSU,locale=ja", &errorCode);
- * See convrtrs.txt.
- *
- * @see ucnv_open
- * @stable ICU 2.0
- */
-#define UCNV_LOCALE_OPTION_STRING ",locale="
-
-/**
- * Converter option for specifying a version selector (0..9) for some converters.
- * For example, ucnv_open("UTF-7,version=1", &errorCode);
- * See convrtrs.txt.
- *
- * @see ucnv_open
- * @stable ICU 2.4
- */
-#define UCNV_VERSION_OPTION_STRING ",version="
-
-/**
- * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.
- * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on
- * S/390 (z/OS) Unix System Services (Open Edition).
- * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode);
- * See convrtrs.txt.
- *
- * @see ucnv_open
- * @stable ICU 2.4
- */
-#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl"
-
-/**
- * Do a fuzzy compare of two converter/alias names.
- * The comparison is case-insensitive, ignores leading zeroes if they are not
- * followed by further digits, and ignores all but letters and digits.
- * Thus the strings "UTF-8", "utf_8", "u*T at f08" and "Utf 8" are exactly equivalent.
- * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
- * at http://www.unicode.org/reports/tr22/
- *
- * @param name1 a converter name or alias, zero-terminated
- * @param name2 a converter name or alias, zero-terminated
- * @return 0 if the names match, or a negative value if the name1
- * lexically precedes name2, or a positive value if the name1
- * lexically follows name2.
- * @stable ICU 2.0
- */
-U_STABLE int U_EXPORT2
-ucnv_compareNames(const char *name1, const char *name2);
-
-
-/**
- * Creates a UConverter object with the name of a coded character set specified as a C string.
- * The actual name will be resolved with the alias file
- * using a case-insensitive string comparison that ignores
- * leading zeroes and all non-alphanumeric characters.
- * E.g., the names "UTF8", "utf-8", "u*T at f08" and "Utf 8" are all equivalent.
- * (See also ucnv_compareNames().)
- * If <code>NULL</code> is passed for the converter name, it will create one with the
- * getDefaultName return value.
- *
- * <p>A converter name for ICU 1.5 and above may contain options
- * like a locale specification to control the specific behavior of
- * the newly instantiated converter.
- * The meaning of the options depends on the particular converter.
- * If an option is not defined for or recognized by a given converter, then it is ignored.</p>
- *
- * <p>Options are appended to the converter name string, with a
- * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and
- * also between adjacent options.</p>
- *
- * <p>If the alias is ambiguous, then the preferred converter is used
- * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p>
- *
- * <p>The conversion behavior and names can vary between platforms. ICU may
- * convert some characters differently from other platforms. Details on this topic
- * are in the <a href="http://icu-project.org/userguide/conversion.html">User's
- * Guide</a>. Aliases starting with a "cp" prefix have no specific meaning
- * other than its an alias starting with the letters "cp". Please do not
- * associate any meaning to these aliases.</p>
- *
- * @param converterName Name of the coded character set table.
- *          This may have options appended to the string.
- *          IANA alias character set names, IBM CCSIDs starting with "ibm-",
- *          Windows codepage numbers starting with "windows-" are frequently
- *          used for this parameter. See ucnv_getAvailableName and
- *          ucnv_getAlias for a complete list that is available.
- *          If this parameter is NULL, the default converter will be used.
- * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
- * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
- * @see ucnv_openU
- * @see ucnv_openCCSID
- * @see ucnv_getAvailableName
- * @see ucnv_getAlias
- * @see ucnv_getDefaultName
- * @see ucnv_close
- * @see ucnv_compareNames
- * @stable ICU 2.0
- */
-U_STABLE UConverter* U_EXPORT2 
-ucnv_open(const char *converterName, UErrorCode *err);
-
-
-/**
- * Creates a Unicode converter with the names specified as unicode string. 
- * The name should be limited to the ASCII-7 alphanumerics range.
- * The actual name will be resolved with the alias file
- * using a case-insensitive string comparison that ignores
- * leading zeroes and all non-alphanumeric characters.
- * E.g., the names "UTF8", "utf-8", "u*T at f08" and "Utf 8" are all equivalent.
- * (See also ucnv_compareNames().)
- * If <TT>NULL</TT> is passed for the converter name, it will create 
- * one with the ucnv_getDefaultName() return value.
- * If the alias is ambiguous, then the preferred converter is used
- * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
- *
- * <p>See ucnv_open for the complete details</p>
- * @param name Name of the UConverter table in a zero terminated 
- *        Unicode string
- * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, 
- *        U_FILE_ACCESS_ERROR</TT>
- * @return the created Unicode converter object, or <TT>NULL</TT> if an 
- *        error occured
- * @see ucnv_open
- * @see ucnv_openCCSID
- * @see ucnv_close
- * @see ucnv_compareNames
- * @stable ICU 2.0
- */
-U_STABLE UConverter* U_EXPORT2 
-ucnv_openU(const UChar *name,
-           UErrorCode *err);
-
-/**
- * Creates a UConverter object from a CCSID number and platform pair.
- * Note that the usefulness of this function is limited to platforms with numeric
- * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for
- * encodings.
- *
- * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related.
- * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and
- * for some Unicode conversion tables there are multiple CCSIDs.
- * Some "alternate" Unicode conversion tables are provided by the
- * IBM CDRA conversion table registry.
- * The most prominent example of a systematic modification of conversion tables that is
- * not provided in the form of conversion table files in the repository is
- * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all
- * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well.
- *
- * Only IBM default conversion tables are accessible with ucnv_openCCSID().
- * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated
- * with that CCSID.
- *
- * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM.
- *
- * In summary, the use of CCSIDs and the associated API functions is not recommended.
- *
- * In order to open a converter with the default IBM CDRA Unicode conversion table,
- * you can use this function or use the prefix "ibm-":
- * \code
- *     char name[20];
- *     sprintf(name, "ibm-%hu", ccsid);
- *     cnv=ucnv_open(name, &errorCode);
- * \endcode
- *
- * In order to open a converter with the IBM S/390 Unix System Services variant
- * of a Unicode/EBCDIC conversion table,
- * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING:
- * \code
- *     char name[20];
- *     sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid);
- *     cnv=ucnv_open(name, &errorCode);
- * \endcode
- *
- * In order to open a converter from a Microsoft codepage number, use the prefix "cp":
- * \code
- *     char name[20];
- *     sprintf(name, "cp%hu", codepageID);
- *     cnv=ucnv_open(name, &errorCode);
- * \endcode
- *
- * If the alias is ambiguous, then the preferred converter is used
- * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
- *
- * @param codepage codepage number to create
- * @param platform the platform in which the codepage number exists
- * @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
- * @return the created Unicode converter object, or <TT>NULL</TT> if an error
- *   occured.
- * @see ucnv_open
- * @see ucnv_openU
- * @see ucnv_close
- * @see ucnv_getCCSID
- * @see ucnv_getPlatform
- * @see UConverterPlatform
- * @stable ICU 2.0
- */
-U_STABLE UConverter* U_EXPORT2
-ucnv_openCCSID(int32_t codepage,
-               UConverterPlatform platform,
-               UErrorCode * err);
-
-/**
- * <p>Creates a UConverter object specified from a packageName and a converterName.</p>
- * 
- * <p>The packageName and converterName must point to an ICU udata object, as defined by
- *   <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent.
- * Typically, packageName will refer to a (.dat) file, or to a package registered with
- * udata_setAppData(). Using a full file or directory pathname for packageName is deprecated.</p>
- * 
- * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be
- * stored in the converter cache or the alias table. The only way to open further converters
- * is call this function multiple times, or use the ucnv_safeClone() function to clone a 
- * 'master' converter.</p>
- *
- * <p>A future version of ICU may add alias table lookups and/or caching
- * to this function.</p>
- * 
- * <p>Example Use:
- *      <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code>
- * </p>
- *
- * @param packageName name of the package (equivalent to 'path' in udata_open() call)
- * @param converterName name of the data item to be used, without suffix.
- * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
- * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
- * @see udata_open
- * @see ucnv_open
- * @see ucnv_safeClone
- * @see ucnv_close
- * @stable ICU 2.2
- */
-U_STABLE UConverter* U_EXPORT2 
-ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
-
-/**
- * Thread safe converter cloning operation.
- * For most efficient operation, pass in a stackBuffer (and a *pBufferSize)
- * with at least U_CNV_SAFECLONE_BUFFERSIZE bytes of space.
- * If the buffer size is sufficient, then the clone will use the stack buffer;
- * otherwise, it will be allocated, and *pBufferSize will indicate
- * the actual size. (This should not occur with U_CNV_SAFECLONE_BUFFERSIZE.)
- *
- * You must ucnv_close() the clone in any case.
- *
- * If *pBufferSize==0, (regardless of whether stackBuffer==NULL or not)
- * then *pBufferSize will be changed to a sufficient size
- * for cloning this converter,
- * without actually cloning the converter ("pure pre-flighting").
- *
- * If *pBufferSize is greater than zero but not large enough for a stack-based
- * clone, then the converter is cloned using newly allocated memory
- * and *pBufferSize is changed to the necessary size.
- *
- * If the converter clone fits into the stack buffer but the stack buffer is not
- * sufficiently aligned for the clone, then the clone will use an
- * adjusted pointer and use an accordingly smaller buffer size.
- *
- * @param cnv converter to be cloned
- * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. 
- *  If buffer is not large enough, new memory will be allocated.
- *  Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
- * @param pBufferSize pointer to size of allocated space. pBufferSize must not be NULL.
- * @param status to indicate whether the operation went on smoothly or there were errors
- *  An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
- *  is used if any allocations were necessary.
- *  However, it is better to check if *pBufferSize grew for checking for
- *  allocations because warning codes can be overridden by subsequent
- *  function calls.
- * @return pointer to the new clone
- * @stable ICU 2.0
- */
-U_STABLE UConverter * U_EXPORT2 
-ucnv_safeClone(const UConverter *cnv, 
-               void             *stackBuffer,
-               int32_t          *pBufferSize, 
-               UErrorCode       *status);
-
-/**
- * \def U_CNV_SAFECLONE_BUFFERSIZE
- * Definition of a buffer size that is designed to be large enough for
- * converters to be cloned with ucnv_safeClone().
- * @stable ICU 2.0
- */
-#define U_CNV_SAFECLONE_BUFFERSIZE  1024
-
-/**
- * Deletes the unicode converter and releases resources associated
- * with just this instance.
- * Does not free up shared converter tables.
- *
- * @param converter the converter object to be deleted
- * @see ucnv_open
- * @see ucnv_openU
- * @see ucnv_openCCSID
- * @stable ICU 2.0
- */
-U_STABLE void  U_EXPORT2
-ucnv_close(UConverter * converter);
-
-/**
- * Fills in the output parameter, subChars, with the substitution characters
- * as multiple bytes.
- * If ucnv_setSubstString() set a Unicode string because the converter is
- * stateful, then subChars will be an empty string.
- *
- * @param converter the Unicode converter
- * @param subChars the subsitution characters
- * @param len on input the capacity of subChars, on output the number 
- * of bytes copied to it
- * @param  err the outgoing error status code.
- * If the substitution character array is too small, an
- * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
- * @see ucnv_setSubstString
- * @see ucnv_setSubstChars
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_getSubstChars(const UConverter *converter,
-                   char *subChars,
-                   int8_t *len,
-                   UErrorCode *err);
-
-/**
- * Sets the substitution chars when converting from unicode to a codepage. The
- * substitution is specified as a string of 1-4 bytes, and may contain
- * <TT>NULL</TT> bytes.
- * The subChars must represent a single character. The caller needs to know the
- * byte sequence of a valid character in the converter's charset.
- * For some converters, for example some ISO 2022 variants, only single-byte
- * substitution characters may be supported.
- * The newer ucnv_setSubstString() function relaxes these limitations.
- *
- * @param converter the Unicode converter
- * @param subChars the substitution character byte sequence we want set
- * @param len the number of bytes in subChars
- * @param err the error status code.  <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
- * len is bigger than the maximum number of bytes allowed in subchars
- * @see ucnv_setSubstString
- * @see ucnv_getSubstChars
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_setSubstChars(UConverter *converter,
-                   const char *subChars,
-                   int8_t len,
-                   UErrorCode *err);
-
-/**
- * Set a substitution string for converting from Unicode to a charset.
- * The caller need not know the charset byte sequence for each charset.
- *
- * Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence
- * for a single character, this function takes a Unicode string with
- * zero, one or more characters, and immediately verifies that the string can be
- * converted to the charset.
- * If not, or if the result is too long (more than 32 bytes as of ICU 3.6),
- * then the function returns with an error accordingly.
- *
- * Also unlike ucnv_setSubstChars(), this function works for stateful charsets
- * by converting on the fly at the point of substitution rather than setting
- * a fixed byte sequence.
- *
- * @param cnv The UConverter object.
- * @param s The Unicode string.
- * @param length The number of UChars in s, or -1 for a NUL-terminated string.
- * @param err Pointer to a standard ICU error code. Its input value must
- *            pass the U_SUCCESS() test, or else the function returns
- *            immediately. Check for U_FAILURE() on output or use with
- *            function chaining. (See User Guide for details.)
- *
- * @see ucnv_setSubstChars
- * @see ucnv_getSubstChars
- * @stable ICU 3.6
- */
-U_STABLE void U_EXPORT2
-ucnv_setSubstString(UConverter *cnv,
-                    const UChar *s,
-                    int32_t length,
-                    UErrorCode *err);
-
-/**
- * Fills in the output parameter, errBytes, with the error characters from the
- * last failing conversion.
- *
- * @param converter the Unicode converter
- * @param errBytes the codepage bytes which were in error
- * @param len on input the capacity of errBytes, on output the number of
- *  bytes which were copied to it
- * @param err the error status code.
- * If the substitution character array is too small, an
- * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_getInvalidChars(const UConverter *converter,
-                     char *errBytes,
-                     int8_t *len,
-                     UErrorCode *err);
-
-/**
- * Fills in the output parameter, errChars, with the error characters from the
- * last failing conversion.
- *
- * @param converter the Unicode converter
- * @param errUChars the UChars which were in error
- * @param len on input the capacity of errUChars, on output the number of 
- *  UChars which were copied to it
- * @param err the error status code.
- * If the substitution character array is too small, an
- * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_getInvalidUChars(const UConverter *converter,
-                      UChar *errUChars,
-                      int8_t *len,
-                      UErrorCode *err);
-
-/**
- * Resets the state of a converter to the default state. This is used
- * in the case of an error, to restart a conversion from a known default state.
- * It will also empty the internal output buffers.
- * @param converter the Unicode converter
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_reset(UConverter *converter);
-
-/**
- * Resets the to-Unicode part of a converter state to the default state.
- * This is used in the case of an error to restart a conversion to
- * Unicode to a known default state. It will also empty the internal
- * output buffers used for the conversion to Unicode codepoints.
- * @param converter the Unicode converter
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucnv_resetToUnicode(UConverter *converter);
-
-/**
- * Resets the from-Unicode part of a converter state to the default state.
- * This is used in the case of an error to restart a conversion from
- * Unicode to a known default state. It will also empty the internal output
- * buffers used for the conversion from Unicode codepoints.
- * @param converter the Unicode converter
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucnv_resetFromUnicode(UConverter *converter);
-
-/**
- * Returns the maximum number of bytes that are output per UChar in conversion
- * from Unicode using this converter.
- * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING
- * to calculate the size of a target buffer for conversion from Unicode.
- *
- * Note: Before ICU 2.8, this function did not return reliable numbers for
- * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS.
- *
- * This number may not be the same as the maximum number of bytes per
- * "conversion unit". In other words, it may not be the intuitively expected
- * number of bytes per character that would be published for a charset,
- * and may not fulfill any other purpose than the allocation of an output
- * buffer of guaranteed sufficient size for a given input length and converter.
- *
- * Examples for special cases that are taken into account:
- * - Supplementary code points may convert to more bytes than BMP code points.
- *   This function returns bytes per UChar (UTF-16 code unit), not per
- *   Unicode code point, for efficient buffer allocation.
- * - State-shifting output (SI/SO, escapes, etc.) from stateful converters.
- * - When m input UChars are converted to n output bytes, then the maximum m/n
- *   is taken into account.
- *
- * The number returned here does not take into account
- * (see UCNV_GET_MAX_BYTES_FOR_STRING):
- * - callbacks which output more than one charset character sequence per call,
- *   like escape callbacks
- * - initial and final non-character bytes that are output by some converters
- *   (automatic BOMs, initial escape sequence, final SI, etc.)
- *
- * Examples for returned values:
- * - SBCS charsets: 1
- * - Shift-JIS: 2
- * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
- * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
- * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
- * - ISO-2022: 3 (always outputs UTF-8)
- * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
- * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS)
- *
- * @param converter The Unicode converter.
- * @return The maximum number of bytes per UChar that are output by ucnv_fromUnicode(),
- *         to be used together with UCNV_GET_MAX_BYTES_FOR_STRING for buffer allocation.
- *
- * @see UCNV_GET_MAX_BYTES_FOR_STRING
- * @see ucnv_getMinCharSize
- * @stable ICU 2.0
- */
-U_STABLE int8_t U_EXPORT2
-ucnv_getMaxCharSize(const UConverter *converter);
-
-/**
- * Calculates the size of a buffer for conversion from Unicode to a charset.
- * The calculated size is guaranteed to be sufficient for this conversion.
- *
- * It takes into account initial and final non-character bytes that are output
- * by some converters.
- * It does not take into account callbacks which output more than one charset
- * character sequence per call, like escape callbacks.
- * The default (substitution) callback only outputs one charset character sequence.
- *
- * @param length Number of UChars to be converted.
- * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter
- *                    that will be used.
- * @return Size of a buffer that will be large enough to hold the output bytes of
- *         converting length UChars with the converter that returned the maxCharSize.
- *
- * @see ucnv_getMaxCharSize
- * @stable ICU 2.8
- */
-#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
-     (((int32_t)(length)+10)*(int32_t)(maxCharSize))
-
-/**
- * Returns the minimum byte length for characters in this codepage. 
- * This is usually either 1 or 2.
- * @param converter the Unicode converter
- * @return the minimum number of bytes allowed by this particular converter
- * @see ucnv_getMaxCharSize
- * @stable ICU 2.0
- */
-U_STABLE int8_t U_EXPORT2
-ucnv_getMinCharSize(const UConverter *converter);
-
-/**
- * Returns the display name of the converter passed in based on the Locale 
- * passed in. If the locale contains no display name, the internal ASCII
- * name will be filled in.
- *
- * @param converter the Unicode converter.
- * @param displayLocale is the specific Locale we want to localised for
- * @param displayName user provided buffer to be filled in
- * @param displayNameCapacity size of displayName Buffer
- * @param err error status code
- * @return displayNameLength number of UChar needed in displayName
- * @see ucnv_getName
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ucnv_getDisplayName(const UConverter *converter,
-                    const char *displayLocale,
-                    UChar *displayName,
-                    int32_t displayNameCapacity,
-                    UErrorCode *err);
-
-/**
- * Gets the internal, canonical name of the converter (zero-terminated).
- * The lifetime of the returned string will be that of the converter 
- * passed to this function.
- * @param converter the Unicode converter
- * @param err UErrorCode status
- * @return the internal name of the converter
- * @see ucnv_getDisplayName
- * @stable ICU 2.0
- */
-U_STABLE const char * U_EXPORT2 
-ucnv_getName(const UConverter *converter, UErrorCode *err);
-
-/**
- * Gets a codepage number associated with the converter. This is not guaranteed
- * to be the one used to create the converter. Some converters do not represent
- * platform registered codepages and return zero for the codepage number.
- * The error code fill-in parameter indicates if the codepage number
- * is available.
- * Does not check if the converter is <TT>NULL</TT> or if converter's data
- * table is <TT>NULL</TT>.
- *
- * Important: The use of CCSIDs is not recommended because it is limited
- * to only two platforms in principle and only one (UCNV_IBM) in the current
- * ICU converter API.
- * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely.
- * For more details see ucnv_openCCSID().
- *
- * @param converter the Unicode converter
- * @param err the error status code.
- * @return If any error occurrs, -1 will be returned otherwise, the codepage number
- * will be returned
- * @see ucnv_openCCSID
- * @see ucnv_getPlatform
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ucnv_getCCSID(const UConverter *converter,
-              UErrorCode *err);
-
-/**
- * Gets a codepage platform associated with the converter. Currently, 
- * only <TT>UCNV_IBM</TT> will be returned.
- * Does not test if the converter is <TT>NULL</TT> or if converter's data 
- * table is <TT>NULL</TT>. 
- * @param converter the Unicode converter
- * @param err the error status code.
- * @return The codepage platform
- * @stable ICU 2.0
- */
-U_STABLE UConverterPlatform U_EXPORT2
-ucnv_getPlatform(const UConverter *converter,
-                 UErrorCode *err);
-
-/**
- * Gets the type of the converter
- * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, 
- * EBCDIC_STATEFUL, LATIN_1
- * @param converter a valid, opened converter
- * @return the type of the converter
- * @stable ICU 2.0
- */
-U_STABLE UConverterType U_EXPORT2
-ucnv_getType(const UConverter * converter);
-
-/**
- * Gets the "starter" (lead) bytes for converters of type MBCS.
- * Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
- * is not MBCS. Fills in an array of type UBool, with the value of the byte 
- * as offset to the array. For example, if (starters[0x20] == TRUE) at return,
- * it means that the byte 0x20 is a starter byte in this converter.
- * Context pointers are always owned by the caller.
- * 
- * @param converter a valid, opened converter of type MBCS
- * @param starters an array of size 256 to be filled in
- * @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the 
- * converter is not a type which can return starters.
- * @see ucnv_getType
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_getStarters(const UConverter* converter, 
-                 UBool starters[256],
-                 UErrorCode* err);
-
-
-/**
- * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet().
- * @see ucnv_getUnicodeSet
- * @stable ICU 2.6
- */
-typedef enum UConverterUnicodeSet {
-    /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */
-    UCNV_ROUNDTRIP_SET,
-    /** Select the set of Unicode code points with roundtrip or fallback mappings. @draft ICU 4.0 */
-    UCNV_ROUNDTRIP_AND_FALLBACK_SET,
-    /** Number of UConverterUnicodeSet selectors. @stable ICU 2.6 */
-    UCNV_SET_COUNT
-} UConverterUnicodeSet;
-
-
-/**
- * Returns the set of Unicode code points that can be converted by an ICU converter.
- *
- * Returns one of several kinds of set:
- *
- * 1. UCNV_ROUNDTRIP_SET
- *
- * The set of all Unicode code points that can be roundtrip-converted
- * (converted without any data loss) with the converter (ucnv_fromUnicode()).
- * This set will not include code points that have fallback mappings
- * or are only the result of reverse fallback mappings.
- * This set will also not include PUA code points with fallbacks, although
- * ucnv_fromUnicode() will always uses those mappings despite ucnv_setFallback().
- * See UTR #22 "Character Mapping Markup Language"
- * at http://www.unicode.org/reports/tr22/
- *
- * This is useful for example for
- * - checking that a string or document can be roundtrip-converted with a converter,
- *   without/before actually performing the conversion
- * - testing if a converter can be used for text for typical text for a certain locale,
- *   by comparing its roundtrip set with the set of ExemplarCharacters from
- *   ICU's locale data or other sources
- *
- * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET
- *
- * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode())
- * when fallbacks are turned on (see ucnv_setFallback()).
- * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks).
- *
- * In the future, there may be more UConverterUnicodeSet choices to select
- * sets with different properties.
- *
- * @param cnv The converter for which a set is requested.
- * @param setFillIn A valid USet *. It will be cleared by this function before
- *            the converter's specific set is filled into the USet.
- * @param whichSet A UConverterUnicodeSet selector;
- *              currently UCNV_ROUNDTRIP_SET is the only supported value.
- * @param pErrorCode ICU error code in/out parameter.
- *                   Must fulfill U_SUCCESS before the function call.
- *
- * @see UConverterUnicodeSet
- * @see uset_open
- * @see uset_close
- * @stable ICU 2.6
- */
-U_STABLE void U_EXPORT2
-ucnv_getUnicodeSet(const UConverter *cnv,
-                   USet *setFillIn,
-                   UConverterUnicodeSet whichSet,
-                   UErrorCode *pErrorCode);
-
-/**
- * Gets the current calback function used by the converter when an illegal
- *  or invalid codepage sequence is found. 
- * Context pointers are always owned by the caller.
- *
- * @param converter the unicode converter
- * @param action fillin: returns the callback function pointer
- * @param context fillin: returns the callback's private void* context
- * @see ucnv_setToUCallBack
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_getToUCallBack (const UConverter * converter,
-                     UConverterToUCallback *action,
-                     const void **context);
-
-/**
- * Gets the current callback function used by the converter when illegal 
- * or invalid Unicode sequence is found.
- * Context pointers are always owned by the caller.
- *
- * @param converter the unicode converter
- * @param action fillin: returns the callback function pointer
- * @param context fillin: returns the callback's private void* context
- * @see ucnv_setFromUCallBack
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_getFromUCallBack (const UConverter * converter,
-                       UConverterFromUCallback *action,
-                       const void **context);
-
-/**
- * Changes the callback function used by the converter when
- * an illegal or invalid sequence is found.
- * Context pointers are always owned by the caller.
- * Predefined actions and contexts can be found in the ucnv_err.h header.
- *
- * @param converter the unicode converter
- * @param newAction the new callback function
- * @param newContext the new toUnicode callback context pointer. This can be NULL.
- * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
- * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
- * @param err The error code status
- * @see ucnv_getToUCallBack
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_setToUCallBack (UConverter * converter,
-                     UConverterToUCallback newAction,
-                     const void* newContext,
-                     UConverterToUCallback *oldAction,
-                     const void** oldContext,
-                     UErrorCode * err);
-
-/**
- * Changes the current callback function used by the converter when
- * an illegal or invalid sequence is found.
- * Context pointers are always owned by the caller.
- * Predefined actions and contexts can be found in the ucnv_err.h header.
- *
- * @param converter the unicode converter
- * @param newAction the new callback function
- * @param newContext the new fromUnicode callback context pointer. This can be NULL.
- * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
- * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
- * @param err The error code status
- * @see ucnv_getFromUCallBack
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_setFromUCallBack (UConverter * converter,
-                       UConverterFromUCallback newAction,
-                       const void *newContext,
-                       UConverterFromUCallback *oldAction,
-                       const void **oldContext,
-                       UErrorCode * err);
-
-/**
- * Converts an array of unicode characters to an array of codepage
- * characters. This function is optimized for converting a continuous
- * stream of data in buffer-sized chunks, where the entire source and
- * target does not fit in available buffers.
- * 
- * The source pointer is an in/out parameter. It starts out pointing where the 
- * conversion is to begin, and ends up pointing after the last UChar consumed. 
- * 
- * Target similarly starts out pointer at the first available byte in the output
- * buffer, and ends up pointing after the last byte written to the output.
- * 
- * The converter always attempts to consume the entire source buffer, unless 
- * (1.) the target buffer is full, or (2.) a failing error is returned from the
- * current callback function.  When a successful error status has been
- * returned, it means that all of the source buffer has been
- *  consumed. At that point, the caller should reset the source and
- *  sourceLimit pointers to point to the next chunk.
- * 
- * At the end of the stream (flush==TRUE), the input is completely consumed
- * when *source==sourceLimit and no error code is set.
- * The converter object is then automatically reset by this function.
- * (This means that a converter need not be reset explicitly between data
- * streams if it finishes the previous stream without errors.)
- * 
- * This is a <I>stateful</I> conversion. Additionally, even when all source data has
- * been consumed, some data may be in the converters' internal state.
- * Call this function repeatedly, updating the target pointers with
- * the next empty chunk of target in case of a
- * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source  pointers
- *  with the next chunk of source when a successful error status is
- * returned, until there are no more chunks of source data.
- * @param converter the Unicode converter
- * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
- *  codepage characters to. Output : points to after the last codepage character copied
- *  to <TT>target</TT>.
- * @param targetLimit the pointer just after last of the <TT>target</TT> buffer
- * @param source I/O parameter, pointer to pointer to the source Unicode character buffer. 
- * @param sourceLimit the pointer just after the last of the source buffer
- * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
- * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
- * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
- * For output data carried across calls, and other data without a specific source character
- * (such as from escape sequences or callbacks)  -1 will be placed for offsets. 
- * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available
- * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned,
- * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until
- * the source buffer is consumed.
- * @param err the error status.  <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
- * converter is <TT>NULL</TT>.
- * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is 
- * still data to be written to the target.
- * @see ucnv_fromUChars
- * @see ucnv_convert
- * @see ucnv_getMinCharSize
- * @see ucnv_setToUCallBack
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucnv_fromUnicode (UConverter * converter,
-                  char **target,
-                  const char *targetLimit,
-                  const UChar ** source,
-                  const UChar * sourceLimit,
-                  int32_t* offsets,
-                  UBool flush,
-                  UErrorCode * err);
-
-/**
- * Converts a buffer of codepage bytes into an array of unicode UChars
- * characters. This function is optimized for converting a continuous
- * stream of data in buffer-sized chunks, where the entire source and
- * target does not fit in available buffers.
- * 
- * The source pointer is an in/out parameter. It starts out pointing where the 
- * conversion is to begin, and ends up pointing after the last byte of source consumed. 
- * 
- * Target similarly starts out pointer at the first available UChar in the output
- * buffer, and ends up pointing after the last UChar written to the output. 
- * It does NOT necessarily keep UChar sequences together.
- * 
- * The converter always attempts to consume the entire source buffer, unless 
- * (1.) the target buffer is full, or (2.) a failing error is returned from the
- * current callback function.  When a successful error status has been
- * returned, it means that all of the source buffer has been
- *  consumed. At that point, the caller should reset the source and
- *  sourceLimit pointers to point to the next chunk.
- *
- * At the end of the stream (flush==TRUE), the input is completely consumed
- * when *source==sourceLimit and no error code is set
- * The converter object is then automatically reset by this function.
- * (This means that a converter need not be reset explicitly between data
- * streams if it finishes the previous stream without errors.)
- * 
- * This is a <I>stateful</I> conversion. Additionally, even when all source data has
- * been consumed, some data may be in the converters' internal state.
- * Call this function repeatedly, updating the target pointers with
- * the next empty chunk of target in case of a
- * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source  pointers
- *  with the next chunk of source when a successful error status is
- * returned, until there are no more chunks of source data.
- * @param converter the Unicode converter
- * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
- *  UChars into. Output : points to after the last UChar copied.
- * @param targetLimit the pointer just after the end of the <TT>target</TT> buffer
- * @param source I/O parameter, pointer to pointer to the source codepage buffer. 
- * @param sourceLimit the pointer to the byte after the end of the source buffer
- * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
- * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
- * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
- * For output data carried across calls, and other data without a specific source character
- * (such as from escape sequences or callbacks)  -1 will be placed for offsets. 
- * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available
- * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned,
- * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until
- * the source buffer is consumed.
- * @param err the error status.  <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
- * converter is <TT>NULL</TT>.
- * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is 
- * still data to be written to the target. 
- * @see ucnv_fromUChars
- * @see ucnv_convert
- * @see ucnv_getMinCharSize
- * @see ucnv_setFromUCallBack
- * @see ucnv_getNextUChar
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucnv_toUnicode(UConverter *converter,
-               UChar **target,
-               const UChar *targetLimit,
-               const char **source,
-               const char *sourceLimit,
-               int32_t *offsets,
-               UBool flush,
-               UErrorCode *err);
-
-/**
- * Convert the Unicode string into a codepage string using an existing UConverter.
- * The output string is NUL-terminated if possible.
- *
- * This function is a more convenient but less powerful version of ucnv_fromUnicode().
- * It is only useful for whole strings, not for streaming conversion.
- *
- * The maximum output buffer capacity required (barring output from callbacks) will be
- * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
- *
- * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
- * @param src the input Unicode string
- * @param srcLength the input string length, or -1 if NUL-terminated
- * @param dest destination string buffer, can be NULL if destCapacity==0
- * @param destCapacity the number of chars available at dest
- * @param pErrorCode normal ICU error code;
- *                  common error codes that may be set by this function include
- *                  U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
- *                  U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
- * @return the length of the output string, not counting the terminating NUL;
- *         if the length is greater than destCapacity, then the string will not fit
- *         and a buffer of the indicated length would need to be passed in
- * @see ucnv_fromUnicode
- * @see ucnv_convert
- * @see UCNV_GET_MAX_BYTES_FOR_STRING
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ucnv_fromUChars(UConverter *cnv,
-                char *dest, int32_t destCapacity,
-                const UChar *src, int32_t srcLength,
-                UErrorCode *pErrorCode);
-
-/**
- * Convert the codepage string into a Unicode string using an existing UConverter.
- * The output string is NUL-terminated if possible.
- *
- * This function is a more convenient but less powerful version of ucnv_toUnicode().
- * It is only useful for whole strings, not for streaming conversion.
- *
- * The maximum output buffer capacity required (barring output from callbacks) will be
- * 2*srcLength (each char may be converted into a surrogate pair).
- *
- * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called)
- * @param src the input codepage string
- * @param srcLength the input string length, or -1 if NUL-terminated
- * @param dest destination string buffer, can be NULL if destCapacity==0
- * @param destCapacity the number of UChars available at dest
- * @param pErrorCode normal ICU error code;
- *                  common error codes that may be set by this function include
- *                  U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
- *                  U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
- * @return the length of the output string, not counting the terminating NUL;
- *         if the length is greater than destCapacity, then the string will not fit
- *         and a buffer of the indicated length would need to be passed in
- * @see ucnv_toUnicode
- * @see ucnv_convert
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ucnv_toUChars(UConverter *cnv,
-              UChar *dest, int32_t destCapacity,
-              const char *src, int32_t srcLength,
-              UErrorCode *pErrorCode);
-
-/**
- * Convert a codepage buffer into Unicode one character at a time.
- * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
- *
- * Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
- * - Faster for small amounts of data, for most converters, e.g.,
- *   US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
- *   (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
- *    it uses ucnv_toUnicode() internally.)
- * - Convenient.
- *
- * Limitations compared to ucnv_toUnicode():
- * - Always assumes flush=TRUE.
- *   This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
- *   that is, for where the input is supplied in multiple buffers,
- *   because ucnv_getNextUChar() will assume the end of the input at the end
- *   of the first buffer.
- * - Does not provide offset output.
- *
- * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
- * ucnv_getNextUChar() uses the current state of the converter
- * (unlike ucnv_toUChars() which always resets first).
- * However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
- * stopped in the middle of a character sequence (with flush=FALSE),
- * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
- * internally until the next character boundary.
- * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
- * start at a character boundary.)
- *
- * Instead of using ucnv_getNextUChar(), it is recommended
- * to convert using ucnv_toUnicode() or ucnv_toUChars()
- * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
- * or a C++ CharacterIterator or similar.
- * This allows streaming conversion and offset output, for example.
- *
- * <p>Handling of surrogate pairs and supplementary-plane code points:<br>
- * There are two different kinds of codepages that provide mappings for surrogate characters:
- * <ul>
- *   <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode
- *       code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff.
- *       Each valid sequence will result in exactly one returned code point.
- *       If a sequence results in a single surrogate, then that will be returned
- *       by itself, even if a neighboring sequence encodes the matching surrogate.</li>
- *   <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points
- *       including surrogates. Code points in supplementary planes are represented with
- *       two sequences, each encoding a surrogate.
- *       For these codepages, matching pairs of surrogates will be combined into single
- *       code points for returning from this function.
- *       (Note that SCSU is actually a mix of these codepage types.)</li>
- * </ul></p>
- *
- * @param converter an open UConverter
- * @param source the address of a pointer to the codepage buffer, will be
- *  updated to point after the bytes consumed in the conversion call.
- * @param sourceLimit points to the end of the input buffer
- * @param err fills in error status (see ucnv_toUnicode)
- * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input 
- * is empty or does not convert to any output (e.g.: pure state-change 
- * codes SI/SO, escape sequences for ISO 2022,
- * or if the callback did not output anything, ...).
- * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because
- *  the "buffer" is the return code. However, there might be subsequent output
- *  stored in the converter object
- * that will be returned in following calls to this function.
- * @return a UChar32 resulting from the partial conversion of source
- * @see ucnv_toUnicode
- * @see ucnv_toUChars
- * @see ucnv_convert
- * @stable ICU 2.0
- */
-U_STABLE UChar32 U_EXPORT2
-ucnv_getNextUChar(UConverter * converter,
-                  const char **source,
-                  const char * sourceLimit,
-                  UErrorCode * err);
-
-/**
- * Convert from one external charset to another using two existing UConverters.
- * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
- * are used, "pivoting" through 16-bit Unicode.
- *
- * Important: For streaming conversion (multiple function calls for successive
- * parts of a text stream), the caller must provide a pivot buffer explicitly,
- * and must preserve the pivot buffer and associated pointers from one
- * call to another. (The buffer may be moved if its contents and the relative
- * pointer positions are preserved.)
- *
- * There is a similar function, ucnv_convert(),
- * which has the following limitations:
- * - it takes charset names, not converter objects, so that
- *   - two converters are opened for each call
- *   - only single-string conversion is possible, not streaming operation
- * - it does not provide enough information to find out,
- *   in case of failure, whether the toUnicode or
- *   the fromUnicode conversion failed
- *
- * By contrast, ucnv_convertEx()
- * - takes UConverter parameters instead of charset names
- * - fully exposes the pivot buffer for streaming conversion and complete error handling
- *
- * ucnv_convertEx() also provides further convenience:
- * - an option to reset the converters at the beginning
- *   (if reset==TRUE, see parameters;
- *    also sets *pivotTarget=*pivotSource=pivotStart)
- * - allow NUL-terminated input
- *   (only a single NUL byte, will not work for charsets with multi-byte NULs)
- *   (if sourceLimit==NULL, see parameters)
- * - terminate with a NUL on output
- *   (only a single NUL byte, not useful for charsets with multi-byte NULs),
- *   or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
- *   the target buffer
- * - the pivot buffer can be provided internally;
- *   possible only for whole-string conversion, not streaming conversion;
- *   in this case, the caller will not be able to get details about where an
- *   error occurred
- *   (if pivotStart==NULL, see below)
- *
- * The function returns when one of the following is true:
- * - the entire source text has been converted successfully to the target buffer
- * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
- * - a conversion error occurred
- *   (other U_FAILURE(), see description of pErrorCode)
- *
- * Limitation compared to the direct use of
- * ucnv_fromUnicode() and ucnv_toUnicode():
- * ucnv_convertEx() does not provide offset information.
- *
- * Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
- * ucnv_convertEx() does not support preflighting directly.
- *
- * Sample code for converting a single string from
- * one external charset to UTF-8, ignoring the location of errors:
- *
- * \code
- * int32_t
- * myToUTF8(UConverter *cnv,
- *          const char *s, int32_t length,
- *          char *u8, int32_t capacity,
- *          UErrorCode *pErrorCode) {
- *     UConverter *utf8Cnv;
- *     char *target;
- *
- *     if(U_FAILURE(*pErrorCode)) {
- *         return 0;
- *     }
- *
- *     utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
- *     if(U_FAILURE(*pErrorCode)) {
- *         return 0;
- *     }
- *
- *     if(length<0) {
- *         length=strlen(s);
- *     }
- *     target=u8;
- *     ucnv_convertEx(cnv, utf8Cnv,
- *                    &target, u8+capacity,
- *                    &s, s+length,
- *                    NULL, NULL, NULL, NULL,
- *                    TRUE, TRUE,
- *                    pErrorCode);
- * 
- *     myReleaseCachedUTF8Converter(utf8Cnv);
- *
- *     // return the output string length, but without preflighting
- *     return (int32_t)(target-u8);
- * }
- * \endcode
- *
- * @param targetCnv     Output converter, used to convert from the UTF-16 pivot
- *                      to the target using ucnv_fromUnicode().
- * @param sourceCnv     Input converter, used to convert from the source to
- *                      the UTF-16 pivot using ucnv_toUnicode().
- * @param target        I/O parameter, same as for ucnv_fromUChars().
- *                      Input: *target points to the beginning of the target buffer.
- *                      Output: *target points to the first unit after the last char written.
- * @param targetLimit   Pointer to the first unit after the target buffer.
- * @param source        I/O parameter, same as for ucnv_toUChars().
- *                      Input: *source points to the beginning of the source buffer.
- *                      Output: *source points to the first unit after the last char read.
- * @param sourceLimit   Pointer to the first unit after the source buffer.
- * @param pivotStart    Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
- *                      then an internal buffer is used and the other pivot
- *                      arguments are ignored and can be NULL as well.
- * @param pivotSource   I/O parameter, same as source in ucnv_fromUChars() for
- *                      conversion from the pivot buffer to the target buffer.
- * @param pivotTarget   I/O parameter, same as target in ucnv_toUChars() for
- *                      conversion from the source buffer to the pivot buffer.
- *                      It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
- *                      and pivotStart<pivotLimit (unless pivotStart==NULL).
- * @param pivotLimit    Pointer to the first unit after the pivot buffer.
- * @param reset         If TRUE, then ucnv_resetToUnicode(sourceCnv) and
- *                      ucnv_resetFromUnicode(targetCnv) are called, and the
- *                      pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart).
- * @param flush         If true, indicates the end of the input.
- *                      Passed directly to ucnv_toUnicode(), and carried over to
- *                      ucnv_fromUnicode() when the source is empty as well.
- * @param pErrorCode    ICU error code in/out parameter.
- *                      Must fulfill U_SUCCESS before the function call.
- *                      U_BUFFER_OVERFLOW_ERROR always refers to the target buffer
- *                      because overflows into the pivot buffer are handled internally.
- *                      Other conversion errors are from the source-to-pivot
- *                      conversion if *pivotSource==pivotStart, otherwise from
- *                      the pivot-to-target conversion.
- *
- * @see ucnv_convert
- * @see ucnv_fromAlgorithmic
- * @see ucnv_toAlgorithmic
- * @see ucnv_fromUnicode
- * @see ucnv_toUnicode
- * @see ucnv_fromUChars
- * @see ucnv_toUChars
- * @stable ICU 2.6
- */
-U_STABLE void U_EXPORT2
-ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
-               char **target, const char *targetLimit,
-               const char **source, const char *sourceLimit,
-               UChar *pivotStart, UChar **pivotSource,
-               UChar **pivotTarget, const UChar *pivotLimit,
-               UBool reset, UBool flush,
-               UErrorCode *pErrorCode);
-
-/**
- * Convert from one external charset to another.
- * Internally, two converters are opened according to the name arguments,
- * then the text is converted to and from the 16-bit Unicode "pivot"
- * using ucnv_convertEx(), then the converters are closed again.
- *
- * This is a convenience function, not an efficient way to convert a lot of text:
- * ucnv_convert()
- * - takes charset names, not converter objects, so that
- *   - two converters are opened for each call
- *   - only single-string conversion is possible, not streaming operation
- * - does not provide enough information to find out,
- *   in case of failure, whether the toUnicode or
- *   the fromUnicode conversion failed
- * - allows NUL-terminated input
- *   (only a single NUL byte, will not work for charsets with multi-byte NULs)
- *   (if sourceLength==-1, see parameters)
- * - terminate with a NUL on output
- *   (only a single NUL byte, not useful for charsets with multi-byte NULs),
- *   or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
- *   the target buffer
- * - a pivot buffer is provided internally
- *
- * The function returns when one of the following is true:
- * - the entire source text has been converted successfully to the target buffer
- *   and either the target buffer is terminated with a single NUL byte
- *   or the error code is set to U_STRING_NOT_TERMINATED_WARNING
- * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
- *   and the full output string length is returned ("preflighting")
- * - a conversion error occurred
- *   (other U_FAILURE(), see description of pErrorCode)
- *
- * @param toConverterName   The name of the converter that is used to convert
- *                          from the UTF-16 pivot buffer to the target.
- * @param fromConverterName The name of the converter that is used to convert
- *                          from the source to the UTF-16 pivot buffer.
- * @param target            Pointer to the output buffer.
- * @param targetCapacity    Capacity of the target, in bytes.
- * @param source            Pointer to the input buffer.
- * @param sourceLength      Length of the input text, in bytes, or -1 for NUL-terminated input.
- * @param pErrorCode        ICU error code in/out parameter.
- *                          Must fulfill U_SUCCESS before the function call.
- * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
- *         and a U_BUFFER_OVERFLOW_ERROR is set.
- *
- * @see ucnv_convertEx
- * @see ucnv_fromAlgorithmic
- * @see ucnv_toAlgorithmic
- * @see ucnv_fromUnicode
- * @see ucnv_toUnicode
- * @see ucnv_fromUChars
- * @see ucnv_toUChars
- * @see ucnv_getNextUChar
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ucnv_convert(const char *toConverterName,
-             const char *fromConverterName,
-             char *target,
-             int32_t targetCapacity,
-             const char *source,
-             int32_t sourceLength,
-             UErrorCode *pErrorCode);
-
-/**
- * Convert from one external charset to another.
- * Internally, the text is converted to and from the 16-bit Unicode "pivot"
- * using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert()
- * except that the two converters need not be looked up and opened completely.
- *
- * The source-to-pivot conversion uses the cnv converter parameter.
- * The pivot-to-target conversion uses a purely algorithmic converter
- * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
- *
- * Internally, the algorithmic converter is opened and closed for each
- * function call, which is more efficient than using the public ucnv_open()
- * but somewhat less efficient than only resetting an existing converter
- * and using ucnv_convertEx().
- *
- * This function is more convenient than ucnv_convertEx() for single-string
- * conversions, especially when "preflighting" is desired (returning the length
- * of the complete output even if it does not fit into the target buffer;
- * see the User Guide Strings chapter). See ucnv_convert() for details.
- *
- * @param algorithmicType   UConverterType constant identifying the desired target
- *                          charset as a purely algorithmic converter.
- *                          Those are converters for Unicode charsets like
- *                          UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
- *                          as well as US-ASCII and ISO-8859-1.
- * @param cnv               The converter that is used to convert
- *                          from the source to the UTF-16 pivot buffer.
- * @param target            Pointer to the output buffer.
- * @param targetCapacity    Capacity of the target, in bytes.
- * @param source            Pointer to the input buffer.
- * @param sourceLength      Length of the input text, in bytes
- * @param pErrorCode        ICU error code in/out parameter.
- *                          Must fulfill U_SUCCESS before the function call.
- * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
- *         and a U_BUFFER_OVERFLOW_ERROR is set.
- *
- * @see ucnv_fromAlgorithmic
- * @see ucnv_convert
- * @see ucnv_convertEx
- * @see ucnv_fromUnicode
- * @see ucnv_toUnicode
- * @see ucnv_fromUChars
- * @see ucnv_toUChars
- * @stable ICU 2.6
- */
-U_STABLE int32_t U_EXPORT2
-ucnv_toAlgorithmic(UConverterType algorithmicType,
-                   UConverter *cnv,
-                   char *target, int32_t targetCapacity,
-                   const char *source, int32_t sourceLength,
-                   UErrorCode *pErrorCode);
-
-/**
- * Convert from one external charset to another.
- * Internally, the text is converted to and from the 16-bit Unicode "pivot"
- * using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert()
- * except that the two converters need not be looked up and opened completely.
- *
- * The source-to-pivot conversion uses a purely algorithmic converter
- * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
- * The pivot-to-target conversion uses the cnv converter parameter.
- *
- * Internally, the algorithmic converter is opened and closed for each
- * function call, which is more efficient than using the public ucnv_open()
- * but somewhat less efficient than only resetting an existing converter
- * and using ucnv_convertEx().
- *
- * This function is more convenient than ucnv_convertEx() for single-string
- * conversions, especially when "preflighting" is desired (returning the length
- * of the complete output even if it does not fit into the target buffer;
- * see the User Guide Strings chapter). See ucnv_convert() for details.
- *
- * @param cnv               The converter that is used to convert
- *                          from the UTF-16 pivot buffer to the target.
- * @param algorithmicType   UConverterType constant identifying the desired source
- *                          charset as a purely algorithmic converter.
- *                          Those are converters for Unicode charsets like
- *                          UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
- *                          as well as US-ASCII and ISO-8859-1.
- * @param target            Pointer to the output buffer.
- * @param targetCapacity    Capacity of the target, in bytes.
- * @param source            Pointer to the input buffer.
- * @param sourceLength      Length of the input text, in bytes
- * @param pErrorCode        ICU error code in/out parameter.
- *                          Must fulfill U_SUCCESS before the function call.
- * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
- *         and a U_BUFFER_OVERFLOW_ERROR is set.
- *
- * @see ucnv_fromAlgorithmic
- * @see ucnv_convert
- * @see ucnv_convertEx
- * @see ucnv_fromUnicode
- * @see ucnv_toUnicode
- * @see ucnv_fromUChars
- * @see ucnv_toUChars
- * @stable ICU 2.6
- */
-U_STABLE int32_t U_EXPORT2
-ucnv_fromAlgorithmic(UConverter *cnv,
-                     UConverterType algorithmicType,
-                     char *target, int32_t targetCapacity,
-                     const char *source, int32_t sourceLength,
-                     UErrorCode *pErrorCode);
-
-/**
- * Frees up memory occupied by unused, cached converter shared data.
- *
- * @return the number of cached converters successfully deleted
- * @see ucnv_close
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ucnv_flushCache(void);
-
-/**
- * Returns the number of available converters, as per the alias file.
- *
- * @return the number of available converters
- * @see ucnv_getAvailableName
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-ucnv_countAvailable(void);
-
-/**
- * Gets the canonical converter name of the specified converter from a list of
- * all available converters contaied in the alias file. All converters
- * in this list can be opened.
- *
- * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvaiable()]</TT>)
- * @return a pointer a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
- * @see ucnv_countAvailable
- * @stable ICU 2.0
- */
-U_STABLE const char* U_EXPORT2
-ucnv_getAvailableName(int32_t n);
-
-/**
- * Returns a UEnumeration to enumerate all of the canonical converter
- * names, as per the alias file, regardless of the ability to open each
- * converter.
- *
- * @return A UEnumeration object for getting all the recognized canonical
- *   converter names.
- * @see ucnv_getAvailableName
- * @see uenum_close
- * @see uenum_next
- * @stable ICU 2.4
- */
-U_STABLE UEnumeration * U_EXPORT2
-ucnv_openAllNames(UErrorCode *pErrorCode);
-
-/**
- * Gives the number of aliases for a given converter or alias name.
- * If the alias is ambiguous, then the preferred converter is used
- * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
- * This method only enumerates the listed entries in the alias file.
- * @param alias alias name
- * @param pErrorCode error status
- * @return number of names on alias list for given alias
- * @stable ICU 2.0
- */
-U_STABLE uint16_t U_EXPORT2 
-ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
-
-/**
- * Gives the name of the alias at given index of alias list.
- * This method only enumerates the listed entries in the alias file.
- * If the alias is ambiguous, then the preferred converter is used
- * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
- * @param alias alias name
- * @param n index in alias list
- * @param pErrorCode result of operation
- * @return returns the name of the alias at given index
- * @see ucnv_countAliases
- * @stable ICU 2.0
- */
-U_STABLE const char * U_EXPORT2 
-ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
-
-/**
- * Fill-up the list of alias names for the given alias.
- * This method only enumerates the listed entries in the alias file.
- * If the alias is ambiguous, then the preferred converter is used
- * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
- * @param alias alias name
- * @param aliases fill-in list, aliases is a pointer to an array of
- *        <code>ucnv_countAliases()</code> string-pointers
- *        (<code>const char *</code>) that will be filled in.
- *        The strings themselves are owned by the library.
- * @param pErrorCode result of operation
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
-
-/**
- * Return a new UEnumeration object for enumerating all the
- * alias names for a given converter that are recognized by a standard.
- * This method only enumerates the listed entries in the alias file.
- * The convrtrs.txt file can be modified to change the results of
- * this function.
- * The first result in this list is the same result given by
- * <code>ucnv_getStandardName</code>, which is the default alias for
- * the specified standard name. The returned object must be closed with
- * <code>uenum_close</code> when you are done with the object.
- *
- * @param convName original converter name
- * @param standard name of the standard governing the names; MIME and IANA
- *      are such standards
- * @param pErrorCode The error code
- * @return A UEnumeration object for getting all aliases that are recognized
- *      by a standard. If any of the parameters are invalid, NULL
- *      is returned.
- * @see ucnv_getStandardName
- * @see uenum_close
- * @see uenum_next
- * @stable ICU 2.2
- */
-U_STABLE UEnumeration * U_EXPORT2
-ucnv_openStandardNames(const char *convName,
-                       const char *standard,
-                       UErrorCode *pErrorCode);
-
-/**
- * Gives the number of standards associated to converter names.
- * @return number of standards
- * @stable ICU 2.0
- */
-U_STABLE uint16_t U_EXPORT2
-ucnv_countStandards(void);
-
-/**
- * Gives the name of the standard at given index of standard list.
- * @param n index in standard list
- * @param pErrorCode result of operation
- * @return returns the name of the standard at given index. Owned by the library.
- * @stable ICU 2.0
- */
-U_STABLE const char * U_EXPORT2
-ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
-
-/**
- * Returns a standard name for a given converter name.
- * <p>
- * Example alias table:<br>
- * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
- * <p>
- * Result of ucnv_getStandardName("conv", "STANDARD1") from example
- * alias table:<br>
- * <b>"alias2"</b>
- *
- * @param name original converter name
- * @param standard name of the standard governing the names; MIME and IANA
- *        are such standards
- * @param pErrorCode result of operation
- * @return returns the standard converter name;
- *         if a standard converter name cannot be determined,
- *         then <code>NULL</code> is returned. Owned by the library.
- * @stable ICU 2.0
- */
-U_STABLE const char * U_EXPORT2
-ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
-
-/**
- * This function will return the internal canonical converter name of the
- * tagged alias. This is the opposite of ucnv_openStandardNames, which
- * returns the tagged alias given the canonical name.
- * <p>
- * Example alias table:<br>
- * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
- * <p>
- * Result of ucnv_getStandardName("alias1", "STANDARD1") from example
- * alias table:<br>
- * <b>"conv"</b>
- *
- * @return returns the canonical converter name;
- *         if a standard or alias name cannot be determined,
- *         then <code>NULL</code> is returned. The returned string is
- *         owned by the library.
- * @see ucnv_getStandardName
- * @stable ICU 2.4
- */
-U_STABLE const char * U_EXPORT2
-ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);
-
-/**
- * Returns the current default converter name. If you want to open
- * a default converter, you do not need to use this function.
- * It is faster if you pass a NULL argument to ucnv_open the
- * default converter.
- *
- * @return returns the current default converter name.
- *         Storage owned by the library
- * @see ucnv_setDefaultName
- * @stable ICU 2.0
- */
-U_STABLE const char * U_EXPORT2
-ucnv_getDefaultName(void);
-
-/**
- * This function is not thread safe. DO NOT call this function when ANY ICU
- * function is being used from more than one thread! This function sets the
- * current default converter name. If this function needs to be called, it
- * should be called during application initialization. Most of the time, the
- * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument
- * is sufficient for your application.
- * @param name the converter name to be the default (must be known by ICU).
- * @see ucnv_getDefaultName
- * @system
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_setDefaultName(const char *name);
-
-/**
- * Fixes the backslash character mismapping.  For example, in SJIS, the backslash 
- * character in the ASCII portion is also used to represent the yen currency sign.  
- * When mapping from Unicode character 0x005C, it's unclear whether to map the 
- * character back to yen or backslash in SJIS.  This function will take the input
- * buffer and replace all the yen sign characters with backslash.  This is necessary
- * when the user tries to open a file with the input buffer on Windows.
- * This function will test the converter to see whether such mapping is
- * required.  You can sometimes avoid using this function by using the correct version
- * of Shift-JIS.
- *
- * @param cnv The converter representing the target codepage.
- * @param source the input buffer to be fixed
- * @param sourceLen the length of the input buffer
- * @see ucnv_isAmbiguous
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen);
-
-/**
- * Determines if the converter contains ambiguous mappings of the same
- * character or not.
- * @param cnv the converter to be tested
- * @return TRUE if the converter contains ambiguous mapping of the same 
- * character, FALSE otherwise.
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2
-ucnv_isAmbiguous(const UConverter *cnv);
-
-/**
- * Sets the converter to use fallback mappings or not.
- * Regardless of this flag, the converter will always use
- * fallbacks from Unicode Private Use code points, as well as
- * reverse fallbacks (to Unicode).
- * For details see ".ucm File Format"
- * in the Conversion Data chapter of the ICU User Guide:
- * http://www.icu-project.org/userguide/conversion-data.html#ucmformat
- *
- * @param cnv The converter to set the fallback mapping usage on.
- * @param usesFallback TRUE if the user wants the converter to take advantage of the fallback 
- * mapping, FALSE otherwise.
- * @stable ICU 2.0
- * @see ucnv_usesFallback
- */
-U_STABLE void U_EXPORT2 
-ucnv_setFallback(UConverter *cnv, UBool usesFallback);
-
-/**
- * Determines if the converter uses fallback mappings or not.
- * This flag has restrictions, see ucnv_setFallback().
- *
- * @param cnv The converter to be tested
- * @return TRUE if the converter uses fallback, FALSE otherwise.
- * @stable ICU 2.0
- * @see ucnv_setFallback
- */
-U_STABLE UBool U_EXPORT2 
-ucnv_usesFallback(const UConverter *cnv);
-
-/**
- * Detects Unicode signature byte sequences at the start of the byte stream
- * and returns the charset name of the indicated Unicode charset.
- * NULL is returned when no Unicode signature is recognized.
- * The number of bytes in the signature is output as well.
- *
- * The caller can ucnv_open() a converter using the charset name.
- * The first code unit (UChar) from the start of the stream will be U+FEFF
- * (the Unicode BOM/signature character) and can usually be ignored.
- *
- * For most Unicode charsets it is also possible to ignore the indicated
- * number of initial stream bytes and start converting after them.
- * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which
- * this will not work. Therefore, it is best to ignore the first output UChar
- * instead of the input signature bytes.
- * <p>
- * Usage:
- * @code     
- *      UErrorCode err = U_ZERO_ERROR;
- *      char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
- *      int32_t signatureLength = 0;
- *      char *encoding = ucnv_detectUnicodeSignature(input,sizeof(input),&signatureLength,&err);
- *      UConverter *conv = NULL;
- *      UChar output[100];
- *      UChar *target = output, *out;
- *      char *source = input;
- *      if(encoding!=NULL && U_SUCCESS(err)){
- *          // should signature be discarded ?
- *          conv = ucnv_open(encoding, &err);
- *          // do the conversion
- *          ucnv_toUnicode(conv,
- *                         target, output + sizeof(output)/U_SIZEOF_UCHAR,
- *                         source, input + sizeof(input),
- *                         NULL, TRUE, &err);
- *          out = output;
- *          if (discardSignature){
- *              ++out; // ignore initial U+FEFF
- *          }
- *          while(out != target) {
- *              printf("%04x ", *out++);
- *          }
- *          puts("");
- *      }
- *     
- * @endcode
- *
- * @param source            The source string in which the signature should be detected.
- * @param sourceLength      Length of the input string, or -1 if terminated with a NUL byte.
- * @param signatureLength   A pointer to int32_t to receive the number of bytes that make up the signature 
- *                          of the detected UTF. 0 if not detected.
- *                          Can be a NULL pointer.
- * @param pErrorCode        ICU error code in/out parameter.
- *                          Must fulfill U_SUCCESS before the function call.
- * @return The name of the encoding detected. NULL if encoding is not detected. 
- * @stable ICU 2.4
- */
-U_STABLE const char* U_EXPORT2
-ucnv_detectUnicodeSignature(const char* source,
-                            int32_t sourceLength,
-                            int32_t *signatureLength,
-                            UErrorCode *pErrorCode);
-
-/**
- * Returns the number of UChars held in the converter's internal state 
- * because more input is needed for completing the conversion. This function is 
- * useful for mapping semantics of ICU's converter interface to those of iconv,
- * and this information is not needed for normal conversion.
- * @param cnv       The converter in which the input is held
- * @param status    ICU error code in/out parameter.
- *                  Must fulfill U_SUCCESS before the function call.
- * @return The number of UChars in the state. -1 if an error is encountered.
- * @stable ICU 3.4
- */
-U_STABLE int32_t U_EXPORT2
-ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status);
-
-/**
- * Returns the number of chars held in the converter's internal state
- * because more input is needed for completing the conversion. This function is 
- * useful for mapping semantics of ICU's converter interface to those of iconv,
- * and this information is not needed for normal conversion.
- * @param cnv       The converter in which the input is held as internal state
- * @param status    ICU error code in/out parameter.
- *                  Must fulfill U_SUCCESS before the function call.
- * @return The number of chars in the state. -1 if an error is encountered.
- * @stable ICU 3.4
- */
-U_STABLE int32_t U_EXPORT2
-ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status);
-
-#endif
-
-#endif
-/*_UCNV*/

Copied: MacRuby/trunk/icu-1060/unicode/ucnv.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ucnv.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ucnv.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ucnv.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1967 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+ *  ucnv.h:
+ *  External APIs for the ICU's codeset conversion library
+ *  Bertrand A. Damiba
+ *
+ * Modification History:
+ *
+ *   Date        Name        Description
+ *   04/04/99    helena      Fixed internal header inclusion.
+ *   05/11/00    helena      Added setFallback and usesFallback APIs.
+ *   06/29/2000  helena      Major rewrite of the callback APIs.
+ *   12/07/2000  srl         Update of documentation
+ */
+
+/**
+ * \file
+ * \brief C API: Character conversion 
+ *
+ * <h2>Character Conversion C API</h2>
+ *
+ * <p>This API is used to convert codepage or character encoded data to and
+ * from UTF-16. You can open a converter with {@link ucnv_open() }. With that
+ * converter, you can get its properties, set options, convert your data and
+ * close the converter.</p>
+ *
+ * <p>Since many software programs recogize different converter names for
+ * different types of converters, there are other functions in this API to
+ * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() },
+ * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the
+ * more frequently used alias functions to get this information.</p>
+ *
+ * <p>When a converter encounters an illegal, irregular, invalid or unmappable character
+ * its default behavior is to use a substitution character to replace the
+ * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() }
+ * or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines
+ * many other callback actions that can be used instead of a character substitution.</p>
+ *
+ * <p>More information about this API can be found in our 
+ * <a href="http://icu-project.org/userguide/conversion.html">User's
+ * Guide</a>.</p>
+ */
+
+#ifndef UCNV_H
+#define UCNV_H
+
+#include "unicode/ucnv_err.h"
+#include "unicode/uenum.h"
+
+#ifndef __USET_H__
+
+/**
+ * USet is the C API type for Unicode sets.
+ * It is forward-declared here to avoid including the header file if related
+ * conversion APIs are not used.
+ * See unicode/uset.h
+ *
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.6
+ */
+struct USet;
+/** @stable ICU 2.6 */
+typedef struct USet USet;
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+U_CDECL_BEGIN
+
+/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */
+#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
+/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */
+#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
+
+/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
+#define  UCNV_SI 0x0F
+/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
+#define  UCNV_SO 0x0E
+
+/**
+ * Enum for specifying basic types of converters
+ * @see ucnv_getType
+ * @stable ICU 2.0
+ */
+typedef enum {
+    UCNV_UNSUPPORTED_CONVERTER = -1,
+    UCNV_SBCS = 0,
+    UCNV_DBCS = 1,
+    UCNV_MBCS = 2,
+    UCNV_LATIN_1 = 3,
+    UCNV_UTF8 = 4,
+    UCNV_UTF16_BigEndian = 5,
+    UCNV_UTF16_LittleEndian = 6,
+    UCNV_UTF32_BigEndian = 7,
+    UCNV_UTF32_LittleEndian = 8,
+    UCNV_EBCDIC_STATEFUL = 9,
+    UCNV_ISO_2022 = 10,
+
+    UCNV_LMBCS_1 = 11,
+    UCNV_LMBCS_2, 
+    UCNV_LMBCS_3,
+    UCNV_LMBCS_4,
+    UCNV_LMBCS_5,
+    UCNV_LMBCS_6,
+    UCNV_LMBCS_8,
+    UCNV_LMBCS_11,
+    UCNV_LMBCS_16,
+    UCNV_LMBCS_17,
+    UCNV_LMBCS_18,
+    UCNV_LMBCS_19,
+    UCNV_LMBCS_LAST = UCNV_LMBCS_19,
+    UCNV_HZ,
+    UCNV_SCSU,
+    UCNV_ISCII,
+    UCNV_US_ASCII,
+    UCNV_UTF7,
+    UCNV_BOCU1,
+    UCNV_UTF16,
+    UCNV_UTF32,
+    UCNV_CESU8,
+    UCNV_IMAP_MAILBOX,
+
+    /* Number of converter types for which we have conversion routines. */
+    UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
+
+} UConverterType;
+
+/**
+ * Enum for specifying which platform a converter ID refers to.
+ * The use of platform/CCSID is not recommended. See ucnv_openCCSID().
+ *
+ * @see ucnv_getPlatform
+ * @see ucnv_openCCSID
+ * @see ucnv_getCCSID
+ * @stable ICU 2.0
+ */
+typedef enum {
+    UCNV_UNKNOWN = -1,
+    UCNV_IBM = 0
+} UConverterPlatform;
+
+/**
+ * Function pointer for error callback in the codepage to unicode direction.
+ * Called when an error has occured in conversion to unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param pErrorCode    ICU error code in/out parameter.
+ *                      For converter callback functions, set to a conversion error
+ *                      before the call, and the callback may reset it to U_ZERO_ERROR.
+ * @see ucnv_setToUCallBack
+ * @see UConverterToUnicodeArgs
+ * @stable ICU 2.0
+ */
+typedef void (U_EXPORT2 *UConverterToUCallback) (
+                  const void* context,
+                  UConverterToUnicodeArgs *args,
+                  const char *codeUnits,
+                  int32_t length,
+                  UConverterCallbackReason reason,
+                  UErrorCode *pErrorCode);
+
+/**
+ * Function pointer for error callback in the unicode to codepage direction.
+ * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param pErrorCode    ICU error code in/out parameter.
+ *                      For converter callback functions, set to a conversion error
+ *                      before the call, and the callback may reset it to U_ZERO_ERROR.
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+typedef void (U_EXPORT2 *UConverterFromUCallback) (
+                    const void* context,
+                    UConverterFromUnicodeArgs *args,
+                    const UChar* codeUnits,
+                    int32_t length,
+                    UChar32 codePoint,
+                    UConverterCallbackReason reason,
+                    UErrorCode *pErrorCode);
+
+U_CDECL_END
+
+/**
+ * Character that separates converter names from options and options from each other.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_OPTION_SEP_CHAR ','
+
+/**
+ * String version of UCNV_OPTION_SEP_CHAR. 
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_OPTION_SEP_STRING ","
+
+/**
+ * Character that separates a converter option from its value.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_VALUE_SEP_CHAR '='
+
+/**
+ * String version of UCNV_VALUE_SEP_CHAR. 
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_VALUE_SEP_STRING "="
+
+/**
+ * Converter option for specifying a locale.
+ * For example, ucnv_open("SCSU,locale=ja", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_LOCALE_OPTION_STRING ",locale="
+
+/**
+ * Converter option for specifying a version selector (0..9) for some converters.
+ * For example, ucnv_open("UTF-7,version=1", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.4
+ */
+#define UCNV_VERSION_OPTION_STRING ",version="
+
+/**
+ * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.
+ * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on
+ * S/390 (z/OS) Unix System Services (Open Edition).
+ * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.4
+ */
+#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl"
+
+/**
+ * Do a fuzzy compare of two converter/alias names.
+ * The comparison is case-insensitive, ignores leading zeroes if they are not
+ * followed by further digits, and ignores all but letters and digits.
+ * Thus the strings "UTF-8", "utf_8", "u*T at f08" and "Utf 8" are exactly equivalent.
+ * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
+ * at http://www.unicode.org/reports/tr22/
+ *
+ * @param name1 a converter name or alias, zero-terminated
+ * @param name2 a converter name or alias, zero-terminated
+ * @return 0 if the names match, or a negative value if the name1
+ * lexically precedes name2, or a positive value if the name1
+ * lexically follows name2.
+ * @stable ICU 2.0
+ */
+U_STABLE int U_EXPORT2
+ucnv_compareNames(const char *name1, const char *name2);
+
+
+/**
+ * Creates a UConverter object with the name of a coded character set specified as a C string.
+ * The actual name will be resolved with the alias file
+ * using a case-insensitive string comparison that ignores
+ * leading zeroes and all non-alphanumeric characters.
+ * E.g., the names "UTF8", "utf-8", "u*T at f08" and "Utf 8" are all equivalent.
+ * (See also ucnv_compareNames().)
+ * If <code>NULL</code> is passed for the converter name, it will create one with the
+ * getDefaultName return value.
+ *
+ * <p>A converter name for ICU 1.5 and above may contain options
+ * like a locale specification to control the specific behavior of
+ * the newly instantiated converter.
+ * The meaning of the options depends on the particular converter.
+ * If an option is not defined for or recognized by a given converter, then it is ignored.</p>
+ *
+ * <p>Options are appended to the converter name string, with a
+ * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and
+ * also between adjacent options.</p>
+ *
+ * <p>If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p>
+ *
+ * <p>The conversion behavior and names can vary between platforms. ICU may
+ * convert some characters differently from other platforms. Details on this topic
+ * are in the <a href="http://icu-project.org/userguide/conversion.html">User's
+ * Guide</a>. Aliases starting with a "cp" prefix have no specific meaning
+ * other than its an alias starting with the letters "cp". Please do not
+ * associate any meaning to these aliases.</p>
+ *
+ * @param converterName Name of the coded character set table.
+ *          This may have options appended to the string.
+ *          IANA alias character set names, IBM CCSIDs starting with "ibm-",
+ *          Windows codepage numbers starting with "windows-" are frequently
+ *          used for this parameter. See ucnv_getAvailableName and
+ *          ucnv_getAlias for a complete list that is available.
+ *          If this parameter is NULL, the default converter will be used.
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
+ * @see ucnv_openU
+ * @see ucnv_openCCSID
+ * @see ucnv_getAvailableName
+ * @see ucnv_getAlias
+ * @see ucnv_getDefaultName
+ * @see ucnv_close
+ * @see ucnv_compareNames
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter* U_EXPORT2 
+ucnv_open(const char *converterName, UErrorCode *err);
+
+
+/**
+ * Creates a Unicode converter with the names specified as unicode string. 
+ * The name should be limited to the ASCII-7 alphanumerics range.
+ * The actual name will be resolved with the alias file
+ * using a case-insensitive string comparison that ignores
+ * leading zeroes and all non-alphanumeric characters.
+ * E.g., the names "UTF8", "utf-8", "u*T at f08" and "Utf 8" are all equivalent.
+ * (See also ucnv_compareNames().)
+ * If <TT>NULL</TT> is passed for the converter name, it will create 
+ * one with the ucnv_getDefaultName() return value.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ *
+ * <p>See ucnv_open for the complete details</p>
+ * @param name Name of the UConverter table in a zero terminated 
+ *        Unicode string
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, 
+ *        U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an 
+ *        error occured
+ * @see ucnv_open
+ * @see ucnv_openCCSID
+ * @see ucnv_close
+ * @see ucnv_compareNames
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter* U_EXPORT2 
+ucnv_openU(const UChar *name,
+           UErrorCode *err);
+
+/**
+ * Creates a UConverter object from a CCSID number and platform pair.
+ * Note that the usefulness of this function is limited to platforms with numeric
+ * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for
+ * encodings.
+ *
+ * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related.
+ * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and
+ * for some Unicode conversion tables there are multiple CCSIDs.
+ * Some "alternate" Unicode conversion tables are provided by the
+ * IBM CDRA conversion table registry.
+ * The most prominent example of a systematic modification of conversion tables that is
+ * not provided in the form of conversion table files in the repository is
+ * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all
+ * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well.
+ *
+ * Only IBM default conversion tables are accessible with ucnv_openCCSID().
+ * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated
+ * with that CCSID.
+ *
+ * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM.
+ *
+ * In summary, the use of CCSIDs and the associated API functions is not recommended.
+ *
+ * In order to open a converter with the default IBM CDRA Unicode conversion table,
+ * you can use this function or use the prefix "ibm-":
+ * \code
+ *     char name[20];
+ *     sprintf(name, "ibm-%hu", ccsid);
+ *     cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * In order to open a converter with the IBM S/390 Unix System Services variant
+ * of a Unicode/EBCDIC conversion table,
+ * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING:
+ * \code
+ *     char name[20];
+ *     sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid);
+ *     cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * In order to open a converter from a Microsoft codepage number, use the prefix "cp":
+ * \code
+ *     char name[20];
+ *     sprintf(name, "cp%hu", codepageID);
+ *     cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ *
+ * @param codepage codepage number to create
+ * @param platform the platform in which the codepage number exists
+ * @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error
+ *   occured.
+ * @see ucnv_open
+ * @see ucnv_openU
+ * @see ucnv_close
+ * @see ucnv_getCCSID
+ * @see ucnv_getPlatform
+ * @see UConverterPlatform
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter* U_EXPORT2
+ucnv_openCCSID(int32_t codepage,
+               UConverterPlatform platform,
+               UErrorCode * err);
+
+/**
+ * <p>Creates a UConverter object specified from a packageName and a converterName.</p>
+ * 
+ * <p>The packageName and converterName must point to an ICU udata object, as defined by
+ *   <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent.
+ * Typically, packageName will refer to a (.dat) file, or to a package registered with
+ * udata_setAppData(). Using a full file or directory pathname for packageName is deprecated.</p>
+ * 
+ * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be
+ * stored in the converter cache or the alias table. The only way to open further converters
+ * is call this function multiple times, or use the ucnv_safeClone() function to clone a 
+ * 'master' converter.</p>
+ *
+ * <p>A future version of ICU may add alias table lookups and/or caching
+ * to this function.</p>
+ * 
+ * <p>Example Use:
+ *      <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code>
+ * </p>
+ *
+ * @param packageName name of the package (equivalent to 'path' in udata_open() call)
+ * @param converterName name of the data item to be used, without suffix.
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured
+ * @see udata_open
+ * @see ucnv_open
+ * @see ucnv_safeClone
+ * @see ucnv_close
+ * @stable ICU 2.2
+ */
+U_STABLE UConverter* U_EXPORT2 
+ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
+
+/**
+ * Thread safe converter cloning operation.
+ * For most efficient operation, pass in a stackBuffer (and a *pBufferSize)
+ * with at least U_CNV_SAFECLONE_BUFFERSIZE bytes of space.
+ * If the buffer size is sufficient, then the clone will use the stack buffer;
+ * otherwise, it will be allocated, and *pBufferSize will indicate
+ * the actual size. (This should not occur with U_CNV_SAFECLONE_BUFFERSIZE.)
+ *
+ * You must ucnv_close() the clone in any case.
+ *
+ * If *pBufferSize==0, (regardless of whether stackBuffer==NULL or not)
+ * then *pBufferSize will be changed to a sufficient size
+ * for cloning this converter,
+ * without actually cloning the converter ("pure pre-flighting").
+ *
+ * If *pBufferSize is greater than zero but not large enough for a stack-based
+ * clone, then the converter is cloned using newly allocated memory
+ * and *pBufferSize is changed to the necessary size.
+ *
+ * If the converter clone fits into the stack buffer but the stack buffer is not
+ * sufficiently aligned for the clone, then the clone will use an
+ * adjusted pointer and use an accordingly smaller buffer size.
+ *
+ * @param cnv converter to be cloned
+ * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. 
+ *  If buffer is not large enough, new memory will be allocated.
+ *  Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
+ * @param pBufferSize pointer to size of allocated space. pBufferSize must not be NULL.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ *  An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
+ *  is used if any allocations were necessary.
+ *  However, it is better to check if *pBufferSize grew for checking for
+ *  allocations because warning codes can be overridden by subsequent
+ *  function calls.
+ * @return pointer to the new clone
+ * @stable ICU 2.0
+ */
+U_STABLE UConverter * U_EXPORT2 
+ucnv_safeClone(const UConverter *cnv, 
+               void             *stackBuffer,
+               int32_t          *pBufferSize, 
+               UErrorCode       *status);
+
+/**
+ * \def U_CNV_SAFECLONE_BUFFERSIZE
+ * Definition of a buffer size that is designed to be large enough for
+ * converters to be cloned with ucnv_safeClone().
+ * @stable ICU 2.0
+ */
+#define U_CNV_SAFECLONE_BUFFERSIZE  1024
+
+/**
+ * Deletes the unicode converter and releases resources associated
+ * with just this instance.
+ * Does not free up shared converter tables.
+ *
+ * @param converter the converter object to be deleted
+ * @see ucnv_open
+ * @see ucnv_openU
+ * @see ucnv_openCCSID
+ * @stable ICU 2.0
+ */
+U_STABLE void  U_EXPORT2
+ucnv_close(UConverter * converter);
+
+/**
+ * Fills in the output parameter, subChars, with the substitution characters
+ * as multiple bytes.
+ * If ucnv_setSubstString() set a Unicode string because the converter is
+ * stateful, then subChars will be an empty string.
+ *
+ * @param converter the Unicode converter
+ * @param subChars the subsitution characters
+ * @param len on input the capacity of subChars, on output the number 
+ * of bytes copied to it
+ * @param  err the outgoing error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @see ucnv_setSubstString
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getSubstChars(const UConverter *converter,
+                   char *subChars,
+                   int8_t *len,
+                   UErrorCode *err);
+
+/**
+ * Sets the substitution chars when converting from unicode to a codepage. The
+ * substitution is specified as a string of 1-4 bytes, and may contain
+ * <TT>NULL</TT> bytes.
+ * The subChars must represent a single character. The caller needs to know the
+ * byte sequence of a valid character in the converter's charset.
+ * For some converters, for example some ISO 2022 variants, only single-byte
+ * substitution characters may be supported.
+ * The newer ucnv_setSubstString() function relaxes these limitations.
+ *
+ * @param converter the Unicode converter
+ * @param subChars the substitution character byte sequence we want set
+ * @param len the number of bytes in subChars
+ * @param err the error status code.  <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
+ * len is bigger than the maximum number of bytes allowed in subchars
+ * @see ucnv_setSubstString
+ * @see ucnv_getSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setSubstChars(UConverter *converter,
+                   const char *subChars,
+                   int8_t len,
+                   UErrorCode *err);
+
+/**
+ * Set a substitution string for converting from Unicode to a charset.
+ * The caller need not know the charset byte sequence for each charset.
+ *
+ * Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence
+ * for a single character, this function takes a Unicode string with
+ * zero, one or more characters, and immediately verifies that the string can be
+ * converted to the charset.
+ * If not, or if the result is too long (more than 32 bytes as of ICU 3.6),
+ * then the function returns with an error accordingly.
+ *
+ * Also unlike ucnv_setSubstChars(), this function works for stateful charsets
+ * by converting on the fly at the point of substitution rather than setting
+ * a fixed byte sequence.
+ *
+ * @param cnv The UConverter object.
+ * @param s The Unicode string.
+ * @param length The number of UChars in s, or -1 for a NUL-terminated string.
+ * @param err Pointer to a standard ICU error code. Its input value must
+ *            pass the U_SUCCESS() test, or else the function returns
+ *            immediately. Check for U_FAILURE() on output or use with
+ *            function chaining. (See User Guide for details.)
+ *
+ * @see ucnv_setSubstChars
+ * @see ucnv_getSubstChars
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ucnv_setSubstString(UConverter *cnv,
+                    const UChar *s,
+                    int32_t length,
+                    UErrorCode *err);
+
+/**
+ * Fills in the output parameter, errBytes, with the error characters from the
+ * last failing conversion.
+ *
+ * @param converter the Unicode converter
+ * @param errBytes the codepage bytes which were in error
+ * @param len on input the capacity of errBytes, on output the number of
+ *  bytes which were copied to it
+ * @param err the error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getInvalidChars(const UConverter *converter,
+                     char *errBytes,
+                     int8_t *len,
+                     UErrorCode *err);
+
+/**
+ * Fills in the output parameter, errChars, with the error characters from the
+ * last failing conversion.
+ *
+ * @param converter the Unicode converter
+ * @param errUChars the UChars which were in error
+ * @param len on input the capacity of errUChars, on output the number of 
+ *  UChars which were copied to it
+ * @param err the error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getInvalidUChars(const UConverter *converter,
+                      UChar *errUChars,
+                      int8_t *len,
+                      UErrorCode *err);
+
+/**
+ * Resets the state of a converter to the default state. This is used
+ * in the case of an error, to restart a conversion from a known default state.
+ * It will also empty the internal output buffers.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_reset(UConverter *converter);
+
+/**
+ * Resets the to-Unicode part of a converter state to the default state.
+ * This is used in the case of an error to restart a conversion to
+ * Unicode to a known default state. It will also empty the internal
+ * output buffers used for the conversion to Unicode codepoints.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucnv_resetToUnicode(UConverter *converter);
+
+/**
+ * Resets the from-Unicode part of a converter state to the default state.
+ * This is used in the case of an error to restart a conversion from
+ * Unicode to a known default state. It will also empty the internal output
+ * buffers used for the conversion from Unicode codepoints.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucnv_resetFromUnicode(UConverter *converter);
+
+/**
+ * Returns the maximum number of bytes that are output per UChar in conversion
+ * from Unicode using this converter.
+ * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING
+ * to calculate the size of a target buffer for conversion from Unicode.
+ *
+ * Note: Before ICU 2.8, this function did not return reliable numbers for
+ * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS.
+ *
+ * This number may not be the same as the maximum number of bytes per
+ * "conversion unit". In other words, it may not be the intuitively expected
+ * number of bytes per character that would be published for a charset,
+ * and may not fulfill any other purpose than the allocation of an output
+ * buffer of guaranteed sufficient size for a given input length and converter.
+ *
+ * Examples for special cases that are taken into account:
+ * - Supplementary code points may convert to more bytes than BMP code points.
+ *   This function returns bytes per UChar (UTF-16 code unit), not per
+ *   Unicode code point, for efficient buffer allocation.
+ * - State-shifting output (SI/SO, escapes, etc.) from stateful converters.
+ * - When m input UChars are converted to n output bytes, then the maximum m/n
+ *   is taken into account.
+ *
+ * The number returned here does not take into account
+ * (see UCNV_GET_MAX_BYTES_FOR_STRING):
+ * - callbacks which output more than one charset character sequence per call,
+ *   like escape callbacks
+ * - initial and final non-character bytes that are output by some converters
+ *   (automatic BOMs, initial escape sequence, final SI, etc.)
+ *
+ * Examples for returned values:
+ * - SBCS charsets: 1
+ * - Shift-JIS: 2
+ * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
+ * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
+ * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
+ * - ISO-2022: 3 (always outputs UTF-8)
+ * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
+ * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS)
+ *
+ * @param converter The Unicode converter.
+ * @return The maximum number of bytes per UChar that are output by ucnv_fromUnicode(),
+ *         to be used together with UCNV_GET_MAX_BYTES_FOR_STRING for buffer allocation.
+ *
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @see ucnv_getMinCharSize
+ * @stable ICU 2.0
+ */
+U_STABLE int8_t U_EXPORT2
+ucnv_getMaxCharSize(const UConverter *converter);
+
+/**
+ * Calculates the size of a buffer for conversion from Unicode to a charset.
+ * The calculated size is guaranteed to be sufficient for this conversion.
+ *
+ * It takes into account initial and final non-character bytes that are output
+ * by some converters.
+ * It does not take into account callbacks which output more than one charset
+ * character sequence per call, like escape callbacks.
+ * The default (substitution) callback only outputs one charset character sequence.
+ *
+ * @param length Number of UChars to be converted.
+ * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter
+ *                    that will be used.
+ * @return Size of a buffer that will be large enough to hold the output bytes of
+ *         converting length UChars with the converter that returned the maxCharSize.
+ *
+ * @see ucnv_getMaxCharSize
+ * @stable ICU 2.8
+ */
+#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
+     (((int32_t)(length)+10)*(int32_t)(maxCharSize))
+
+/**
+ * Returns the minimum byte length for characters in this codepage. 
+ * This is usually either 1 or 2.
+ * @param converter the Unicode converter
+ * @return the minimum number of bytes allowed by this particular converter
+ * @see ucnv_getMaxCharSize
+ * @stable ICU 2.0
+ */
+U_STABLE int8_t U_EXPORT2
+ucnv_getMinCharSize(const UConverter *converter);
+
+/**
+ * Returns the display name of the converter passed in based on the Locale 
+ * passed in. If the locale contains no display name, the internal ASCII
+ * name will be filled in.
+ *
+ * @param converter the Unicode converter.
+ * @param displayLocale is the specific Locale we want to localised for
+ * @param displayName user provided buffer to be filled in
+ * @param displayNameCapacity size of displayName Buffer
+ * @param err error status code
+ * @return displayNameLength number of UChar needed in displayName
+ * @see ucnv_getName
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_getDisplayName(const UConverter *converter,
+                    const char *displayLocale,
+                    UChar *displayName,
+                    int32_t displayNameCapacity,
+                    UErrorCode *err);
+
+/**
+ * Gets the internal, canonical name of the converter (zero-terminated).
+ * The lifetime of the returned string will be that of the converter 
+ * passed to this function.
+ * @param converter the Unicode converter
+ * @param err UErrorCode status
+ * @return the internal name of the converter
+ * @see ucnv_getDisplayName
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2 
+ucnv_getName(const UConverter *converter, UErrorCode *err);
+
+/**
+ * Gets a codepage number associated with the converter. This is not guaranteed
+ * to be the one used to create the converter. Some converters do not represent
+ * platform registered codepages and return zero for the codepage number.
+ * The error code fill-in parameter indicates if the codepage number
+ * is available.
+ * Does not check if the converter is <TT>NULL</TT> or if converter's data
+ * table is <TT>NULL</TT>.
+ *
+ * Important: The use of CCSIDs is not recommended because it is limited
+ * to only two platforms in principle and only one (UCNV_IBM) in the current
+ * ICU converter API.
+ * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely.
+ * For more details see ucnv_openCCSID().
+ *
+ * @param converter the Unicode converter
+ * @param err the error status code.
+ * @return If any error occurrs, -1 will be returned otherwise, the codepage number
+ * will be returned
+ * @see ucnv_openCCSID
+ * @see ucnv_getPlatform
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_getCCSID(const UConverter *converter,
+              UErrorCode *err);
+
+/**
+ * Gets a codepage platform associated with the converter. Currently, 
+ * only <TT>UCNV_IBM</TT> will be returned.
+ * Does not test if the converter is <TT>NULL</TT> or if converter's data 
+ * table is <TT>NULL</TT>. 
+ * @param converter the Unicode converter
+ * @param err the error status code.
+ * @return The codepage platform
+ * @stable ICU 2.0
+ */
+U_STABLE UConverterPlatform U_EXPORT2
+ucnv_getPlatform(const UConverter *converter,
+                 UErrorCode *err);
+
+/**
+ * Gets the type of the converter
+ * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, 
+ * EBCDIC_STATEFUL, LATIN_1
+ * @param converter a valid, opened converter
+ * @return the type of the converter
+ * @stable ICU 2.0
+ */
+U_STABLE UConverterType U_EXPORT2
+ucnv_getType(const UConverter * converter);
+
+/**
+ * Gets the "starter" (lead) bytes for converters of type MBCS.
+ * Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
+ * is not MBCS. Fills in an array of type UBool, with the value of the byte 
+ * as offset to the array. For example, if (starters[0x20] == TRUE) at return,
+ * it means that the byte 0x20 is a starter byte in this converter.
+ * Context pointers are always owned by the caller.
+ * 
+ * @param converter a valid, opened converter of type MBCS
+ * @param starters an array of size 256 to be filled in
+ * @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the 
+ * converter is not a type which can return starters.
+ * @see ucnv_getType
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getStarters(const UConverter* converter, 
+                 UBool starters[256],
+                 UErrorCode* err);
+
+
+/**
+ * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet().
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.6
+ */
+typedef enum UConverterUnicodeSet {
+    /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */
+    UCNV_ROUNDTRIP_SET,
+    /** Select the set of Unicode code points with roundtrip or fallback mappings. @draft ICU 4.0 */
+    UCNV_ROUNDTRIP_AND_FALLBACK_SET,
+    /** Number of UConverterUnicodeSet selectors. @stable ICU 2.6 */
+    UCNV_SET_COUNT
+} UConverterUnicodeSet;
+
+
+/**
+ * Returns the set of Unicode code points that can be converted by an ICU converter.
+ *
+ * Returns one of several kinds of set:
+ *
+ * 1. UCNV_ROUNDTRIP_SET
+ *
+ * The set of all Unicode code points that can be roundtrip-converted
+ * (converted without any data loss) with the converter (ucnv_fromUnicode()).
+ * This set will not include code points that have fallback mappings
+ * or are only the result of reverse fallback mappings.
+ * This set will also not include PUA code points with fallbacks, although
+ * ucnv_fromUnicode() will always uses those mappings despite ucnv_setFallback().
+ * See UTR #22 "Character Mapping Markup Language"
+ * at http://www.unicode.org/reports/tr22/
+ *
+ * This is useful for example for
+ * - checking that a string or document can be roundtrip-converted with a converter,
+ *   without/before actually performing the conversion
+ * - testing if a converter can be used for text for typical text for a certain locale,
+ *   by comparing its roundtrip set with the set of ExemplarCharacters from
+ *   ICU's locale data or other sources
+ *
+ * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET
+ *
+ * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode())
+ * when fallbacks are turned on (see ucnv_setFallback()).
+ * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks).
+ *
+ * In the future, there may be more UConverterUnicodeSet choices to select
+ * sets with different properties.
+ *
+ * @param cnv The converter for which a set is requested.
+ * @param setFillIn A valid USet *. It will be cleared by this function before
+ *            the converter's specific set is filled into the USet.
+ * @param whichSet A UConverterUnicodeSet selector;
+ *              currently UCNV_ROUNDTRIP_SET is the only supported value.
+ * @param pErrorCode ICU error code in/out parameter.
+ *                   Must fulfill U_SUCCESS before the function call.
+ *
+ * @see UConverterUnicodeSet
+ * @see uset_open
+ * @see uset_close
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+ucnv_getUnicodeSet(const UConverter *cnv,
+                   USet *setFillIn,
+                   UConverterUnicodeSet whichSet,
+                   UErrorCode *pErrorCode);
+
+/**
+ * Gets the current calback function used by the converter when an illegal
+ *  or invalid codepage sequence is found. 
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter the unicode converter
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
+ * @see ucnv_setToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getToUCallBack (const UConverter * converter,
+                     UConverterToUCallback *action,
+                     const void **context);
+
+/**
+ * Gets the current callback function used by the converter when illegal 
+ * or invalid Unicode sequence is found.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter the unicode converter
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_getFromUCallBack (const UConverter * converter,
+                       UConverterFromUCallback *action,
+                       const void **context);
+
+/**
+ * Changes the callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
+ * Predefined actions and contexts can be found in the ucnv_err.h header.
+ *
+ * @param converter the unicode converter
+ * @param newAction the new callback function
+ * @param newContext the new toUnicode callback context pointer. This can be NULL.
+ * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
+ * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
+ * @param err The error code status
+ * @see ucnv_getToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setToUCallBack (UConverter * converter,
+                     UConverterToUCallback newAction,
+                     const void* newContext,
+                     UConverterToUCallback *oldAction,
+                     const void** oldContext,
+                     UErrorCode * err);
+
+/**
+ * Changes the current callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
+ * Predefined actions and contexts can be found in the ucnv_err.h header.
+ *
+ * @param converter the unicode converter
+ * @param newAction the new callback function
+ * @param newContext the new fromUnicode callback context pointer. This can be NULL.
+ * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
+ * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
+ * @param err The error code status
+ * @see ucnv_getFromUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setFromUCallBack (UConverter * converter,
+                       UConverterFromUCallback newAction,
+                       const void *newContext,
+                       UConverterFromUCallback *oldAction,
+                       const void **oldContext,
+                       UErrorCode * err);
+
+/**
+ * Converts an array of unicode characters to an array of codepage
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ * 
+ * The source pointer is an in/out parameter. It starts out pointing where the 
+ * conversion is to begin, and ends up pointing after the last UChar consumed. 
+ * 
+ * Target similarly starts out pointer at the first available byte in the output
+ * buffer, and ends up pointing after the last byte written to the output.
+ * 
+ * The converter always attempts to consume the entire source buffer, unless 
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function.  When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ *  consumed. At that point, the caller should reset the source and
+ *  sourceLimit pointers to point to the next chunk.
+ * 
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set.
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ * 
+ * This is a <I>stateful</I> conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converters' internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source  pointers
+ *  with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ *  codepage characters to. Output : points to after the last codepage character copied
+ *  to <TT>target</TT>.
+ * @param targetLimit the pointer just after last of the <TT>target</TT> buffer
+ * @param source I/O parameter, pointer to pointer to the source Unicode character buffer. 
+ * @param sourceLimit the pointer just after the last of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
+ * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks)  -1 will be placed for offsets. 
+ * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available
+ * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until
+ * the source buffer is consumed.
+ * @param err the error status.  <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
+ * converter is <TT>NULL</TT>.
+ * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is 
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setToUCallBack
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucnv_fromUnicode (UConverter * converter,
+                  char **target,
+                  const char *targetLimit,
+                  const UChar ** source,
+                  const UChar * sourceLimit,
+                  int32_t* offsets,
+                  UBool flush,
+                  UErrorCode * err);
+
+/**
+ * Converts a buffer of codepage bytes into an array of unicode UChars
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ * 
+ * The source pointer is an in/out parameter. It starts out pointing where the 
+ * conversion is to begin, and ends up pointing after the last byte of source consumed. 
+ * 
+ * Target similarly starts out pointer at the first available UChar in the output
+ * buffer, and ends up pointing after the last UChar written to the output. 
+ * It does NOT necessarily keep UChar sequences together.
+ * 
+ * The converter always attempts to consume the entire source buffer, unless 
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function.  When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ *  consumed. At that point, the caller should reset the source and
+ *  sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ * 
+ * This is a <I>stateful</I> conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converters' internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source  pointers
+ *  with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ *  UChars into. Output : points to after the last UChar copied.
+ * @param targetLimit the pointer just after the end of the <TT>target</TT> buffer
+ * @param source I/O parameter, pointer to pointer to the source codepage buffer. 
+ * @param sourceLimit the pointer to the byte after the end of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
+ * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks)  -1 will be placed for offsets. 
+ * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available
+ * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until
+ * the source buffer is consumed.
+ * @param err the error status.  <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
+ * converter is <TT>NULL</TT>.
+ * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is 
+ * still data to be written to the target. 
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setFromUCallBack
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucnv_toUnicode(UConverter *converter,
+               UChar **target,
+               const UChar *targetLimit,
+               const char **source,
+               const char *sourceLimit,
+               int32_t *offsets,
+               UBool flush,
+               UErrorCode *err);
+
+/**
+ * Convert the Unicode string into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_fromUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
+ *
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
+ * @param src the input Unicode string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of chars available at dest
+ * @param pErrorCode normal ICU error code;
+ *                  common error codes that may be set by this function include
+ *                  U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ *                  U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ *         if the length is greater than destCapacity, then the string will not fit
+ *         and a buffer of the indicated length would need to be passed in
+ * @see ucnv_fromUnicode
+ * @see ucnv_convert
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromUChars(UConverter *cnv,
+                char *dest, int32_t destCapacity,
+                const UChar *src, int32_t srcLength,
+                UErrorCode *pErrorCode);
+
+/**
+ * Convert the codepage string into a Unicode string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_toUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * 2*srcLength (each char may be converted into a surrogate pair).
+ *
+ * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called)
+ * @param src the input codepage string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of UChars available at dest
+ * @param pErrorCode normal ICU error code;
+ *                  common error codes that may be set by this function include
+ *                  U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ *                  U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ *         if the length is greater than destCapacity, then the string will not fit
+ *         and a buffer of the indicated length would need to be passed in
+ * @see ucnv_toUnicode
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toUChars(UConverter *cnv,
+              UChar *dest, int32_t destCapacity,
+              const char *src, int32_t srcLength,
+              UErrorCode *pErrorCode);
+
+/**
+ * Convert a codepage buffer into Unicode one character at a time.
+ * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
+ *
+ * Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
+ * - Faster for small amounts of data, for most converters, e.g.,
+ *   US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
+ *   (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
+ *    it uses ucnv_toUnicode() internally.)
+ * - Convenient.
+ *
+ * Limitations compared to ucnv_toUnicode():
+ * - Always assumes flush=TRUE.
+ *   This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
+ *   that is, for where the input is supplied in multiple buffers,
+ *   because ucnv_getNextUChar() will assume the end of the input at the end
+ *   of the first buffer.
+ * - Does not provide offset output.
+ *
+ * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
+ * ucnv_getNextUChar() uses the current state of the converter
+ * (unlike ucnv_toUChars() which always resets first).
+ * However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
+ * stopped in the middle of a character sequence (with flush=FALSE),
+ * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
+ * internally until the next character boundary.
+ * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
+ * start at a character boundary.)
+ *
+ * Instead of using ucnv_getNextUChar(), it is recommended
+ * to convert using ucnv_toUnicode() or ucnv_toUChars()
+ * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
+ * or a C++ CharacterIterator or similar.
+ * This allows streaming conversion and offset output, for example.
+ *
+ * <p>Handling of surrogate pairs and supplementary-plane code points:<br>
+ * There are two different kinds of codepages that provide mappings for surrogate characters:
+ * <ul>
+ *   <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode
+ *       code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff.
+ *       Each valid sequence will result in exactly one returned code point.
+ *       If a sequence results in a single surrogate, then that will be returned
+ *       by itself, even if a neighboring sequence encodes the matching surrogate.</li>
+ *   <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points
+ *       including surrogates. Code points in supplementary planes are represented with
+ *       two sequences, each encoding a surrogate.
+ *       For these codepages, matching pairs of surrogates will be combined into single
+ *       code points for returning from this function.
+ *       (Note that SCSU is actually a mix of these codepage types.)</li>
+ * </ul></p>
+ *
+ * @param converter an open UConverter
+ * @param source the address of a pointer to the codepage buffer, will be
+ *  updated to point after the bytes consumed in the conversion call.
+ * @param sourceLimit points to the end of the input buffer
+ * @param err fills in error status (see ucnv_toUnicode)
+ * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input 
+ * is empty or does not convert to any output (e.g.: pure state-change 
+ * codes SI/SO, escape sequences for ISO 2022,
+ * or if the callback did not output anything, ...).
+ * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because
+ *  the "buffer" is the return code. However, there might be subsequent output
+ *  stored in the converter object
+ * that will be returned in following calls to this function.
+ * @return a UChar32 resulting from the partial conversion of source
+ * @see ucnv_toUnicode
+ * @see ucnv_toUChars
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+ucnv_getNextUChar(UConverter * converter,
+                  const char **source,
+                  const char * sourceLimit,
+                  UErrorCode * err);
+
+/**
+ * Convert from one external charset to another using two existing UConverters.
+ * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
+ * are used, "pivoting" through 16-bit Unicode.
+ *
+ * Important: For streaming conversion (multiple function calls for successive
+ * parts of a text stream), the caller must provide a pivot buffer explicitly,
+ * and must preserve the pivot buffer and associated pointers from one
+ * call to another. (The buffer may be moved if its contents and the relative
+ * pointer positions are preserved.)
+ *
+ * There is a similar function, ucnv_convert(),
+ * which has the following limitations:
+ * - it takes charset names, not converter objects, so that
+ *   - two converters are opened for each call
+ *   - only single-string conversion is possible, not streaming operation
+ * - it does not provide enough information to find out,
+ *   in case of failure, whether the toUnicode or
+ *   the fromUnicode conversion failed
+ *
+ * By contrast, ucnv_convertEx()
+ * - takes UConverter parameters instead of charset names
+ * - fully exposes the pivot buffer for streaming conversion and complete error handling
+ *
+ * ucnv_convertEx() also provides further convenience:
+ * - an option to reset the converters at the beginning
+ *   (if reset==TRUE, see parameters;
+ *    also sets *pivotTarget=*pivotSource=pivotStart)
+ * - allow NUL-terminated input
+ *   (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ *   (if sourceLimit==NULL, see parameters)
+ * - terminate with a NUL on output
+ *   (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ *   or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ *   the target buffer
+ * - the pivot buffer can be provided internally;
+ *   possible only for whole-string conversion, not streaming conversion;
+ *   in this case, the caller will not be able to get details about where an
+ *   error occurred
+ *   (if pivotStart==NULL, see below)
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ * - a conversion error occurred
+ *   (other U_FAILURE(), see description of pErrorCode)
+ *
+ * Limitation compared to the direct use of
+ * ucnv_fromUnicode() and ucnv_toUnicode():
+ * ucnv_convertEx() does not provide offset information.
+ *
+ * Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
+ * ucnv_convertEx() does not support preflighting directly.
+ *
+ * Sample code for converting a single string from
+ * one external charset to UTF-8, ignoring the location of errors:
+ *
+ * \code
+ * int32_t
+ * myToUTF8(UConverter *cnv,
+ *          const char *s, int32_t length,
+ *          char *u8, int32_t capacity,
+ *          UErrorCode *pErrorCode) {
+ *     UConverter *utf8Cnv;
+ *     char *target;
+ *
+ *     if(U_FAILURE(*pErrorCode)) {
+ *         return 0;
+ *     }
+ *
+ *     utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
+ *     if(U_FAILURE(*pErrorCode)) {
+ *         return 0;
+ *     }
+ *
+ *     if(length<0) {
+ *         length=strlen(s);
+ *     }
+ *     target=u8;
+ *     ucnv_convertEx(cnv, utf8Cnv,
+ *                    &target, u8+capacity,
+ *                    &s, s+length,
+ *                    NULL, NULL, NULL, NULL,
+ *                    TRUE, TRUE,
+ *                    pErrorCode);
+ * 
+ *     myReleaseCachedUTF8Converter(utf8Cnv);
+ *
+ *     // return the output string length, but without preflighting
+ *     return (int32_t)(target-u8);
+ * }
+ * \endcode
+ *
+ * @param targetCnv     Output converter, used to convert from the UTF-16 pivot
+ *                      to the target using ucnv_fromUnicode().
+ * @param sourceCnv     Input converter, used to convert from the source to
+ *                      the UTF-16 pivot using ucnv_toUnicode().
+ * @param target        I/O parameter, same as for ucnv_fromUChars().
+ *                      Input: *target points to the beginning of the target buffer.
+ *                      Output: *target points to the first unit after the last char written.
+ * @param targetLimit   Pointer to the first unit after the target buffer.
+ * @param source        I/O parameter, same as for ucnv_toUChars().
+ *                      Input: *source points to the beginning of the source buffer.
+ *                      Output: *source points to the first unit after the last char read.
+ * @param sourceLimit   Pointer to the first unit after the source buffer.
+ * @param pivotStart    Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
+ *                      then an internal buffer is used and the other pivot
+ *                      arguments are ignored and can be NULL as well.
+ * @param pivotSource   I/O parameter, same as source in ucnv_fromUChars() for
+ *                      conversion from the pivot buffer to the target buffer.
+ * @param pivotTarget   I/O parameter, same as target in ucnv_toUChars() for
+ *                      conversion from the source buffer to the pivot buffer.
+ *                      It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
+ *                      and pivotStart<pivotLimit (unless pivotStart==NULL).
+ * @param pivotLimit    Pointer to the first unit after the pivot buffer.
+ * @param reset         If TRUE, then ucnv_resetToUnicode(sourceCnv) and
+ *                      ucnv_resetFromUnicode(targetCnv) are called, and the
+ *                      pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart).
+ * @param flush         If true, indicates the end of the input.
+ *                      Passed directly to ucnv_toUnicode(), and carried over to
+ *                      ucnv_fromUnicode() when the source is empty as well.
+ * @param pErrorCode    ICU error code in/out parameter.
+ *                      Must fulfill U_SUCCESS before the function call.
+ *                      U_BUFFER_OVERFLOW_ERROR always refers to the target buffer
+ *                      because overflows into the pivot buffer are handled internally.
+ *                      Other conversion errors are from the source-to-pivot
+ *                      conversion if *pivotSource==pivotStart, otherwise from
+ *                      the pivot-to-target conversion.
+ *
+ * @see ucnv_convert
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_toAlgorithmic
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
+               char **target, const char *targetLimit,
+               const char **source, const char *sourceLimit,
+               UChar *pivotStart, UChar **pivotSource,
+               UChar **pivotTarget, const UChar *pivotLimit,
+               UBool reset, UBool flush,
+               UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, two converters are opened according to the name arguments,
+ * then the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(), then the converters are closed again.
+ *
+ * This is a convenience function, not an efficient way to convert a lot of text:
+ * ucnv_convert()
+ * - takes charset names, not converter objects, so that
+ *   - two converters are opened for each call
+ *   - only single-string conversion is possible, not streaming operation
+ * - does not provide enough information to find out,
+ *   in case of failure, whether the toUnicode or
+ *   the fromUnicode conversion failed
+ * - allows NUL-terminated input
+ *   (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ *   (if sourceLength==-1, see parameters)
+ * - terminate with a NUL on output
+ *   (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ *   or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ *   the target buffer
+ * - a pivot buffer is provided internally
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ *   and either the target buffer is terminated with a single NUL byte
+ *   or the error code is set to U_STRING_NOT_TERMINATED_WARNING
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ *   and the full output string length is returned ("preflighting")
+ * - a conversion error occurred
+ *   (other U_FAILURE(), see description of pErrorCode)
+ *
+ * @param toConverterName   The name of the converter that is used to convert
+ *                          from the UTF-16 pivot buffer to the target.
+ * @param fromConverterName The name of the converter that is used to convert
+ *                          from the source to the UTF-16 pivot buffer.
+ * @param target            Pointer to the output buffer.
+ * @param targetCapacity    Capacity of the target, in bytes.
+ * @param source            Pointer to the input buffer.
+ * @param sourceLength      Length of the input text, in bytes, or -1 for NUL-terminated input.
+ * @param pErrorCode        ICU error code in/out parameter.
+ *                          Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ *         and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_convertEx
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_toAlgorithmic
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_convert(const char *toConverterName,
+             const char *fromConverterName,
+             char *target,
+             int32_t targetCapacity,
+             const char *source,
+             int32_t sourceLength,
+             UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert()
+ * except that the two converters need not be looked up and opened completely.
+ *
+ * The source-to-pivot conversion uses the cnv converter parameter.
+ * The pivot-to-target conversion uses a purely algorithmic converter
+ * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
+ *
+ * Internally, the algorithmic converter is opened and closed for each
+ * function call, which is more efficient than using the public ucnv_open()
+ * but somewhat less efficient than only resetting an existing converter
+ * and using ucnv_convertEx().
+ *
+ * This function is more convenient than ucnv_convertEx() for single-string
+ * conversions, especially when "preflighting" is desired (returning the length
+ * of the complete output even if it does not fit into the target buffer;
+ * see the User Guide Strings chapter). See ucnv_convert() for details.
+ *
+ * @param algorithmicType   UConverterType constant identifying the desired target
+ *                          charset as a purely algorithmic converter.
+ *                          Those are converters for Unicode charsets like
+ *                          UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
+ *                          as well as US-ASCII and ISO-8859-1.
+ * @param cnv               The converter that is used to convert
+ *                          from the source to the UTF-16 pivot buffer.
+ * @param target            Pointer to the output buffer.
+ * @param targetCapacity    Capacity of the target, in bytes.
+ * @param source            Pointer to the input buffer.
+ * @param sourceLength      Length of the input text, in bytes
+ * @param pErrorCode        ICU error code in/out parameter.
+ *                          Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ *         and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_convert
+ * @see ucnv_convertEx
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toAlgorithmic(UConverterType algorithmicType,
+                   UConverter *cnv,
+                   char *target, int32_t targetCapacity,
+                   const char *source, int32_t sourceLength,
+                   UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert()
+ * except that the two converters need not be looked up and opened completely.
+ *
+ * The source-to-pivot conversion uses a purely algorithmic converter
+ * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
+ * The pivot-to-target conversion uses the cnv converter parameter.
+ *
+ * Internally, the algorithmic converter is opened and closed for each
+ * function call, which is more efficient than using the public ucnv_open()
+ * but somewhat less efficient than only resetting an existing converter
+ * and using ucnv_convertEx().
+ *
+ * This function is more convenient than ucnv_convertEx() for single-string
+ * conversions, especially when "preflighting" is desired (returning the length
+ * of the complete output even if it does not fit into the target buffer;
+ * see the User Guide Strings chapter). See ucnv_convert() for details.
+ *
+ * @param cnv               The converter that is used to convert
+ *                          from the UTF-16 pivot buffer to the target.
+ * @param algorithmicType   UConverterType constant identifying the desired source
+ *                          charset as a purely algorithmic converter.
+ *                          Those are converters for Unicode charsets like
+ *                          UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
+ *                          as well as US-ASCII and ISO-8859-1.
+ * @param target            Pointer to the output buffer.
+ * @param targetCapacity    Capacity of the target, in bytes.
+ * @param source            Pointer to the input buffer.
+ * @param sourceLength      Length of the input text, in bytes
+ * @param pErrorCode        ICU error code in/out parameter.
+ *                          Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ *         and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_convert
+ * @see ucnv_convertEx
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromAlgorithmic(UConverter *cnv,
+                     UConverterType algorithmicType,
+                     char *target, int32_t targetCapacity,
+                     const char *source, int32_t sourceLength,
+                     UErrorCode *pErrorCode);
+
+/**
+ * Frees up memory occupied by unused, cached converter shared data.
+ *
+ * @return the number of cached converters successfully deleted
+ * @see ucnv_close
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_flushCache(void);
+
+/**
+ * Returns the number of available converters, as per the alias file.
+ *
+ * @return the number of available converters
+ * @see ucnv_getAvailableName
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_countAvailable(void);
+
+/**
+ * Gets the canonical converter name of the specified converter from a list of
+ * all available converters contaied in the alias file. All converters
+ * in this list can be opened.
+ *
+ * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvaiable()]</TT>)
+ * @return a pointer a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
+ * @see ucnv_countAvailable
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+ucnv_getAvailableName(int32_t n);
+
+/**
+ * Returns a UEnumeration to enumerate all of the canonical converter
+ * names, as per the alias file, regardless of the ability to open each
+ * converter.
+ *
+ * @return A UEnumeration object for getting all the recognized canonical
+ *   converter names.
+ * @see ucnv_getAvailableName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.4
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucnv_openAllNames(UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of aliases for a given converter or alias name.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * This method only enumerates the listed entries in the alias file.
+ * @param alias alias name
+ * @param pErrorCode error status
+ * @return number of names on alias list for given alias
+ * @stable ICU 2.0
+ */
+U_STABLE uint16_t U_EXPORT2 
+ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
+
+/**
+ * Gives the name of the alias at given index of alias list.
+ * This method only enumerates the listed entries in the alias file.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param alias alias name
+ * @param n index in alias list
+ * @param pErrorCode result of operation
+ * @return returns the name of the alias at given index
+ * @see ucnv_countAliases
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2 
+ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Fill-up the list of alias names for the given alias.
+ * This method only enumerates the listed entries in the alias file.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param alias alias name
+ * @param aliases fill-in list, aliases is a pointer to an array of
+ *        <code>ucnv_countAliases()</code> string-pointers
+ *        (<code>const char *</code>) that will be filled in.
+ *        The strings themselves are owned by the library.
+ * @param pErrorCode result of operation
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
+
+/**
+ * Return a new UEnumeration object for enumerating all the
+ * alias names for a given converter that are recognized by a standard.
+ * This method only enumerates the listed entries in the alias file.
+ * The convrtrs.txt file can be modified to change the results of
+ * this function.
+ * The first result in this list is the same result given by
+ * <code>ucnv_getStandardName</code>, which is the default alias for
+ * the specified standard name. The returned object must be closed with
+ * <code>uenum_close</code> when you are done with the object.
+ *
+ * @param convName original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ *      are such standards
+ * @param pErrorCode The error code
+ * @return A UEnumeration object for getting all aliases that are recognized
+ *      by a standard. If any of the parameters are invalid, NULL
+ *      is returned.
+ * @see ucnv_getStandardName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.2
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucnv_openStandardNames(const char *convName,
+                       const char *standard,
+                       UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of standards associated to converter names.
+ * @return number of standards
+ * @stable ICU 2.0
+ */
+U_STABLE uint16_t U_EXPORT2
+ucnv_countStandards(void);
+
+/**
+ * Gives the name of the standard at given index of standard list.
+ * @param n index in standard list
+ * @param pErrorCode result of operation
+ * @return returns the name of the standard at given index. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Returns a standard name for a given converter name.
+ * <p>
+ * Example alias table:<br>
+ * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ * <p>
+ * Result of ucnv_getStandardName("conv", "STANDARD1") from example
+ * alias table:<br>
+ * <b>"alias2"</b>
+ *
+ * @param name original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ *        are such standards
+ * @param pErrorCode result of operation
+ * @return returns the standard converter name;
+ *         if a standard converter name cannot be determined,
+ *         then <code>NULL</code> is returned. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * This function will return the internal canonical converter name of the
+ * tagged alias. This is the opposite of ucnv_openStandardNames, which
+ * returns the tagged alias given the canonical name.
+ * <p>
+ * Example alias table:<br>
+ * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ * <p>
+ * Result of ucnv_getStandardName("alias1", "STANDARD1") from example
+ * alias table:<br>
+ * <b>"conv"</b>
+ *
+ * @return returns the canonical converter name;
+ *         if a standard or alias name cannot be determined,
+ *         then <code>NULL</code> is returned. The returned string is
+ *         owned by the library.
+ * @see ucnv_getStandardName
+ * @stable ICU 2.4
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * Returns the current default converter name. If you want to open
+ * a default converter, you do not need to use this function.
+ * It is faster if you pass a NULL argument to ucnv_open the
+ * default converter.
+ *
+ * @return returns the current default converter name.
+ *         Storage owned by the library
+ * @see ucnv_setDefaultName
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+ucnv_getDefaultName(void);
+
+/**
+ * This function is not thread safe. DO NOT call this function when ANY ICU
+ * function is being used from more than one thread! This function sets the
+ * current default converter name. If this function needs to be called, it
+ * should be called during application initialization. Most of the time, the
+ * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument
+ * is sufficient for your application.
+ * @param name the converter name to be the default (must be known by ICU).
+ * @see ucnv_getDefaultName
+ * @system
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_setDefaultName(const char *name);
+
+/**
+ * Fixes the backslash character mismapping.  For example, in SJIS, the backslash 
+ * character in the ASCII portion is also used to represent the yen currency sign.  
+ * When mapping from Unicode character 0x005C, it's unclear whether to map the 
+ * character back to yen or backslash in SJIS.  This function will take the input
+ * buffer and replace all the yen sign characters with backslash.  This is necessary
+ * when the user tries to open a file with the input buffer on Windows.
+ * This function will test the converter to see whether such mapping is
+ * required.  You can sometimes avoid using this function by using the correct version
+ * of Shift-JIS.
+ *
+ * @param cnv The converter representing the target codepage.
+ * @param source the input buffer to be fixed
+ * @param sourceLen the length of the input buffer
+ * @see ucnv_isAmbiguous
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen);
+
+/**
+ * Determines if the converter contains ambiguous mappings of the same
+ * character or not.
+ * @param cnv the converter to be tested
+ * @return TRUE if the converter contains ambiguous mapping of the same 
+ * character, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2
+ucnv_isAmbiguous(const UConverter *cnv);
+
+/**
+ * Sets the converter to use fallback mappings or not.
+ * Regardless of this flag, the converter will always use
+ * fallbacks from Unicode Private Use code points, as well as
+ * reverse fallbacks (to Unicode).
+ * For details see ".ucm File Format"
+ * in the Conversion Data chapter of the ICU User Guide:
+ * http://www.icu-project.org/userguide/conversion-data.html#ucmformat
+ *
+ * @param cnv The converter to set the fallback mapping usage on.
+ * @param usesFallback TRUE if the user wants the converter to take advantage of the fallback 
+ * mapping, FALSE otherwise.
+ * @stable ICU 2.0
+ * @see ucnv_usesFallback
+ */
+U_STABLE void U_EXPORT2 
+ucnv_setFallback(UConverter *cnv, UBool usesFallback);
+
+/**
+ * Determines if the converter uses fallback mappings or not.
+ * This flag has restrictions, see ucnv_setFallback().
+ *
+ * @param cnv The converter to be tested
+ * @return TRUE if the converter uses fallback, FALSE otherwise.
+ * @stable ICU 2.0
+ * @see ucnv_setFallback
+ */
+U_STABLE UBool U_EXPORT2 
+ucnv_usesFallback(const UConverter *cnv);
+
+/**
+ * Detects Unicode signature byte sequences at the start of the byte stream
+ * and returns the charset name of the indicated Unicode charset.
+ * NULL is returned when no Unicode signature is recognized.
+ * The number of bytes in the signature is output as well.
+ *
+ * The caller can ucnv_open() a converter using the charset name.
+ * The first code unit (UChar) from the start of the stream will be U+FEFF
+ * (the Unicode BOM/signature character) and can usually be ignored.
+ *
+ * For most Unicode charsets it is also possible to ignore the indicated
+ * number of initial stream bytes and start converting after them.
+ * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which
+ * this will not work. Therefore, it is best to ignore the first output UChar
+ * instead of the input signature bytes.
+ * <p>
+ * Usage:
+ * @code     
+ *      UErrorCode err = U_ZERO_ERROR;
+ *      char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
+ *      int32_t signatureLength = 0;
+ *      char *encoding = ucnv_detectUnicodeSignature(input,sizeof(input),&signatureLength,&err);
+ *      UConverter *conv = NULL;
+ *      UChar output[100];
+ *      UChar *target = output, *out;
+ *      char *source = input;
+ *      if(encoding!=NULL && U_SUCCESS(err)){
+ *          // should signature be discarded ?
+ *          conv = ucnv_open(encoding, &err);
+ *          // do the conversion
+ *          ucnv_toUnicode(conv,
+ *                         target, output + sizeof(output)/U_SIZEOF_UCHAR,
+ *                         source, input + sizeof(input),
+ *                         NULL, TRUE, &err);
+ *          out = output;
+ *          if (discardSignature){
+ *              ++out; // ignore initial U+FEFF
+ *          }
+ *          while(out != target) {
+ *              printf("%04x ", *out++);
+ *          }
+ *          puts("");
+ *      }
+ *     
+ * @endcode
+ *
+ * @param source            The source string in which the signature should be detected.
+ * @param sourceLength      Length of the input string, or -1 if terminated with a NUL byte.
+ * @param signatureLength   A pointer to int32_t to receive the number of bytes that make up the signature 
+ *                          of the detected UTF. 0 if not detected.
+ *                          Can be a NULL pointer.
+ * @param pErrorCode        ICU error code in/out parameter.
+ *                          Must fulfill U_SUCCESS before the function call.
+ * @return The name of the encoding detected. NULL if encoding is not detected. 
+ * @stable ICU 2.4
+ */
+U_STABLE const char* U_EXPORT2
+ucnv_detectUnicodeSignature(const char* source,
+                            int32_t sourceLength,
+                            int32_t *signatureLength,
+                            UErrorCode *pErrorCode);
+
+/**
+ * Returns the number of UChars held in the converter's internal state 
+ * because more input is needed for completing the conversion. This function is 
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @param cnv       The converter in which the input is held
+ * @param status    ICU error code in/out parameter.
+ *                  Must fulfill U_SUCCESS before the function call.
+ * @return The number of UChars in the state. -1 if an error is encountered.
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status);
+
+/**
+ * Returns the number of chars held in the converter's internal state
+ * because more input is needed for completing the conversion. This function is 
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @param cnv       The converter in which the input is held as internal state
+ * @param status    ICU error code in/out parameter.
+ *                  Must fulfill U_SUCCESS before the function call.
+ * @return The number of chars in the state. -1 if an error is encountered.
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status);
+
+#endif
+
+#endif
+/*_UCNV*/

Deleted: MacRuby/trunk/icu-1060/unicode/ucnv_cb.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ucnv_cb.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ucnv_cb.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,162 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 2000-2004, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
- *  ucnv_cb.h:
- *  External APIs for the ICU's codeset conversion library
- *  Helena Shih
- * 
- * Modification History:
- *
- *   Date        Name        Description
- */
-
-/**
- * \file 
- * \brief C UConverter functions to aid the writers of callbacks
- *
- * <h2> Callback API for UConverter </h2>
- * 
- * These functions are provided here for the convenience of the callback
- * writer. If you are just looking for callback functions to use, please
- * see ucnv_err.h.  DO NOT call these functions directly when you are 
- * working with converters, unless your code has been called as a callback
- * via ucnv_setFromUCallback or ucnv_setToUCallback !!
- * 
- * A note about error codes and overflow.  Unlike other ICU functions,
- * these functions do not expect the error status to be U_ZERO_ERROR.
- * Callbacks must be much more careful about their error codes.
- * The error codes used here are in/out parameters, which should be passed
- * back in the callback's error parameter.
- * 
- * For example, if you call ucnv_cbfromUWriteBytes to write data out 
- * to the output codepage, it may return U_BUFFER_OVERFLOW_ERROR if 
- * the data did not fit in the target. But this isn't a failing error, 
- * in fact, ucnv_cbfromUWriteBytes may be called AGAIN with the error
- * status still U_BUFFER_OVERFLOW_ERROR to attempt to write further bytes,
- * which will also go into the internal overflow buffers.
- * 
- * Concerning offsets, the 'offset' parameters here are relative to the start
- * of SOURCE.  For example, Suppose the string "ABCD" was being converted 
- * from Unicode into a codepage which doesn't have a mapping for 'B'.
- * 'A' will be written out correctly, but
- * The FromU Callback will be called on an unassigned character for 'B'.
- * At this point, this is the state of the world:
- *    Target:    A [..]     [points after A]
- *    Source:  A B [C] D    [points to C - B has been consumed]
- *             0 1  2  3 
- *    codePoint = "B"       [the unassigned codepoint] 
- * 
- * Now, suppose a callback wants to write the substitution character '?' to
- * the target. It calls ucnv_cbFromUWriteBytes() to write the ?. 
- * It should pass ZERO as the offset, because the offset as far as the 
- * callback is concerned is relative to the SOURCE pointer [which points 
- * before 'C'.]  If the callback goes into the args and consumes 'C' also,
- * it would call FromUWriteBytes with an offset of 1 (and advance the source
- * pointer).
- *
- */
-
-#ifndef UCNV_CB_H
-#define UCNV_CB_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/ucnv_err.h"
-
-/**
- * ONLY used by FromU callback functions.
- * Writes out the specified byte output bytes to the target byte buffer or to converter internal buffers.
- *
- * @param args callback fromUnicode arguments
- * @param source source bytes to write
- * @param length length of bytes to write
- * @param offsetIndex the relative offset index from callback.
- * @param err error status. If <TT>U_BUFFER_OVERFLOW</TT> is returned, then U_BUFFER_OVERFLOW <STRONG>must</STRONG> 
- * be returned to the user, because it means that not all data could be written into the target buffer, and some is 
- * in the converter error buffer.
- * @see ucnv_cbFromUWriteSub
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
-                        const char* source,
-                        int32_t length,
-                        int32_t offsetIndex,
-                        UErrorCode * err);
-
-/**
- * ONLY used by FromU callback functions.  
- * This function will write out the correct substitution character sequence 
- * to the target.
- *
- * @param args callback fromUnicode arguments
- * @param offsetIndex the relative offset index from the current source pointer to be used
- * @param err error status. If <TT>U_BUFFER_OVERFLOW</TT> is returned, then U_BUFFER_OVERFLOW <STRONG>must</STRONG> 
- * be returned to the user, because it means that not all data could be written into the target buffer, and some is 
- * in the converter error buffer.
- * @see ucnv_cbFromUWriteBytes
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
-                      int32_t offsetIndex,
-                      UErrorCode * err);
-
-/**
- * ONLY used by fromU callback functions.  
- * This function will write out the error character(s) to the target UChar buffer.
- *
- * @param args callback fromUnicode arguments
- * @param source pointer to pointer to first UChar to write [on exit: 1 after last UChar processed]
- * @param sourceLimit pointer after last UChar to write
- * @param offsetIndex the relative offset index from callback which will be set
- * @param err error status <TT>U_BUFFER_OVERFLOW</TT>
- * @see ucnv_cbToUWriteSub
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
-                             const UChar** source,
-                             const UChar*  sourceLimit,
-                             int32_t offsetIndex,
-                             UErrorCode * err);
-
-/**
- * ONLY used by ToU callback functions.
- *  This function will write out the specified characters to the target 
- * UChar buffer.
- *
- * @param args callback toUnicode arguments
- * @param source source string to write
- * @param length the length of source string
- * @param offsetIndex the relative offset index which will be written.
- * @param err error status <TT>U_BUFFER_OVERFLOW</TT>
- * @see ucnv_cbToUWriteSub
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
-                                             const UChar* source,
-                                             int32_t length,
-                                             int32_t offsetIndex,
-                                             UErrorCode * err);
-
-/**
- * ONLY used by ToU  callback functions.  
- * This function will write out the Unicode substitution character (U+FFFD).
- *
- * @param args callback fromUnicode arguments
- * @param offsetIndex the relative offset index from callback.
- * @param err error status <TT>U_BUFFER_OVERFLOW</TT>
- * @see ucnv_cbToUWriteUChars
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
-                       int32_t offsetIndex,
-                       UErrorCode * err);
-#endif
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/ucnv_cb.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ucnv_cb.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ucnv_cb.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ucnv_cb.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,162 @@
+/*
+**********************************************************************
+*   Copyright (C) 2000-2004, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+ *  ucnv_cb.h:
+ *  External APIs for the ICU's codeset conversion library
+ *  Helena Shih
+ * 
+ * Modification History:
+ *
+ *   Date        Name        Description
+ */
+
+/**
+ * \file 
+ * \brief C UConverter functions to aid the writers of callbacks
+ *
+ * <h2> Callback API for UConverter </h2>
+ * 
+ * These functions are provided here for the convenience of the callback
+ * writer. If you are just looking for callback functions to use, please
+ * see ucnv_err.h.  DO NOT call these functions directly when you are 
+ * working with converters, unless your code has been called as a callback
+ * via ucnv_setFromUCallback or ucnv_setToUCallback !!
+ * 
+ * A note about error codes and overflow.  Unlike other ICU functions,
+ * these functions do not expect the error status to be U_ZERO_ERROR.
+ * Callbacks must be much more careful about their error codes.
+ * The error codes used here are in/out parameters, which should be passed
+ * back in the callback's error parameter.
+ * 
+ * For example, if you call ucnv_cbfromUWriteBytes to write data out 
+ * to the output codepage, it may return U_BUFFER_OVERFLOW_ERROR if 
+ * the data did not fit in the target. But this isn't a failing error, 
+ * in fact, ucnv_cbfromUWriteBytes may be called AGAIN with the error
+ * status still U_BUFFER_OVERFLOW_ERROR to attempt to write further bytes,
+ * which will also go into the internal overflow buffers.
+ * 
+ * Concerning offsets, the 'offset' parameters here are relative to the start
+ * of SOURCE.  For example, Suppose the string "ABCD" was being converted 
+ * from Unicode into a codepage which doesn't have a mapping for 'B'.
+ * 'A' will be written out correctly, but
+ * The FromU Callback will be called on an unassigned character for 'B'.
+ * At this point, this is the state of the world:
+ *    Target:    A [..]     [points after A]
+ *    Source:  A B [C] D    [points to C - B has been consumed]
+ *             0 1  2  3 
+ *    codePoint = "B"       [the unassigned codepoint] 
+ * 
+ * Now, suppose a callback wants to write the substitution character '?' to
+ * the target. It calls ucnv_cbFromUWriteBytes() to write the ?. 
+ * It should pass ZERO as the offset, because the offset as far as the 
+ * callback is concerned is relative to the SOURCE pointer [which points 
+ * before 'C'.]  If the callback goes into the args and consumes 'C' also,
+ * it would call FromUWriteBytes with an offset of 1 (and advance the source
+ * pointer).
+ *
+ */
+
+#ifndef UCNV_CB_H
+#define UCNV_CB_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_err.h"
+
+/**
+ * ONLY used by FromU callback functions.
+ * Writes out the specified byte output bytes to the target byte buffer or to converter internal buffers.
+ *
+ * @param args callback fromUnicode arguments
+ * @param source source bytes to write
+ * @param length length of bytes to write
+ * @param offsetIndex the relative offset index from callback.
+ * @param err error status. If <TT>U_BUFFER_OVERFLOW</TT> is returned, then U_BUFFER_OVERFLOW <STRONG>must</STRONG> 
+ * be returned to the user, because it means that not all data could be written into the target buffer, and some is 
+ * in the converter error buffer.
+ * @see ucnv_cbFromUWriteSub
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
+                        const char* source,
+                        int32_t length,
+                        int32_t offsetIndex,
+                        UErrorCode * err);
+
+/**
+ * ONLY used by FromU callback functions.  
+ * This function will write out the correct substitution character sequence 
+ * to the target.
+ *
+ * @param args callback fromUnicode arguments
+ * @param offsetIndex the relative offset index from the current source pointer to be used
+ * @param err error status. If <TT>U_BUFFER_OVERFLOW</TT> is returned, then U_BUFFER_OVERFLOW <STRONG>must</STRONG> 
+ * be returned to the user, because it means that not all data could be written into the target buffer, and some is 
+ * in the converter error buffer.
+ * @see ucnv_cbFromUWriteBytes
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
+                      int32_t offsetIndex,
+                      UErrorCode * err);
+
+/**
+ * ONLY used by fromU callback functions.  
+ * This function will write out the error character(s) to the target UChar buffer.
+ *
+ * @param args callback fromUnicode arguments
+ * @param source pointer to pointer to first UChar to write [on exit: 1 after last UChar processed]
+ * @param sourceLimit pointer after last UChar to write
+ * @param offsetIndex the relative offset index from callback which will be set
+ * @param err error status <TT>U_BUFFER_OVERFLOW</TT>
+ * @see ucnv_cbToUWriteSub
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
+                             const UChar** source,
+                             const UChar*  sourceLimit,
+                             int32_t offsetIndex,
+                             UErrorCode * err);
+
+/**
+ * ONLY used by ToU callback functions.
+ *  This function will write out the specified characters to the target 
+ * UChar buffer.
+ *
+ * @param args callback toUnicode arguments
+ * @param source source string to write
+ * @param length the length of source string
+ * @param offsetIndex the relative offset index which will be written.
+ * @param err error status <TT>U_BUFFER_OVERFLOW</TT>
+ * @see ucnv_cbToUWriteSub
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
+                                             const UChar* source,
+                                             int32_t length,
+                                             int32_t offsetIndex,
+                                             UErrorCode * err);
+
+/**
+ * ONLY used by ToU  callback functions.  
+ * This function will write out the Unicode substitution character (U+FFFD).
+ *
+ * @param args callback fromUnicode arguments
+ * @param offsetIndex the relative offset index from callback.
+ * @param err error status <TT>U_BUFFER_OVERFLOW</TT>
+ * @see ucnv_cbToUWriteUChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
+                       int32_t offsetIndex,
+                       UErrorCode * err);
+#endif
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/ucnv_err.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ucnv_err.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ucnv_err.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,463 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1999-2007, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
- *
- *
- *   ucnv_err.h:
- */
-
-/**
- * \file
- * \brief C UConverter predefined error callbacks
- *
- *  <h2>Error Behaviour Functions</h2>
- *  Defines some error behaviour functions called by ucnv_{from,to}Unicode
- *  These are provided as part of ICU and many are stable, but they
- *  can also be considered only as an example of what can be done with
- *  callbacks.  You may of course write your own.
- *
- *  If you want to write your own, you may also find the functions from
- *  ucnv_cb.h useful when writing your own callbacks.
- *
- *  These functions, although public, should NEVER be called directly.
- *  They should be used as parameters to the ucnv_setFromUCallback
- *  and ucnv_setToUCallback functions, to set the behaviour of a converter
- *  when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
- *
- *  usage example:  'STOP' doesn't need any context, but newContext
- *    could be set to something other than 'NULL' if needed. The available
- *    contexts in this header can modify the default behavior of the callback.
- *
- *  \code
- *  UErrorCode err = U_ZERO_ERROR;
- *  UConverter *myConverter = ucnv_open("ibm-949", &err);
- *  const void *oldContext;
- *  UConverterFromUCallback oldAction;
- *
- *
- *  if (U_SUCCESS(err))
- *  {
- *      ucnv_setFromUCallBack(myConverter,
- *                       UCNV_FROM_U_CALLBACK_STOP,
- *                       NULL,
- *                       &oldAction,
- *                       &oldContext,
- *                       &status);
- *  }
- *  \endcode
- *
- *  The code above tells "myConverter" to stop when it encounters an
- *  ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
- *  Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
- *  and ucnv_setToUCallBack would need to be called in order to change
- *  that behavior too.
- *
- *  Here is an example with a context:
- *
- *  \code
- *  UErrorCode err = U_ZERO_ERROR;
- *  UConverter *myConverter = ucnv_open("ibm-949", &err);
- *  const void *oldContext;
- *  UConverterFromUCallback oldAction;
- *
- *
- *  if (U_SUCCESS(err))
- *  {
- *      ucnv_setToUCallBack(myConverter,
- *                       UCNV_TO_U_CALLBACK_SUBSTITUTE,
- *                       UCNV_SUB_STOP_ON_ILLEGAL,
- *                       &oldAction,
- *                       &oldContext,
- *                       &status);
- *  }
- *  \endcode
- *
- *  The code above tells "myConverter" to stop when it encounters an
- *  ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
- *  Codepage -> Unicode. Any unmapped and legal characters will be
- *  substituted to be the default substitution character.
- */
-
-#ifndef UCNV_ERR_H
-#define UCNV_ERR_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-/** Forward declaring the UConverter structure. @stable ICU 2.0 */
-struct UConverter;
-
-/** @stable ICU 2.0 */
-typedef struct UConverter UConverter;
-
-/**
- * FROM_U, TO_U context options for sub callback
- * @stable ICU 2.0
- */
-#define UCNV_SUB_STOP_ON_ILLEGAL "i"
-
-/**
- * FROM_U, TO_U context options for skip callback
- * @stable ICU 2.0
- */
-#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
-
-/**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) 
- * @stable ICU 2.0
- */
-#define UCNV_ESCAPE_ICU       NULL
-/**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
- * @stable ICU 2.0
- */
-#define UCNV_ESCAPE_JAVA      "J"
-/**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
- * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX)
- * @stable ICU 2.0
- */
-#define UCNV_ESCAPE_C         "C"
-/**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
- * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
- * @stable ICU 2.0
- */
-#define UCNV_ESCAPE_XML_DEC   "D"
-/**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
- * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
- * @stable ICU 2.0
- */
-#define UCNV_ESCAPE_XML_HEX   "X"
-/**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
- * @stable ICU 2.0
- */
-#define UCNV_ESCAPE_UNICODE   "U"
-
-/**
- * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
- * a backslash, 1..6 hex digits, and a space)
- * @draft ICU 4.0
- */
-#define UCNV_ESCAPE_CSS2   "S"
-
-/** 
- * The process condition code to be used with the callbacks.  
- * Codes which are greater than UCNV_IRREGULAR should be 
- * passed on to any chained callbacks.
- * @stable ICU 2.0
- */
-typedef enum {
-    UCNV_UNASSIGNED = 0,  /**< The code point is unassigned.
-                             The error code U_INVALID_CHAR_FOUND will be set. */
-    UCNV_ILLEGAL = 1,     /**< The code point is illegal. For example, 
-                             \\x81\\x2E is illegal in SJIS because \\x2E
-                             is not a valid trail byte for the \\x81 
-                             lead byte.
-                             Also, starting with Unicode 3.0.1, non-shortest byte sequences
-                             in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
-                             are also illegal, not just irregular.
-                             The error code U_ILLEGAL_CHAR_FOUND will be set. */
-    UCNV_IRREGULAR = 2,   /**< The codepoint is not a regular sequence in 
-                             the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
-                             are irregular UTF-8 byte sequences for single surrogate
-                             code points.
-                             The error code U_INVALID_CHAR_FOUND will be set. */
-    UCNV_RESET = 3,       /**< The callback is called with this reason when a
-                             'reset' has occured. Callback should reset all
-                             state. */
-    UCNV_CLOSE = 4,        /**< Called when the converter is closed. The
-                             callback should release any allocated memory.*/
-    UCNV_CLONE = 5         /**< Called when ucnv_safeClone() is called on the
-                              converter. the pointer available as the
-                              'context' is an alias to the original converters'
-                              context pointer. If the context must be owned
-                              by the new converter, the callback must clone 
-                              the data and call ucnv_setFromUCallback 
-                              (or setToUCallback) with the correct pointer.
-                              @stable ICU 2.2
-                           */
-} UConverterCallbackReason;
-
-
-/**
- * The structure for the fromUnicode callback function parameter.
- * @stable ICU 2.0
- */
-typedef struct {
-    uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
-    UBool flush;                /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0    */
-    UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0  */
-    const UChar *source;        /**< Pointer to the source source buffer. @stable ICU 2.0    */
-    const UChar *sourceLimit;   /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
-    char *target;               /**< Pointer to the target buffer. @stable ICU 2.0    */
-    const char *targetLimit;    /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
-    int32_t *offsets;           /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0  */
-} UConverterFromUnicodeArgs;
-
-
-/**
- * The structure for the toUnicode callback function parameter.
- * @stable ICU 2.0
- */
-typedef struct {
-    uint16_t size;              /**< The size of this struct   @stable ICU 2.0 */
-    UBool flush;                /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0   */
-    UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
-    const char *source;         /**< Pointer to the source source buffer. @stable ICU 2.0    */
-    const char *sourceLimit;    /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
-    UChar *target;              /**< Pointer to the target buffer. @stable ICU 2.0    */
-    const UChar *targetLimit;   /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
-    int32_t *offsets;           /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0  */
-} UConverterToUnicodeArgs;
-
-
-/**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
- * returning the error code back to the caller immediately.
- *
- * @param context Pointer to the callback's private data
- * @param fromUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
- * @param reason Defines the reason the callback was invoked
- * @param err This should always be set to a failure status prior to calling.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
-                  const void *context,
-                  UConverterFromUnicodeArgs *fromUArgs,
-                  const UChar* codeUnits,
-                  int32_t length,
-                  UChar32 codePoint,
-                  UConverterCallbackReason reason,
-                  UErrorCode * err);
-
-
-
-/**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
- * returning the error code back to the caller immediately.
- *
- * @param context Pointer to the callback's private data
- * @param toUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param reason Defines the reason the callback was invoked
- * @param err This should always be set to a failure status prior to calling.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
-                  const void *context,
-                  UConverterToUnicodeArgs *toUArgs,
-                  const char* codeUnits,
-                  int32_t length,
-                  UConverterCallbackReason reason,
-                  UErrorCode * err);
-
-/**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
- * skips only UNASSINGED_SEQUENCE depending on the context parameter
- * simply ignoring those characters. 
- *
- * @param context  The function currently recognizes the callback options:
- *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
- *                      returning the error code back to the caller immediately.
- *                 NULL: Skips any ILLEGAL_SEQUENCE
- * @param fromUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
- * @param reason Defines the reason the callback was invoked
- * @param err Return value will be set to success if the callback was handled,
- *      otherwise this value will be set to a failure status.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
-                  const void *context,
-                  UConverterFromUnicodeArgs *fromUArgs,
-                  const UChar* codeUnits,
-                  int32_t length,
-                  UChar32 codePoint,
-                  UConverterCallbackReason reason,
-                  UErrorCode * err);
-
-/**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or 
- * UNASSIGNED_SEQUENCE depending on context parameter, with the
- * current substitution string for the converter. This is the default
- * callback.
- *
- * @param context The function currently recognizes the callback options:
- *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
- *                      returning the error code back to the caller immediately.
- *                 NULL: Substitutes any ILLEGAL_SEQUENCE
- * @param fromUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
- * @param reason Defines the reason the callback was invoked
- * @param err Return value will be set to success if the callback was handled,
- *      otherwise this value will be set to a failure status.
- * @see ucnv_setSubstChars
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
-                  const void *context,
-                  UConverterFromUnicodeArgs *fromUArgs,
-                  const UChar* codeUnits,
-                  int32_t length,
-                  UChar32 codePoint,
-                  UConverterCallbackReason reason,
-                  UErrorCode * err);
-
-/**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
- * hexadecimal representation of the illegal codepoints
- *
- * @param context The function currently recognizes the callback options:
- *        <ul>
- *        <li>UCNV_ESCAPE_ICU: Substitues the  ILLEGAL SEQUENCE with the hexadecimal 
- *          representation in the format  %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). 
- *          In the Event the converter doesn't support the characters {%,U}[A-F][0-9], 
- *          it will  substitute  the illegal sequence with the substitution characters.
- *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
- *          %UD84D%UDC56</li>
- *        <li>UCNV_ESCAPE_JAVA: Substitues the  ILLEGAL SEQUENCE with the hexadecimal 
- *          representation in the format  \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). 
- *          In the Event the converter doesn't support the characters {\,u}[A-F][0-9], 
- *          it will  substitute  the illegal sequence with the substitution characters.
- *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
- *          \\uD84D\\uDC56</li>
- *        <li>UCNV_ESCAPE_C: Substitues the  ILLEGAL SEQUENCE with the hexadecimal 
- *          representation in the format  \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). 
- *          In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9], 
- *          it will  substitute  the illegal sequence with the substitution characters.
- *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
- *          \\U00023456</li>
- *        <li>UCNV_ESCAPE_XML_DEC: Substitues the  ILLEGAL SEQUENCE with the decimal 
- *          representation in the format \htmlonly&amp;#DDDDDDDD;, e.g. "&amp;#65534;&amp;#172;&amp;#51454;")\endhtmlonly. 
- *          In the Event the converter doesn't support the characters {&amp;,#}[0-9], 
- *          it will  substitute  the illegal sequence with the substitution characters.
- *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
- *          &amp;#144470; and Zero padding is ignored.</li>
- *        <li>UCNV_ESCAPE_XML_HEX:Substitues the  ILLEGAL SEQUENCE with the decimal 
- *          representation in the format \htmlonly&amp;#xXXXX; e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;")\endhtmlonly. 
- *          In the Event the converter doesn't support the characters {&,#,x}[0-9], 
- *          it will  substitute  the illegal sequence with the substitution characters.
- *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
- *          \htmlonly&amp;#x23456;\endhtmlonly</li>
- *        </ul>
- * @param fromUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
- * @param reason Defines the reason the callback was invoked
- * @param err Return value will be set to success if the callback was handled,
- *      otherwise this value will be set to a failure status.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
-                  const void *context,
-                  UConverterFromUnicodeArgs *fromUArgs,
-                  const UChar* codeUnits,
-                  int32_t length,
-                  UChar32 codePoint,
-                  UConverterCallbackReason reason,
-                  UErrorCode * err);
-
-
-/**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
- * skips only UNASSINGED_SEQUENCE depending on the context parameter
- * simply ignoring those characters. 
- *
- * @param context  The function currently recognizes the callback options:
- *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
- *                      returning the error code back to the caller immediately.
- *                 NULL: Skips any ILLEGAL_SEQUENCE
- * @param toUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param reason Defines the reason the callback was invoked
- * @param err Return value will be set to success if the callback was handled,
- *      otherwise this value will be set to a failure status.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
-                  const void *context,
-                  UConverterToUnicodeArgs *toUArgs,
-                  const char* codeUnits,
-                  int32_t length,
-                  UConverterCallbackReason reason,
-                  UErrorCode * err);
-
-/**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or 
- * UNASSIGNED_SEQUENCE depending on context parameter,  with the
- * Unicode substitution character, U+FFFD.
- *
- * @param context  The function currently recognizes the callback options:
- *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
- *                      returning the error code back to the caller immediately.
- *                 NULL: Substitutes any ILLEGAL_SEQUENCE
- * @param toUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param reason Defines the reason the callback was invoked
- * @param err Return value will be set to success if the callback was handled,
- *      otherwise this value will be set to a failure status.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
-                  const void *context,
-                  UConverterToUnicodeArgs *toUArgs,
-                  const char* codeUnits,
-                  int32_t length,
-                  UConverterCallbackReason reason,
-                  UErrorCode * err);
-
-/**
- * DO NOT CALL THIS FUNCTION DIRECTLY!
- * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
- * hexadecimal representation of the illegal bytes
- *  (in the format  %XNN, e.g. "%XFF%X0A%XC8%X03").
- *
- * @param context This function currently recognizes the callback options:
- *      UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
- *      UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
- * @param toUArgs Information about the conversion in progress
- * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
- * @param length Size (in bytes) of the concerned codepage sequence
- * @param reason Defines the reason the callback was invoked
- * @param err Return value will be set to success if the callback was handled,
- *      otherwise this value will be set to a failure status.
- * @stable ICU 2.0
- */
-
-U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
-                  const void *context,
-                  UConverterToUnicodeArgs *toUArgs,
-                  const char* codeUnits,
-                  int32_t length,
-                  UConverterCallbackReason reason,
-                  UErrorCode * err);
-
-#endif
-
-#endif
-
-/*UCNV_ERR_H*/ 

Copied: MacRuby/trunk/icu-1060/unicode/ucnv_err.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ucnv_err.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ucnv_err.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ucnv_err.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,463 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999-2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+ *
+ *
+ *   ucnv_err.h:
+ */
+
+/**
+ * \file
+ * \brief C UConverter predefined error callbacks
+ *
+ *  <h2>Error Behaviour Functions</h2>
+ *  Defines some error behaviour functions called by ucnv_{from,to}Unicode
+ *  These are provided as part of ICU and many are stable, but they
+ *  can also be considered only as an example of what can be done with
+ *  callbacks.  You may of course write your own.
+ *
+ *  If you want to write your own, you may also find the functions from
+ *  ucnv_cb.h useful when writing your own callbacks.
+ *
+ *  These functions, although public, should NEVER be called directly.
+ *  They should be used as parameters to the ucnv_setFromUCallback
+ *  and ucnv_setToUCallback functions, to set the behaviour of a converter
+ *  when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
+ *
+ *  usage example:  'STOP' doesn't need any context, but newContext
+ *    could be set to something other than 'NULL' if needed. The available
+ *    contexts in this header can modify the default behavior of the callback.
+ *
+ *  \code
+ *  UErrorCode err = U_ZERO_ERROR;
+ *  UConverter *myConverter = ucnv_open("ibm-949", &err);
+ *  const void *oldContext;
+ *  UConverterFromUCallback oldAction;
+ *
+ *
+ *  if (U_SUCCESS(err))
+ *  {
+ *      ucnv_setFromUCallBack(myConverter,
+ *                       UCNV_FROM_U_CALLBACK_STOP,
+ *                       NULL,
+ *                       &oldAction,
+ *                       &oldContext,
+ *                       &status);
+ *  }
+ *  \endcode
+ *
+ *  The code above tells "myConverter" to stop when it encounters an
+ *  ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
+ *  Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
+ *  and ucnv_setToUCallBack would need to be called in order to change
+ *  that behavior too.
+ *
+ *  Here is an example with a context:
+ *
+ *  \code
+ *  UErrorCode err = U_ZERO_ERROR;
+ *  UConverter *myConverter = ucnv_open("ibm-949", &err);
+ *  const void *oldContext;
+ *  UConverterFromUCallback oldAction;
+ *
+ *
+ *  if (U_SUCCESS(err))
+ *  {
+ *      ucnv_setToUCallBack(myConverter,
+ *                       UCNV_TO_U_CALLBACK_SUBSTITUTE,
+ *                       UCNV_SUB_STOP_ON_ILLEGAL,
+ *                       &oldAction,
+ *                       &oldContext,
+ *                       &status);
+ *  }
+ *  \endcode
+ *
+ *  The code above tells "myConverter" to stop when it encounters an
+ *  ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
+ *  Codepage -> Unicode. Any unmapped and legal characters will be
+ *  substituted to be the default substitution character.
+ */
+
+#ifndef UCNV_ERR_H
+#define UCNV_ERR_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+/** Forward declaring the UConverter structure. @stable ICU 2.0 */
+struct UConverter;
+
+/** @stable ICU 2.0 */
+typedef struct UConverter UConverter;
+
+/**
+ * FROM_U, TO_U context options for sub callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SUB_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U, TO_U context options for skip callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) 
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_ICU       NULL
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_JAVA      "J"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
+ * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_C         "C"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_DEC   "D"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_HEX   "X"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_UNICODE   "U"
+
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
+ * a backslash, 1..6 hex digits, and a space)
+ * @draft ICU 4.0
+ */
+#define UCNV_ESCAPE_CSS2   "S"
+
+/** 
+ * The process condition code to be used with the callbacks.  
+ * Codes which are greater than UCNV_IRREGULAR should be 
+ * passed on to any chained callbacks.
+ * @stable ICU 2.0
+ */
+typedef enum {
+    UCNV_UNASSIGNED = 0,  /**< The code point is unassigned.
+                             The error code U_INVALID_CHAR_FOUND will be set. */
+    UCNV_ILLEGAL = 1,     /**< The code point is illegal. For example, 
+                             \\x81\\x2E is illegal in SJIS because \\x2E
+                             is not a valid trail byte for the \\x81 
+                             lead byte.
+                             Also, starting with Unicode 3.0.1, non-shortest byte sequences
+                             in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
+                             are also illegal, not just irregular.
+                             The error code U_ILLEGAL_CHAR_FOUND will be set. */
+    UCNV_IRREGULAR = 2,   /**< The codepoint is not a regular sequence in 
+                             the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
+                             are irregular UTF-8 byte sequences for single surrogate
+                             code points.
+                             The error code U_INVALID_CHAR_FOUND will be set. */
+    UCNV_RESET = 3,       /**< The callback is called with this reason when a
+                             'reset' has occured. Callback should reset all
+                             state. */
+    UCNV_CLOSE = 4,        /**< Called when the converter is closed. The
+                             callback should release any allocated memory.*/
+    UCNV_CLONE = 5         /**< Called when ucnv_safeClone() is called on the
+                              converter. the pointer available as the
+                              'context' is an alias to the original converters'
+                              context pointer. If the context must be owned
+                              by the new converter, the callback must clone 
+                              the data and call ucnv_setFromUCallback 
+                              (or setToUCallback) with the correct pointer.
+                              @stable ICU 2.2
+                           */
+} UConverterCallbackReason;
+
+
+/**
+ * The structure for the fromUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+    uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
+    UBool flush;                /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0    */
+    UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0  */
+    const UChar *source;        /**< Pointer to the source source buffer. @stable ICU 2.0    */
+    const UChar *sourceLimit;   /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
+    char *target;               /**< Pointer to the target buffer. @stable ICU 2.0    */
+    const char *targetLimit;    /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
+    int32_t *offsets;           /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0  */
+} UConverterFromUnicodeArgs;
+
+
+/**
+ * The structure for the toUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+    uint16_t size;              /**< The size of this struct   @stable ICU 2.0 */
+    UBool flush;                /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0   */
+    UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
+    const char *source;         /**< Pointer to the source source buffer. @stable ICU 2.0    */
+    const char *sourceLimit;    /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
+    UChar *target;              /**< Pointer to the target buffer. @stable ICU 2.0    */
+    const UChar *targetLimit;   /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
+    int32_t *offsets;           /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0  */
+} UConverterToUnicodeArgs;
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
+                  const void *context,
+                  UConverterFromUnicodeArgs *fromUArgs,
+                  const UChar* codeUnits,
+                  int32_t length,
+                  UChar32 codePoint,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
+                  const void *context,
+                  UConverterToUnicodeArgs *toUArgs,
+                  const char* codeUnits,
+                  int32_t length,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSINGED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters. 
+ *
+ * @param context  The function currently recognizes the callback options:
+ *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ *                      returning the error code back to the caller immediately.
+ *                 NULL: Skips any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ *      otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
+                  const void *context,
+                  UConverterFromUnicodeArgs *fromUArgs,
+                  const UChar* codeUnits,
+                  int32_t length,
+                  UChar32 codePoint,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or 
+ * UNASSIGNED_SEQUENCE depending on context parameter, with the
+ * current substitution string for the converter. This is the default
+ * callback.
+ *
+ * @param context The function currently recognizes the callback options:
+ *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ *                      returning the error code back to the caller immediately.
+ *                 NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ *      otherwise this value will be set to a failure status.
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
+                  const void *context,
+                  UConverterFromUnicodeArgs *fromUArgs,
+                  const UChar* codeUnits,
+                  int32_t length,
+                  UChar32 codePoint,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal codepoints
+ *
+ * @param context The function currently recognizes the callback options:
+ *        <ul>
+ *        <li>UCNV_ESCAPE_ICU: Substitues the  ILLEGAL SEQUENCE with the hexadecimal 
+ *          representation in the format  %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). 
+ *          In the Event the converter doesn't support the characters {%,U}[A-F][0-9], 
+ *          it will  substitute  the illegal sequence with the substitution characters.
+ *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ *          %UD84D%UDC56</li>
+ *        <li>UCNV_ESCAPE_JAVA: Substitues the  ILLEGAL SEQUENCE with the hexadecimal 
+ *          representation in the format  \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). 
+ *          In the Event the converter doesn't support the characters {\,u}[A-F][0-9], 
+ *          it will  substitute  the illegal sequence with the substitution characters.
+ *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ *          \\uD84D\\uDC56</li>
+ *        <li>UCNV_ESCAPE_C: Substitues the  ILLEGAL SEQUENCE with the hexadecimal 
+ *          representation in the format  \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). 
+ *          In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9], 
+ *          it will  substitute  the illegal sequence with the substitution characters.
+ *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ *          \\U00023456</li>
+ *        <li>UCNV_ESCAPE_XML_DEC: Substitues the  ILLEGAL SEQUENCE with the decimal 
+ *          representation in the format \htmlonly&amp;#DDDDDDDD;, e.g. "&amp;#65534;&amp;#172;&amp;#51454;")\endhtmlonly. 
+ *          In the Event the converter doesn't support the characters {&amp;,#}[0-9], 
+ *          it will  substitute  the illegal sequence with the substitution characters.
+ *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ *          &amp;#144470; and Zero padding is ignored.</li>
+ *        <li>UCNV_ESCAPE_XML_HEX:Substitues the  ILLEGAL SEQUENCE with the decimal 
+ *          representation in the format \htmlonly&amp;#xXXXX; e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;")\endhtmlonly. 
+ *          In the Event the converter doesn't support the characters {&,#,x}[0-9], 
+ *          it will  substitute  the illegal sequence with the substitution characters.
+ *          Note that  codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ *          \htmlonly&amp;#x23456;\endhtmlonly</li>
+ *        </ul>
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ *      otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
+                  const void *context,
+                  UConverterFromUnicodeArgs *fromUArgs,
+                  const UChar* codeUnits,
+                  int32_t length,
+                  UChar32 codePoint,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSINGED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters. 
+ *
+ * @param context  The function currently recognizes the callback options:
+ *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ *                      returning the error code back to the caller immediately.
+ *                 NULL: Skips any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ *      otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
+                  const void *context,
+                  UConverterToUnicodeArgs *toUArgs,
+                  const char* codeUnits,
+                  int32_t length,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or 
+ * UNASSIGNED_SEQUENCE depending on context parameter,  with the
+ * Unicode substitution character, U+FFFD.
+ *
+ * @param context  The function currently recognizes the callback options:
+ *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ *                      returning the error code back to the caller immediately.
+ *                 NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ *      otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
+                  const void *context,
+                  UConverterToUnicodeArgs *toUArgs,
+                  const char* codeUnits,
+                  int32_t length,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal bytes
+ *  (in the format  %XNN, e.g. "%XFF%X0A%XC8%X03").
+ *
+ * @param context This function currently recognizes the callback options:
+ *      UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
+ *      UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ *      otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+
+U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
+                  const void *context,
+                  UConverterToUnicodeArgs *toUArgs,
+                  const char* codeUnits,
+                  int32_t length,
+                  UConverterCallbackReason reason,
+                  UErrorCode * err);
+
+#endif
+
+#endif
+
+/*UCNV_ERR_H*/ 

Deleted: MacRuby/trunk/icu-1060/unicode/ucol.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ucol.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ucol.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1132 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (c) 1996-2008, International Business Machines Corporation and others.
-* All Rights Reserved.
-*******************************************************************************
-*/
-
-#ifndef UCOL_H
-#define UCOL_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION
-
-#include "unicode/unorm.h"
-#include "unicode/parseerr.h"
-#include "unicode/uloc.h"
-#include "unicode/uset.h"
-
-/**
- * \file
- * \brief C API: Collator 
- *
- * <h2> Collator C API </h2>
- *
- * The C API for Collator performs locale-sensitive
- * string comparison. You use this service to build
- * searching and sorting routines for natural language text.
- * <em>Important: </em>The ICU collation service has been reimplemented 
- * in order to achieve better performance and UCA compliance. 
- * For details, see the 
- * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
- * collation design document</a>.
- * <p>
- * For more information about the collation service see 
- * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
- * <p>
- * Collation service provides correct sorting orders for most locales supported in ICU. 
- * If specific data for a locale is not available, the orders eventually falls back
- * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 
- * <p>
- * Sort ordering may be customized by providing your own set of rules. For more on
- * this subject see the 
- * <a href="http://icu-project.org/userguide/Collate_Customization.html">
- * Collation customization</a> section of the users guide.
- * <p>
- * @see         UCollationResult
- * @see         UNormalizationMode
- * @see         UCollationStrength
- * @see         UCollationElements
- */
-
-/** A collator.
-*  For usage in C programs.
-*/
-struct UCollator;
-/** structure representing a collator object instance 
- * @stable ICU 2.0
- */
-typedef struct UCollator UCollator;
-
-
-/**
- * UCOL_LESS is returned if source string is compared to be less than target
- * string in the u_strcoll() method.
- * UCOL_EQUAL is returned if source string is compared to be equal to target
- * string in the u_strcoll() method.
- * UCOL_GREATER is returned if source string is compared to be greater than
- * target string in the u_strcoll() method.
- * @see u_strcoll()
- * <p>
- * Possible values for a comparison result 
- * @stable ICU 2.0
- */
-typedef enum {
-  /** string a == string b */
-  UCOL_EQUAL    = 0,
-  /** string a > string b */
-  UCOL_GREATER    = 1,
-  /** string a < string b */
-  UCOL_LESS    = -1
-} UCollationResult ;
-
-
-/** Enum containing attribute values for controling collation behavior.
- * Here are all the allowable values. Not every attribute can take every value. The only
- * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined  
- * value for that locale 
- * @stable ICU 2.0
- */
-typedef enum {
-  /** accepted by most attributes */
-  UCOL_DEFAULT = -1,
-
-  /** Primary collation strength */
-  UCOL_PRIMARY = 0,
-  /** Secondary collation strength */
-  UCOL_SECONDARY = 1,
-  /** Tertiary collation strength */
-  UCOL_TERTIARY = 2,
-  /** Default collation strength */
-  UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
-  UCOL_CE_STRENGTH_LIMIT,
-  /** Quaternary collation strength */
-  UCOL_QUATERNARY=3,
-  /** Identical collation strength */
-  UCOL_IDENTICAL=15,
-  UCOL_STRENGTH_LIMIT,
-
-  /** Turn the feature off - works for UCOL_FRENCH_COLLATION, 
-      UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
-      & UCOL_DECOMPOSITION_MODE*/
-  UCOL_OFF = 16,
-  /** Turn the feature on - works for UCOL_FRENCH_COLLATION, 
-      UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
-      & UCOL_DECOMPOSITION_MODE*/
-  UCOL_ON = 17,
-  
-  /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
-  UCOL_SHIFTED = 20,
-  /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
-  UCOL_NON_IGNORABLE = 21,
-
-  /** Valid for UCOL_CASE_FIRST - 
-      lower case sorts before upper case */
-  UCOL_LOWER_FIRST = 24,
-  /** upper case sorts before lower case */
-  UCOL_UPPER_FIRST = 25,
-
-  UCOL_ATTRIBUTE_VALUE_COUNT
-
-} UColAttributeValue;
-
-/**
- * Base letter represents a primary difference.  Set comparison
- * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
- * Use this to set the strength of a Collator object.
- * Example of primary difference, "abc" &lt; "abd"
- * 
- * Diacritical differences on the same base letter represent a secondary
- * difference.  Set comparison level to UCOL_SECONDARY to ignore tertiary
- * differences. Use this to set the strength of a Collator object.
- * Example of secondary difference, "&auml;" >> "a".
- *
- * Uppercase and lowercase versions of the same character represents a
- * tertiary difference.  Set comparison level to UCOL_TERTIARY to include
- * all comparison differences. Use this to set the strength of a Collator
- * object.
- * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
- *
- * Two characters are considered "identical" when they have the same
- * unicode spellings.  UCOL_IDENTICAL.
- * For example, "&auml;" == "&auml;".
- *
- * UCollationStrength is also used to determine the strength of sort keys 
- * generated from UCollator objects
- * These values can be now found in the UColAttributeValue enum.
- * @stable ICU 2.0
- **/
-typedef UColAttributeValue UCollationStrength;
-
-/** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
- * value, as well as the values specific to each one. 
- * @stable ICU 2.0
- */
-typedef enum {
-     /** Attribute for direction of secondary weights - used in French.
-      * Acceptable values are UCOL_ON, which results in secondary weights
-      * being considered backwards and UCOL_OFF which treats secondary
-      * weights in the order they appear.*/
-     UCOL_FRENCH_COLLATION, 
-     /** Attribute for handling variable elements.
-      * Acceptable values are UCOL_NON_IGNORABLE (default)
-      * which treats all the codepoints with non-ignorable 
-      * primary weights in the same way,
-      * and UCOL_SHIFTED which causes codepoints with primary 
-      * weights that are equal or below the variable top value
-      * to be ignored on primary level and moved to the quaternary 
-      * level.*/
-     UCOL_ALTERNATE_HANDLING, 
-     /** Controls the ordering of upper and lower case letters.
-      * Acceptable values are UCOL_OFF (default), which orders
-      * upper and lower case letters in accordance to their tertiary
-      * weights, UCOL_UPPER_FIRST which forces upper case letters to 
-      * sort before lower case letters, and UCOL_LOWER_FIRST which does 
-      * the opposite. */
-     UCOL_CASE_FIRST, 
-     /** Controls whether an extra case level (positioned before the third
-      * level) is generated or not. Acceptable values are UCOL_OFF (default), 
-      * when case level is not generated, and UCOL_ON which causes the case
-      * level to be generated. Contents of the case level are affected by
-      * the value of UCOL_CASE_FIRST attribute. A simple way to ignore 
-      * accent differences in a string is to set the strength to UCOL_PRIMARY
-      * and enable case level. */
-     UCOL_CASE_LEVEL,
-     /** Controls whether the normalization check and necessary normalizations
-      * are performed. When set to UCOL_OFF (default) no normalization check
-      * is performed. The correctness of the result is guaranteed only if the 
-      * input data is in so-called FCD form (see users manual for more info).
-      * When set to UCOL_ON, an incremental check is performed to see whether
-      * the input data is in the FCD form. If the data is not in the FCD form,
-      * incremental NFD normalization is performed. */
-     UCOL_NORMALIZATION_MODE, 
-     /** An alias for UCOL_NORMALIZATION_MODE attribute */
-     UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
-     /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
-      * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
-      * for most locales (except Japanese) is tertiary. Quaternary strength 
-      * is useful when combined with shifted setting for alternate handling
-      * attribute and for JIS x 4061 collation, when it is used to distinguish
-      * between Katakana  and Hiragana (this is achieved by setting the 
-      * UCOL_HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level
-      * is affected only by the number of non ignorable code points in
-      * the string. Identical strength is rarely useful, as it amounts 
-      * to codepoints of the NFD form of the string. */
-     UCOL_STRENGTH,  
-     /** When turned on, this attribute positions Hiragana before all  
-      * non-ignorables on quaternary level This is a sneaky way to produce JIS
-      * sort order */
-     UCOL_HIRAGANA_QUATERNARY_MODE,
-     /** When turned on, this attribute generates a collation key
-      * for the numeric value of substrings of digits.
-      * This is a way to get '100' to sort AFTER '2'. */
-     UCOL_NUMERIC_COLLATION, 
-     UCOL_ATTRIBUTE_COUNT
-} UColAttribute;
-
-/** Options for retrieving the rule string 
- *  @stable ICU 2.0
- */
-typedef enum {
-  /** Retrieve tailoring only */
-  UCOL_TAILORING_ONLY, 
-  /** Retrieve UCA rules and tailoring */
-  UCOL_FULL_RULES 
-} UColRuleOption ;
-
-/**
- * Open a UCollator for comparing strings.
- * The UCollator pointer is used in all the calls to the Collation 
- * service. After finished, collator must be disposed of by calling
- * {@link #ucol_close }.
- * @param loc The locale containing the required collation rules. 
- *            Special values for locales can be passed in - 
- *            if NULL is passed for the locale, the default locale
- *            collation rules will be used. If empty string ("") or
- *            "root" are passed, UCA rules will be used.
- * @param status A pointer to an UErrorCode to receive any errors
- * @return A pointer to a UCollator, or 0 if an error occurred.
- * @see ucol_openRules
- * @see ucol_safeClone
- * @see ucol_close
- * @stable ICU 2.0
- */
-U_STABLE UCollator* U_EXPORT2 
-ucol_open(const char *loc, UErrorCode *status);
-
-/**
- * Produce an UCollator instance according to the rules supplied.
- * The rules are used to change the default ordering, defined in the
- * UCA in a process called tailoring. The resulting UCollator pointer
- * can be used in the same way as the one obtained by {@link #ucol_strcoll }.
- * @param rules A string describing the collation rules. For the syntax
- *              of the rules please see users guide.
- * @param rulesLength The length of rules, or -1 if null-terminated.
- * @param normalizationMode The normalization mode: One of
- *             UCOL_OFF     (expect the text to not need normalization),
- *             UCOL_ON      (normalize), or
- *             UCOL_DEFAULT (set the mode according to the rules)
- * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
- * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules.
- * @param parseError  A pointer to UParseError to recieve information about errors
- *                    occurred during parsing. This argument can currently be set
- *                    to NULL, but at users own risk. Please provide a real structure.
- * @param status A pointer to an UErrorCode to receive any errors
- * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
- *         of error - please use status argument to check for errors.
- * @see ucol_open
- * @see ucol_safeClone
- * @see ucol_close
- * @stable ICU 2.0
- */
-U_STABLE UCollator* U_EXPORT2 
-ucol_openRules( const UChar        *rules,
-                int32_t            rulesLength,
-                UColAttributeValue normalizationMode,
-                UCollationStrength strength,
-                UParseError        *parseError,
-                UErrorCode         *status);
-
-/** 
- * Open a collator defined by a short form string.
- * The structure and the syntax of the string is defined in the "Naming collators"
- * section of the users guide: 
- * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
- * Attributes are overriden by the subsequent attributes. So, for "S2_S3", final
- * strength will be 3. 3066bis locale overrides individual locale parts.
- * The call to this function is equivalent to a call to ucol_open, followed by a 
- * series of calls to ucol_setAttribute and ucol_setVariableTop.
- * @param definition A short string containing a locale and a set of attributes. 
- *                   Attributes not explicitly mentioned are left at the default
- *                   state for a locale.
- * @param parseError if not NULL, structure that will get filled with error's pre
- *                   and post context in case of error.
- * @param forceDefaults if FALSE, the settings that are the same as the collator 
- *                   default settings will not be applied (for example, setting
- *                   French secondary on a French collator would not be executed). 
- *                   If TRUE, all the settings will be applied regardless of the 
- *                   collator default value. If the definition
- *                   strings are to be cached, should be set to FALSE.
- * @param status     Error code. Apart from regular error conditions connected to 
- *                   instantiating collators (like out of memory or similar), this
- *                   API will return an error if an invalid attribute or attribute/value
- *                   combination is specified.
- * @return           A pointer to a UCollator or 0 if an error occured (including an 
- *                   invalid attribute).
- * @see ucol_open
- * @see ucol_setAttribute
- * @see ucol_setVariableTop
- * @see ucol_getShortDefinitionString
- * @see ucol_normalizeShortDefinitionString
- * @stable ICU 3.0
- *
- */
-U_STABLE UCollator* U_EXPORT2
-ucol_openFromShortString( const char *definition,
-                          UBool forceDefaults,
-                          UParseError *parseError,
-                          UErrorCode *status);
-
-/**
- * Get a set containing the contractions defined by the collator. The set includes
- * both the UCA contractions and the contractions defined by the collator. This set
- * will contain only strings. If a tailoring explicitly suppresses contractions from 
- * the UCA (like Russian), removed contractions will not be in the resulting set.
- * @param coll collator 
- * @param conts the set to hold the result. It gets emptied before
- *              contractions are added. 
- * @param status to hold the error code
- * @return the size of the contraction set
- *
- * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead
- */
-U_DEPRECATED int32_t U_EXPORT2
-ucol_getContractions( const UCollator *coll,
-                  USet *conts,
-                  UErrorCode *status);
-
-/**
- * Get a set containing the expansions defined by the collator. The set includes
- * both the UCA expansions and the expansions defined by the tailoring
- * @param coll collator
- * @param contractions if not NULL, the set to hold the contractions
- * @param expansions if not NULL, the set to hold the expansions
- * @param addPrefixes add the prefix contextual elements to contractions
- * @param status to hold the error code
- *
- * @stable ICU 3.4
- */
-U_STABLE void U_EXPORT2
-ucol_getContractionsAndExpansions( const UCollator *coll,
-                  USet *contractions, USet *expansions,
-                  UBool addPrefixes, UErrorCode *status);
-
-/** 
- * Close a UCollator.
- * Once closed, a UCollator should not be used. Every open collator should
- * be closed. Otherwise, a memory leak will result.
- * @param coll The UCollator to close.
- * @see ucol_open
- * @see ucol_openRules
- * @see ucol_safeClone
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucol_close(UCollator *coll);
-
-/**
- * Compare two strings.
- * The strings will be compared using the options already specified.
- * @param coll The UCollator containing the comparison rules.
- * @param source The source string.
- * @param sourceLength The length of source, or -1 if null-terminated.
- * @param target The target string.
- * @param targetLength The length of target, or -1 if null-terminated.
- * @return The result of comparing the strings; one of UCOL_EQUAL,
- * UCOL_GREATER, UCOL_LESS
- * @see ucol_greater
- * @see ucol_greaterOrEqual
- * @see ucol_equal
- * @stable ICU 2.0
- */
-U_STABLE UCollationResult U_EXPORT2 
-ucol_strcoll(    const    UCollator    *coll,
-        const    UChar        *source,
-        int32_t            sourceLength,
-        const    UChar        *target,
-        int32_t            targetLength);
-
-/**
- * Determine if one string is greater than another.
- * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER
- * @param coll The UCollator containing the comparison rules.
- * @param source The source string.
- * @param sourceLength The length of source, or -1 if null-terminated.
- * @param target The target string.
- * @param targetLength The length of target, or -1 if null-terminated.
- * @return TRUE if source is greater than target, FALSE otherwise.
- * @see ucol_strcoll
- * @see ucol_greaterOrEqual
- * @see ucol_equal
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2 
-ucol_greater(const UCollator *coll,
-             const UChar     *source, int32_t sourceLength,
-             const UChar     *target, int32_t targetLength);
-
-/**
- * Determine if one string is greater than or equal to another.
- * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS
- * @param coll The UCollator containing the comparison rules.
- * @param source The source string.
- * @param sourceLength The length of source, or -1 if null-terminated.
- * @param target The target string.
- * @param targetLength The length of target, or -1 if null-terminated.
- * @return TRUE if source is greater than or equal to target, FALSE otherwise.
- * @see ucol_strcoll
- * @see ucol_greater
- * @see ucol_equal
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2 
-ucol_greaterOrEqual(const UCollator *coll,
-                    const UChar     *source, int32_t sourceLength,
-                    const UChar     *target, int32_t targetLength);
-
-/**
- * Compare two strings for equality.
- * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL
- * @param coll The UCollator containing the comparison rules.
- * @param source The source string.
- * @param sourceLength The length of source, or -1 if null-terminated.
- * @param target The target string.
- * @param targetLength The length of target, or -1 if null-terminated.
- * @return TRUE if source is equal to target, FALSE otherwise
- * @see ucol_strcoll
- * @see ucol_greater
- * @see ucol_greaterOrEqual
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2 
-ucol_equal(const UCollator *coll,
-           const UChar     *source, int32_t sourceLength,
-           const UChar     *target, int32_t targetLength);
-
-/**
- * Compare two UTF-8 encoded trings.
- * The strings will be compared using the options already specified.
- * @param coll The UCollator containing the comparison rules.
- * @param sIter The source string iterator.
- * @param tIter The target string iterator.
- * @return The result of comparing the strings; one of UCOL_EQUAL,
- * UCOL_GREATER, UCOL_LESS
- * @param status A pointer to an UErrorCode to receive any errors
- * @see ucol_strcoll
- * @stable ICU 2.6
- */
-U_STABLE UCollationResult U_EXPORT2 
-ucol_strcollIter(  const    UCollator    *coll,
-                  UCharIterator *sIter,
-                  UCharIterator *tIter,
-                  UErrorCode *status);
-
-/**
- * Get the collation strength used in a UCollator.
- * The strength influences how strings are compared.
- * @param coll The UCollator to query.
- * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
- * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
- * @see ucol_setStrength
- * @stable ICU 2.0
- */
-U_STABLE UCollationStrength U_EXPORT2 
-ucol_getStrength(const UCollator *coll);
-
-/**
- * Set the collation strength used in a UCollator.
- * The strength influences how strings are compared.
- * @param coll The UCollator to set.
- * @param strength The desired collation strength; one of UCOL_PRIMARY, 
- * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT
- * @see ucol_getStrength
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucol_setStrength(UCollator *coll,
-                 UCollationStrength strength);
-
-/**
- * Get the display name for a UCollator.
- * The display name is suitable for presentation to a user.
- * @param objLoc The locale of the collator in question.
- * @param dispLoc The locale for display.
- * @param result A pointer to a buffer to receive the attribute.
- * @param resultLength The maximum size of result.
- * @param status A pointer to an UErrorCode to receive any errors
- * @return The total buffer size needed; if greater than resultLength,
- * the output was truncated.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucol_getDisplayName(    const    char        *objLoc,
-            const    char        *dispLoc,
-            UChar             *result,
-            int32_t         resultLength,
-            UErrorCode        *status);
-
-/**
- * Get a locale for which collation rules are available.
- * A UCollator in a locale returned by this function will perform the correct
- * collation for the locale.
- * @param index The index of the desired locale.
- * @return A locale for which collation rules are available, or 0 if none.
- * @see ucol_countAvailable
- * @stable ICU 2.0
- */
-U_STABLE const char* U_EXPORT2 
-ucol_getAvailable(int32_t index);
-
-/**
- * Determine how many locales have collation rules available.
- * This function is most useful as determining the loop ending condition for
- * calls to {@link #ucol_getAvailable }.
- * @return The number of locales for which collation rules are available.
- * @see ucol_getAvailable
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucol_countAvailable(void);
-
-#if !UCONFIG_NO_SERVICE
-/**
- * Create a string enumerator of all locales for which a valid
- * collator may be opened.
- * @param status input-output error code
- * @return a string enumeration over locale strings. The caller is
- * responsible for closing the result.
- * @stable ICU 3.0
- */
-U_STABLE UEnumeration* U_EXPORT2
-ucol_openAvailableLocales(UErrorCode *status);
-#endif
-
-/**
- * Create a string enumerator of all possible keywords that are relevant to
- * collation. At this point, the only recognized keyword for this
- * service is "collation".
- * @param status input-output error code
- * @return a string enumeration over locale strings. The caller is
- * responsible for closing the result.
- * @stable ICU 3.0
- */
-U_STABLE UEnumeration* U_EXPORT2
-ucol_getKeywords(UErrorCode *status);
-
-/**
- * Given a keyword, create a string enumeration of all values
- * for that keyword that are currently in use.
- * @param keyword a particular keyword as enumerated by
- * ucol_getKeywords. If any other keyword is passed in, *status is set
- * to U_ILLEGAL_ARGUMENT_ERROR.
- * @param status input-output error code
- * @return a string enumeration over collation keyword values, or NULL
- * upon error. The caller is responsible for closing the result.
- * @stable ICU 3.0
- */
-U_STABLE UEnumeration* U_EXPORT2
-ucol_getKeywordValues(const char *keyword, UErrorCode *status);
-
-/**
- * Return the functionally equivalent locale for the given
- * requested locale, with respect to given keyword, for the
- * collation service.  If two locales return the same result, then
- * collators instantiated for these locales will behave
- * equivalently.  The converse is not always true; two collators
- * may in fact be equivalent, but return different results, due to
- * internal details.  The return result has no other meaning than
- * that stated above, and implies nothing as to the relationship
- * between the two locales.  This is intended for use by
- * applications who wish to cache collators, or otherwise reuse
- * collators when possible.  The functional equivalent may change
- * over time.  For more information, please see the <a
- * href="http://icu-project.org/userguide/locale.html#services">
- * Locales and Services</a> section of the ICU User Guide.
- * @param result fillin for the functionally equivalent locale
- * @param resultCapacity capacity of the fillin buffer
- * @param keyword a particular keyword as enumerated by
- * ucol_getKeywords.
- * @param locale the requested locale
- * @param isAvailable if non-NULL, pointer to a fillin parameter that
- * indicates whether the requested locale was 'available' to the
- * collation service. A locale is defined as 'available' if it
- * physically exists within the collation locale data.
- * @param status pointer to input-output error code
- * @return the actual buffer size needed for the locale.  If greater
- * than resultCapacity, the returned full name will be truncated and
- * an error code will be returned.
- * @stable ICU 3.0
- */
-U_STABLE int32_t U_EXPORT2
-ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
-                             const char* keyword, const char* locale,
-                             UBool* isAvailable, UErrorCode* status);
-
-/**
- * Get the collation rules from a UCollator.
- * The rules will follow the rule syntax.
- * @param coll The UCollator to query.
- * @param length 
- * @return The collation rules.
- * @stable ICU 2.0
- */
-U_STABLE const UChar* U_EXPORT2 
-ucol_getRules(    const    UCollator    *coll, 
-        int32_t            *length);
-
-/** Get the short definition string for a collator. This API harvests the collator's
- *  locale and the attribute set and produces a string that can be used for opening 
- *  a collator with the same properties using the ucol_openFromShortString API.
- *  This string will be normalized.
- *  The structure and the syntax of the string is defined in the "Naming collators"
- *  section of the users guide: 
- *  http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
- *  This API supports preflighting.
- *  @param coll a collator
- *  @param locale a locale that will appear as a collators locale in the resulting
- *                short string definition. If NULL, the locale will be harvested 
- *                from the collator.
- *  @param buffer space to hold the resulting string
- *  @param capacity capacity of the buffer
- *  @param status for returning errors. All the preflighting errors are featured
- *  @return length of the resulting string
- *  @see ucol_openFromShortString
- *  @see ucol_normalizeShortDefinitionString
- *  @stable ICU 3.0
- */
-U_STABLE int32_t U_EXPORT2
-ucol_getShortDefinitionString(const UCollator *coll,
-                              const char *locale,
-                              char *buffer,
-                              int32_t capacity,
-                              UErrorCode *status);
-
-/** Verifies and normalizes short definition string.
- *  Normalized short definition string has all the option sorted by the argument name,
- *  so that equivalent definition strings are the same. 
- *  This API supports preflighting.
- *  @param source definition string
- *  @param destination space to hold the resulting string
- *  @param capacity capacity of the buffer
- *  @param parseError if not NULL, structure that will get filled with error's pre
- *                   and post context in case of error.
- *  @param status     Error code. This API will return an error if an invalid attribute 
- *                    or attribute/value combination is specified. All the preflighting 
- *                    errors are also featured
- *  @return length of the resulting normalized string.
- *
- *  @see ucol_openFromShortString
- *  @see ucol_getShortDefinitionString
- * 
- *  @stable ICU 3.0
- */
-
-U_STABLE int32_t U_EXPORT2
-ucol_normalizeShortDefinitionString(const char *source,
-                                    char *destination,
-                                    int32_t capacity,
-                                    UParseError *parseError,
-                                    UErrorCode *status);
-
-
-/**
- * Get a sort key for a string from a UCollator.
- * Sort keys may be compared using <TT>strcmp</TT>.
- * @param coll The UCollator containing the collation rules.
- * @param source The string to transform.
- * @param sourceLength The length of source, or -1 if null-terminated.
- * @param result A pointer to a buffer to receive the attribute.
- * @param resultLength The maximum size of result.
- * @return The size needed to fully store the sort key.
- *      If there was an internal error generating the sort key,
- *      a zero value is returned.
- * @see ucol_keyHashCode
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucol_getSortKey(const    UCollator    *coll,
-        const    UChar        *source,
-        int32_t        sourceLength,
-        uint8_t        *result,
-        int32_t        resultLength);
-
-
-/** Gets the next count bytes of a sort key. Caller needs
- *  to preserve state array between calls and to provide
- *  the same type of UCharIterator set with the same string.
- *  The destination buffer provided must be big enough to store
- *  the number of requested bytes. Generated sortkey is not 
- *  compatible with sortkeys generated using ucol_getSortKey
- *  API, since we don't do any compression. If uncompressed
- *  sortkeys are required, this API can be used.
- *  @param coll The UCollator containing the collation rules.
- *  @param iter UCharIterator containing the string we need 
- *              the sort key to be calculated for.
- *  @param state Opaque state of sortkey iteration.
- *  @param dest Buffer to hold the resulting sortkey part
- *  @param count number of sort key bytes required.
- *  @param status error code indicator.
- *  @return the actual number of bytes of a sortkey. It can be
- *          smaller than count if we have reached the end of 
- *          the sort key.
- *  @stable ICU 2.6
- */
-U_STABLE int32_t U_EXPORT2 
-ucol_nextSortKeyPart(const UCollator *coll,
-                     UCharIterator *iter,
-                     uint32_t state[2],
-                     uint8_t *dest, int32_t count,
-                     UErrorCode *status);
-
-/** enum that is taken by ucol_getBound API 
- * See below for explanation                
- * do not change the values assigned to the 
- * members of this enum. Underlying code    
- * depends on them having these numbers     
- * @stable ICU 2.0
- */
-typedef enum {
-  /** lower bound */
-  UCOL_BOUND_LOWER = 0,
-  /** upper bound that will match strings of exact size */
-  UCOL_BOUND_UPPER = 1,
-  /** upper bound that will match all the strings that have the same initial substring as the given string */
-  UCOL_BOUND_UPPER_LONG = 2,
-  UCOL_BOUND_VALUE_COUNT
-} UColBoundMode;
-
-/**
- * Produce a bound for a given sortkey and a number of levels.
- * Return value is always the number of bytes needed, regardless of 
- * whether the result buffer was big enough or even valid.<br>
- * Resulting bounds can be used to produce a range of strings that are
- * between upper and lower bounds. For example, if bounds are produced
- * for a sortkey of string "smith", strings between upper and lower 
- * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
- * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
- * is produced, strings matched would be as above. However, if bound
- * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
- * also match "Smithsonian" and similar.<br>
- * For more on usage, see example in cintltst/capitst.c in procedure
- * TestBounds.
- * Sort keys may be compared using <TT>strcmp</TT>.
- * @param source The source sortkey.
- * @param sourceLength The length of source, or -1 if null-terminated. 
- *                     (If an unmodified sortkey is passed, it is always null 
- *                      terminated).
- * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which 
- *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, that 
- *                  produces upper bound that matches strings of the same length 
- *                  or UCOL_BOUND_UPPER_LONG that matches strings that have the 
- *                  same starting substring as the source string.
- * @param noOfLevels  Number of levels required in the resulting bound (for most 
- *                    uses, the recommended value is 1). See users guide for 
- *                    explanation on number of levels a sortkey can have.
- * @param result A pointer to a buffer to receive the resulting sortkey.
- * @param resultLength The maximum size of result.
- * @param status Used for returning error code if something went wrong. If the 
- *               number of levels requested is higher than the number of levels
- *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 
- *               issued.
- * @return The size needed to fully store the bound. 
- * @see ucol_keyHashCode
- * @stable ICU 2.1
- */
-U_STABLE int32_t U_EXPORT2 
-ucol_getBound(const uint8_t       *source,
-        int32_t             sourceLength,
-        UColBoundMode       boundType,
-        uint32_t            noOfLevels,
-        uint8_t             *result,
-        int32_t             resultLength,
-        UErrorCode          *status);
-        
-/**
- * Gets the version information for a Collator. Version is currently
- * an opaque 32-bit number which depends, among other things, on major
- * versions of the collator tailoring and UCA.
- * @param coll The UCollator to query.
- * @param info the version # information, the result will be filled in
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-ucol_getVersion(const UCollator* coll, UVersionInfo info);
-
-/**
- * Gets the UCA version information for a Collator. Version is the
- * UCA version number (3.1.1, 4.0).
- * @param coll The UCollator to query.
- * @param info the version # information, the result will be filled in
- * @stable ICU 2.8
- */
-U_STABLE void U_EXPORT2
-ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
-
-/** 
- * Merge two sort keys. The levels are merged with their corresponding counterparts
- * (primaries with primaries, secondaries with secondaries etc.). Between the values
- * from the same level a separator is inserted.
- * example (uncompressed): 
- * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00
- * will be merged as 
- * 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00
- * This allows for concatenating of first and last names for sorting, among other things.
- * If the destination buffer is not big enough, the results are undefined.
- * If any of source lengths are zero or any of source pointers are NULL/undefined, 
- * result is of size zero.
- * @param src1 pointer to the first sortkey
- * @param src1Length length of the first sortkey
- * @param src2 pointer to the second sortkey
- * @param src2Length length of the second sortkey
- * @param dest buffer to hold the result
- * @param destCapacity size of the buffer for the result
- * @return size of the result. If the buffer is big enough size is always
- *         src1Length+src2Length-1
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
-                   const uint8_t *src2, int32_t src2Length,
-                   uint8_t *dest, int32_t destCapacity);
-
-/**
- * Universal attribute setter
- * @param coll collator which attributes are to be changed
- * @param attr attribute type 
- * @param value attribute value
- * @param status to indicate whether the operation went on smoothly or there were errors
- * @see UColAttribute
- * @see UColAttributeValue
- * @see ucol_getAttribute
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
-
-/**
- * Universal attribute getter
- * @param coll collator which attributes are to be changed
- * @param attr attribute type
- * @return attribute value
- * @param status to indicate whether the operation went on smoothly or there were errors
- * @see UColAttribute
- * @see UColAttributeValue
- * @see ucol_setAttribute
- * @stable ICU 2.0
- */
-U_STABLE UColAttributeValue  U_EXPORT2 
-ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
-
-/** Variable top
- * is a two byte primary value which causes all the codepoints with primary values that
- * are less or equal than the variable top to be shifted when alternate handling is set
- * to UCOL_SHIFTED.
- * Sets the variable top to a collation element value of a string supplied. 
- * @param coll collator which variable top needs to be changed
- * @param varTop one or more (if contraction) UChars to which the variable top should be set
- * @param len length of variable top string. If -1 it is considered to be zero terminated.
- * @param status error code. If error code is set, the return value is undefined. 
- *               Errors set by this function are: <br>
- *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such 
- *    a contraction<br>
- *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
- * @return a 32 bit value containing the value of the variable top in upper 16 bits. 
- *         Lower 16 bits are undefined
- * @see ucol_getVariableTop
- * @see ucol_restoreVariableTop
- * @stable ICU 2.0
- */
-U_STABLE uint32_t U_EXPORT2 
-ucol_setVariableTop(UCollator *coll, 
-                    const UChar *varTop, int32_t len, 
-                    UErrorCode *status);
-
-/** 
- * Gets the variable top value of a Collator. 
- * Lower 16 bits are undefined and should be ignored.
- * @param coll collator which variable top needs to be retrieved
- * @param status error code (not changed by function). If error code is set, 
- *               the return value is undefined.
- * @return the variable top value of a Collator.
- * @see ucol_setVariableTop
- * @see ucol_restoreVariableTop
- * @stable ICU 2.0
- */
-U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
-
-/** 
- * Sets the variable top to a collation element value supplied. Variable top is 
- * set to the upper 16 bits. 
- * Lower 16 bits are ignored.
- * @param coll collator which variable top needs to be changed
- * @param varTop CE value, as returned by ucol_setVariableTop or ucol)getVariableTop
- * @param status error code (not changed by function)
- * @see ucol_getVariableTop
- * @see ucol_setVariableTop
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status);
-
-/**
- * Thread safe cloning operation. The result is a clone of a given collator.
- * @param coll collator to be cloned
- * @param stackBuffer user allocated space for the new clone. 
- * If NULL new memory will be allocated. 
- *  If buffer is not large enough, new memory will be allocated.
- *  Clients can use the U_COL_SAFECLONE_BUFFERSIZE. 
- *  This will probably be enough to avoid memory allocations.
- * @param pBufferSize pointer to size of allocated space. 
- *  If *pBufferSize == 0, a sufficient size for use in cloning will 
- *  be returned ('pre-flighting')
- *  If *pBufferSize is not enough for a stack-based safe clone, 
- *  new memory will be allocated.
- * @param status to indicate whether the operation went on smoothly or there were errors
- *    An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any
- * allocations were necessary.
- * @return pointer to the new clone
- * @see ucol_open
- * @see ucol_openRules
- * @see ucol_close
- * @stable ICU 2.0
- */
-U_STABLE UCollator* U_EXPORT2 
-ucol_safeClone(const UCollator *coll,
-               void            *stackBuffer,
-               int32_t         *pBufferSize,
-               UErrorCode      *status);
-
-/** default memory size for the new clone. It needs to be this large for os/400 large pointers 
- * @stable ICU 2.0
- */
-#define U_COL_SAFECLONE_BUFFERSIZE 512
-
-/**
- * Returns current rules. Delta defines whether full rules are returned or just the tailoring. 
- * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough 
- * to store rules, will store up to available space.
- * @param coll collator to get the rules from
- * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 
- * @param buffer buffer to store the result in. If NULL, you'll get no rules.
- * @param bufferLen lenght of buffer to store rules in. If less then needed you'll get only the part that fits in.
- * @return current rules
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
-
-/**
- * gets the locale name of the collator. If the collator
- * is instantiated from the rules, then this function returns
- * NULL.
- * @param coll The UCollator for which the locale is needed
- * @param type You can choose between requested, valid and actual
- *             locale. For description see the definition of
- *             ULocDataLocaleType in uloc.h
- * @param status error code of the operation
- * @return real locale name from which the collation data comes. 
- *         If the collator was instantiated from rules, returns
- *         NULL.
- * @deprecated ICU 2.8 Use ucol_getLocaleByType instead
- */
-U_DEPRECATED const char * U_EXPORT2
-ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
-
-
-/**
- * gets the locale name of the collator. If the collator
- * is instantiated from the rules, then this function returns
- * NULL.
- * @param coll The UCollator for which the locale is needed
- * @param type You can choose between requested, valid and actual
- *             locale. For description see the definition of
- *             ULocDataLocaleType in uloc.h
- * @param status error code of the operation
- * @return real locale name from which the collation data comes. 
- *         If the collator was instantiated from rules, returns
- *         NULL.
- * @stable ICU 2.8
- */
-U_STABLE const char * U_EXPORT2
-ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
-
-/**
- * Get an Unicode set that contains all the characters and sequences tailored in 
- * this collator. The result must be disposed of by using uset_close.
- * @param coll        The UCollator for which we want to get tailored chars
- * @param status      error code of the operation
- * @return a pointer to newly created USet. Must be be disposed by using uset_close
- * @see ucol_openRules
- * @see uset_close
- * @stable ICU 2.4
- */
-U_STABLE USet * U_EXPORT2
-ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
-
-/**
- * Universal attribute getter that returns UCOL_DEFAULT if the value is default
- * @param coll collator which attributes are to be changed
- * @param attr attribute type
- * @return attribute value or UCOL_DEFAULT if the value is default
- * @param status to indicate whether the operation went on smoothly or there were errors
- * @see UColAttribute
- * @see UColAttributeValue
- * @see ucol_setAttribute
- * @internal ICU 3.0
- */
-U_INTERNAL UColAttributeValue  U_EXPORT2
-ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status);
-
-/** Check whether two collators are equal. Collators are considered equal if they
- *  will sort strings the same. This means that both the current attributes and the
- *  rules must be equivalent. Currently used for RuleBasedCollator::operator==.
- *  @param source first collator
- *  @param target second collator
- *  @return TRUE or FALSE
- *  @internal ICU 3.0
- */
-U_INTERNAL UBool U_EXPORT2
-ucol_equals(const UCollator *source, const UCollator *target);
-
-/** Calculates the set of unsafe code points, given a collator.
- *   A character is unsafe if you could append any character and cause the ordering to alter significantly.
- *   Collation sorts in normalized order, so anything that rearranges in normalization can cause this.
- *   Thus if you have a character like a_umlaut, and you add a lower_dot to it,
- *   then it normalizes to a_lower_dot + umlaut, and sorts differently.
- *  @param coll Collator
- *  @param unsafe a fill-in set to receive the unsafe points
- *  @param status for catching errors
- *  @return number of elements in the set
- *  @internal ICU 3.0
- */
-U_INTERNAL int32_t U_EXPORT2
-ucol_getUnsafeSet( const UCollator *coll,
-                  USet *unsafe,
-                  UErrorCode *status);
-
-/** Reset UCA's static pointers. You don't want to use this, unless your static memory can go away.
- * @internal ICU 3.2.1
- */
-U_INTERNAL void U_EXPORT2
-ucol_forgetUCA(void);
-
-/** Touches all resources needed for instantiating a collator from a short string definition,
- *  thus filling up the cache.
- * @param definition A short string containing a locale and a set of attributes. 
- *                   Attributes not explicitly mentioned are left at the default
- *                   state for a locale.
- * @param parseError if not NULL, structure that will get filled with error's pre
- *                   and post context in case of error.
- * @param forceDefaults if FALSE, the settings that are the same as the collator 
- *                   default settings will not be applied (for example, setting
- *                   French secondary on a French collator would not be executed). 
- *                   If TRUE, all the settings will be applied regardless of the 
- *                   collator default value. If the definition
- *                   strings are to be cached, should be set to FALSE.
- * @param status     Error code. Apart from regular error conditions connected to 
- *                   instantiating collators (like out of memory or similar), this
- *                   API will return an error if an invalid attribute or attribute/value
- *                   combination is specified.
- * @see ucol_openFromShortString
- * @internal ICU 3.2.1
- */
-U_INTERNAL void U_EXPORT2
-ucol_prepareShortStringOpen( const char *definition,
-                          UBool forceDefaults,
-                          UParseError *parseError,
-                          UErrorCode *status);
-
-/** Creates a binary image of a collator. This binary image can be stored and 
- *  later used to instantiate a collator using ucol_openBinary.
- *  This API supports preflighting.
- *  @param coll Collator
- *  @param buffer a fill-in buffer to receive the binary image
- *  @param capacity capacity of the destination buffer
- *  @param status for catching errors
- *  @return size of the image
- *  @see ucol_openBinary
- *  @stable ICU 3.2
- */
-U_STABLE int32_t U_EXPORT2
-ucol_cloneBinary(const UCollator *coll,
-                 uint8_t *buffer, int32_t capacity,
-                 UErrorCode *status);
-
-/** Opens a collator from a collator binary image created using
- *  ucol_cloneBinary. Binary image used in instantiation of the 
- *  collator remains owned by the user and should stay around for 
- *  the lifetime of the collator. The API also takes a base collator
- *  which usualy should be UCA.
- *  @param bin binary image owned by the user and required through the
- *             lifetime of the collator
- *  @param length size of the image. If negative, the API will try to
- *                figure out the length of the image
- *  @param base fallback collator, usually UCA. Base is required to be
- *              present through the lifetime of the collator. Currently 
- *              it cannot be NULL.
- *  @param status for catching errors
- *  @return newly created collator
- *  @see ucol_cloneBinary
- *  @stable ICU 3.2
- */
-U_STABLE UCollator* U_EXPORT2
-ucol_openBinary(const uint8_t *bin, int32_t length, 
-                const UCollator *base, 
-                UErrorCode *status);
-
-
-#endif /* #if !UCONFIG_NO_COLLATION */
-
-#endif
-

Copied: MacRuby/trunk/icu-1060/unicode/ucol.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ucol.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ucol.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ucol.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1132 @@
+/*
+*******************************************************************************
+* Copyright (c) 1996-2008, International Business Machines Corporation and others.
+* All Rights Reserved.
+*******************************************************************************
+*/
+
+#ifndef UCOL_H
+#define UCOL_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/unorm.h"
+#include "unicode/parseerr.h"
+#include "unicode/uloc.h"
+#include "unicode/uset.h"
+
+/**
+ * \file
+ * \brief C API: Collator 
+ *
+ * <h2> Collator C API </h2>
+ *
+ * The C API for Collator performs locale-sensitive
+ * string comparison. You use this service to build
+ * searching and sorting routines for natural language text.
+ * <em>Important: </em>The ICU collation service has been reimplemented 
+ * in order to achieve better performance and UCA compliance. 
+ * For details, see the 
+ * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
+ * collation design document</a>.
+ * <p>
+ * For more information about the collation service see 
+ * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
+ * <p>
+ * Collation service provides correct sorting orders for most locales supported in ICU. 
+ * If specific data for a locale is not available, the orders eventually falls back
+ * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 
+ * <p>
+ * Sort ordering may be customized by providing your own set of rules. For more on
+ * this subject see the 
+ * <a href="http://icu-project.org/userguide/Collate_Customization.html">
+ * Collation customization</a> section of the users guide.
+ * <p>
+ * @see         UCollationResult
+ * @see         UNormalizationMode
+ * @see         UCollationStrength
+ * @see         UCollationElements
+ */
+
+/** A collator.
+*  For usage in C programs.
+*/
+struct UCollator;
+/** structure representing a collator object instance 
+ * @stable ICU 2.0
+ */
+typedef struct UCollator UCollator;
+
+
+/**
+ * UCOL_LESS is returned if source string is compared to be less than target
+ * string in the u_strcoll() method.
+ * UCOL_EQUAL is returned if source string is compared to be equal to target
+ * string in the u_strcoll() method.
+ * UCOL_GREATER is returned if source string is compared to be greater than
+ * target string in the u_strcoll() method.
+ * @see u_strcoll()
+ * <p>
+ * Possible values for a comparison result 
+ * @stable ICU 2.0
+ */
+typedef enum {
+  /** string a == string b */
+  UCOL_EQUAL    = 0,
+  /** string a > string b */
+  UCOL_GREATER    = 1,
+  /** string a < string b */
+  UCOL_LESS    = -1
+} UCollationResult ;
+
+
+/** Enum containing attribute values for controling collation behavior.
+ * Here are all the allowable values. Not every attribute can take every value. The only
+ * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined  
+ * value for that locale 
+ * @stable ICU 2.0
+ */
+typedef enum {
+  /** accepted by most attributes */
+  UCOL_DEFAULT = -1,
+
+  /** Primary collation strength */
+  UCOL_PRIMARY = 0,
+  /** Secondary collation strength */
+  UCOL_SECONDARY = 1,
+  /** Tertiary collation strength */
+  UCOL_TERTIARY = 2,
+  /** Default collation strength */
+  UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
+  UCOL_CE_STRENGTH_LIMIT,
+  /** Quaternary collation strength */
+  UCOL_QUATERNARY=3,
+  /** Identical collation strength */
+  UCOL_IDENTICAL=15,
+  UCOL_STRENGTH_LIMIT,
+
+  /** Turn the feature off - works for UCOL_FRENCH_COLLATION, 
+      UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
+      & UCOL_DECOMPOSITION_MODE*/
+  UCOL_OFF = 16,
+  /** Turn the feature on - works for UCOL_FRENCH_COLLATION, 
+      UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
+      & UCOL_DECOMPOSITION_MODE*/
+  UCOL_ON = 17,
+  
+  /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
+  UCOL_SHIFTED = 20,
+  /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
+  UCOL_NON_IGNORABLE = 21,
+
+  /** Valid for UCOL_CASE_FIRST - 
+      lower case sorts before upper case */
+  UCOL_LOWER_FIRST = 24,
+  /** upper case sorts before lower case */
+  UCOL_UPPER_FIRST = 25,
+
+  UCOL_ATTRIBUTE_VALUE_COUNT
+
+} UColAttributeValue;
+
+/**
+ * Base letter represents a primary difference.  Set comparison
+ * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
+ * Use this to set the strength of a Collator object.
+ * Example of primary difference, "abc" &lt; "abd"
+ * 
+ * Diacritical differences on the same base letter represent a secondary
+ * difference.  Set comparison level to UCOL_SECONDARY to ignore tertiary
+ * differences. Use this to set the strength of a Collator object.
+ * Example of secondary difference, "&auml;" >> "a".
+ *
+ * Uppercase and lowercase versions of the same character represents a
+ * tertiary difference.  Set comparison level to UCOL_TERTIARY to include
+ * all comparison differences. Use this to set the strength of a Collator
+ * object.
+ * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
+ *
+ * Two characters are considered "identical" when they have the same
+ * unicode spellings.  UCOL_IDENTICAL.
+ * For example, "&auml;" == "&auml;".
+ *
+ * UCollationStrength is also used to determine the strength of sort keys 
+ * generated from UCollator objects
+ * These values can be now found in the UColAttributeValue enum.
+ * @stable ICU 2.0
+ **/
+typedef UColAttributeValue UCollationStrength;
+
+/** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
+ * value, as well as the values specific to each one. 
+ * @stable ICU 2.0
+ */
+typedef enum {
+     /** Attribute for direction of secondary weights - used in French.
+      * Acceptable values are UCOL_ON, which results in secondary weights
+      * being considered backwards and UCOL_OFF which treats secondary
+      * weights in the order they appear.*/
+     UCOL_FRENCH_COLLATION, 
+     /** Attribute for handling variable elements.
+      * Acceptable values are UCOL_NON_IGNORABLE (default)
+      * which treats all the codepoints with non-ignorable 
+      * primary weights in the same way,
+      * and UCOL_SHIFTED which causes codepoints with primary 
+      * weights that are equal or below the variable top value
+      * to be ignored on primary level and moved to the quaternary 
+      * level.*/
+     UCOL_ALTERNATE_HANDLING, 
+     /** Controls the ordering of upper and lower case letters.
+      * Acceptable values are UCOL_OFF (default), which orders
+      * upper and lower case letters in accordance to their tertiary
+      * weights, UCOL_UPPER_FIRST which forces upper case letters to 
+      * sort before lower case letters, and UCOL_LOWER_FIRST which does 
+      * the opposite. */
+     UCOL_CASE_FIRST, 
+     /** Controls whether an extra case level (positioned before the third
+      * level) is generated or not. Acceptable values are UCOL_OFF (default), 
+      * when case level is not generated, and UCOL_ON which causes the case
+      * level to be generated. Contents of the case level are affected by
+      * the value of UCOL_CASE_FIRST attribute. A simple way to ignore 
+      * accent differences in a string is to set the strength to UCOL_PRIMARY
+      * and enable case level. */
+     UCOL_CASE_LEVEL,
+     /** Controls whether the normalization check and necessary normalizations
+      * are performed. When set to UCOL_OFF (default) no normalization check
+      * is performed. The correctness of the result is guaranteed only if the 
+      * input data is in so-called FCD form (see users manual for more info).
+      * When set to UCOL_ON, an incremental check is performed to see whether
+      * the input data is in the FCD form. If the data is not in the FCD form,
+      * incremental NFD normalization is performed. */
+     UCOL_NORMALIZATION_MODE, 
+     /** An alias for UCOL_NORMALIZATION_MODE attribute */
+     UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
+     /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
+      * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
+      * for most locales (except Japanese) is tertiary. Quaternary strength 
+      * is useful when combined with shifted setting for alternate handling
+      * attribute and for JIS x 4061 collation, when it is used to distinguish
+      * between Katakana  and Hiragana (this is achieved by setting the 
+      * UCOL_HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level
+      * is affected only by the number of non ignorable code points in
+      * the string. Identical strength is rarely useful, as it amounts 
+      * to codepoints of the NFD form of the string. */
+     UCOL_STRENGTH,  
+     /** When turned on, this attribute positions Hiragana before all  
+      * non-ignorables on quaternary level This is a sneaky way to produce JIS
+      * sort order */
+     UCOL_HIRAGANA_QUATERNARY_MODE,
+     /** When turned on, this attribute generates a collation key
+      * for the numeric value of substrings of digits.
+      * This is a way to get '100' to sort AFTER '2'. */
+     UCOL_NUMERIC_COLLATION, 
+     UCOL_ATTRIBUTE_COUNT
+} UColAttribute;
+
+/** Options for retrieving the rule string 
+ *  @stable ICU 2.0
+ */
+typedef enum {
+  /** Retrieve tailoring only */
+  UCOL_TAILORING_ONLY, 
+  /** Retrieve UCA rules and tailoring */
+  UCOL_FULL_RULES 
+} UColRuleOption ;
+
+/**
+ * Open a UCollator for comparing strings.
+ * The UCollator pointer is used in all the calls to the Collation 
+ * service. After finished, collator must be disposed of by calling
+ * {@link #ucol_close }.
+ * @param loc The locale containing the required collation rules. 
+ *            Special values for locales can be passed in - 
+ *            if NULL is passed for the locale, the default locale
+ *            collation rules will be used. If empty string ("") or
+ *            "root" are passed, UCA rules will be used.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return A pointer to a UCollator, or 0 if an error occurred.
+ * @see ucol_openRules
+ * @see ucol_safeClone
+ * @see ucol_close
+ * @stable ICU 2.0
+ */
+U_STABLE UCollator* U_EXPORT2 
+ucol_open(const char *loc, UErrorCode *status);
+
+/**
+ * Produce an UCollator instance according to the rules supplied.
+ * The rules are used to change the default ordering, defined in the
+ * UCA in a process called tailoring. The resulting UCollator pointer
+ * can be used in the same way as the one obtained by {@link #ucol_strcoll }.
+ * @param rules A string describing the collation rules. For the syntax
+ *              of the rules please see users guide.
+ * @param rulesLength The length of rules, or -1 if null-terminated.
+ * @param normalizationMode The normalization mode: One of
+ *             UCOL_OFF     (expect the text to not need normalization),
+ *             UCOL_ON      (normalize), or
+ *             UCOL_DEFAULT (set the mode according to the rules)
+ * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
+ * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules.
+ * @param parseError  A pointer to UParseError to recieve information about errors
+ *                    occurred during parsing. This argument can currently be set
+ *                    to NULL, but at users own risk. Please provide a real structure.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
+ *         of error - please use status argument to check for errors.
+ * @see ucol_open
+ * @see ucol_safeClone
+ * @see ucol_close
+ * @stable ICU 2.0
+ */
+U_STABLE UCollator* U_EXPORT2 
+ucol_openRules( const UChar        *rules,
+                int32_t            rulesLength,
+                UColAttributeValue normalizationMode,
+                UCollationStrength strength,
+                UParseError        *parseError,
+                UErrorCode         *status);
+
+/** 
+ * Open a collator defined by a short form string.
+ * The structure and the syntax of the string is defined in the "Naming collators"
+ * section of the users guide: 
+ * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
+ * Attributes are overriden by the subsequent attributes. So, for "S2_S3", final
+ * strength will be 3. 3066bis locale overrides individual locale parts.
+ * The call to this function is equivalent to a call to ucol_open, followed by a 
+ * series of calls to ucol_setAttribute and ucol_setVariableTop.
+ * @param definition A short string containing a locale and a set of attributes. 
+ *                   Attributes not explicitly mentioned are left at the default
+ *                   state for a locale.
+ * @param parseError if not NULL, structure that will get filled with error's pre
+ *                   and post context in case of error.
+ * @param forceDefaults if FALSE, the settings that are the same as the collator 
+ *                   default settings will not be applied (for example, setting
+ *                   French secondary on a French collator would not be executed). 
+ *                   If TRUE, all the settings will be applied regardless of the 
+ *                   collator default value. If the definition
+ *                   strings are to be cached, should be set to FALSE.
+ * @param status     Error code. Apart from regular error conditions connected to 
+ *                   instantiating collators (like out of memory or similar), this
+ *                   API will return an error if an invalid attribute or attribute/value
+ *                   combination is specified.
+ * @return           A pointer to a UCollator or 0 if an error occured (including an 
+ *                   invalid attribute).
+ * @see ucol_open
+ * @see ucol_setAttribute
+ * @see ucol_setVariableTop
+ * @see ucol_getShortDefinitionString
+ * @see ucol_normalizeShortDefinitionString
+ * @stable ICU 3.0
+ *
+ */
+U_STABLE UCollator* U_EXPORT2
+ucol_openFromShortString( const char *definition,
+                          UBool forceDefaults,
+                          UParseError *parseError,
+                          UErrorCode *status);
+
+/**
+ * Get a set containing the contractions defined by the collator. The set includes
+ * both the UCA contractions and the contractions defined by the collator. This set
+ * will contain only strings. If a tailoring explicitly suppresses contractions from 
+ * the UCA (like Russian), removed contractions will not be in the resulting set.
+ * @param coll collator 
+ * @param conts the set to hold the result. It gets emptied before
+ *              contractions are added. 
+ * @param status to hold the error code
+ * @return the size of the contraction set
+ *
+ * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead
+ */
+U_DEPRECATED int32_t U_EXPORT2
+ucol_getContractions( const UCollator *coll,
+                  USet *conts,
+                  UErrorCode *status);
+
+/**
+ * Get a set containing the expansions defined by the collator. The set includes
+ * both the UCA expansions and the expansions defined by the tailoring
+ * @param coll collator
+ * @param contractions if not NULL, the set to hold the contractions
+ * @param expansions if not NULL, the set to hold the expansions
+ * @param addPrefixes add the prefix contextual elements to contractions
+ * @param status to hold the error code
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+ucol_getContractionsAndExpansions( const UCollator *coll,
+                  USet *contractions, USet *expansions,
+                  UBool addPrefixes, UErrorCode *status);
+
+/** 
+ * Close a UCollator.
+ * Once closed, a UCollator should not be used. Every open collator should
+ * be closed. Otherwise, a memory leak will result.
+ * @param coll The UCollator to close.
+ * @see ucol_open
+ * @see ucol_openRules
+ * @see ucol_safeClone
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucol_close(UCollator *coll);
+
+/**
+ * Compare two strings.
+ * The strings will be compared using the options already specified.
+ * @param coll The UCollator containing the comparison rules.
+ * @param source The source string.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param target The target string.
+ * @param targetLength The length of target, or -1 if null-terminated.
+ * @return The result of comparing the strings; one of UCOL_EQUAL,
+ * UCOL_GREATER, UCOL_LESS
+ * @see ucol_greater
+ * @see ucol_greaterOrEqual
+ * @see ucol_equal
+ * @stable ICU 2.0
+ */
+U_STABLE UCollationResult U_EXPORT2 
+ucol_strcoll(    const    UCollator    *coll,
+        const    UChar        *source,
+        int32_t            sourceLength,
+        const    UChar        *target,
+        int32_t            targetLength);
+
+/**
+ * Determine if one string is greater than another.
+ * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER
+ * @param coll The UCollator containing the comparison rules.
+ * @param source The source string.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param target The target string.
+ * @param targetLength The length of target, or -1 if null-terminated.
+ * @return TRUE if source is greater than target, FALSE otherwise.
+ * @see ucol_strcoll
+ * @see ucol_greaterOrEqual
+ * @see ucol_equal
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2 
+ucol_greater(const UCollator *coll,
+             const UChar     *source, int32_t sourceLength,
+             const UChar     *target, int32_t targetLength);
+
+/**
+ * Determine if one string is greater than or equal to another.
+ * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS
+ * @param coll The UCollator containing the comparison rules.
+ * @param source The source string.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param target The target string.
+ * @param targetLength The length of target, or -1 if null-terminated.
+ * @return TRUE if source is greater than or equal to target, FALSE otherwise.
+ * @see ucol_strcoll
+ * @see ucol_greater
+ * @see ucol_equal
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2 
+ucol_greaterOrEqual(const UCollator *coll,
+                    const UChar     *source, int32_t sourceLength,
+                    const UChar     *target, int32_t targetLength);
+
+/**
+ * Compare two strings for equality.
+ * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL
+ * @param coll The UCollator containing the comparison rules.
+ * @param source The source string.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param target The target string.
+ * @param targetLength The length of target, or -1 if null-terminated.
+ * @return TRUE if source is equal to target, FALSE otherwise
+ * @see ucol_strcoll
+ * @see ucol_greater
+ * @see ucol_greaterOrEqual
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2 
+ucol_equal(const UCollator *coll,
+           const UChar     *source, int32_t sourceLength,
+           const UChar     *target, int32_t targetLength);
+
+/**
+ * Compare two UTF-8 encoded trings.
+ * The strings will be compared using the options already specified.
+ * @param coll The UCollator containing the comparison rules.
+ * @param sIter The source string iterator.
+ * @param tIter The target string iterator.
+ * @return The result of comparing the strings; one of UCOL_EQUAL,
+ * UCOL_GREATER, UCOL_LESS
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @see ucol_strcoll
+ * @stable ICU 2.6
+ */
+U_STABLE UCollationResult U_EXPORT2 
+ucol_strcollIter(  const    UCollator    *coll,
+                  UCharIterator *sIter,
+                  UCharIterator *tIter,
+                  UErrorCode *status);
+
+/**
+ * Get the collation strength used in a UCollator.
+ * The strength influences how strings are compared.
+ * @param coll The UCollator to query.
+ * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
+ * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
+ * @see ucol_setStrength
+ * @stable ICU 2.0
+ */
+U_STABLE UCollationStrength U_EXPORT2 
+ucol_getStrength(const UCollator *coll);
+
+/**
+ * Set the collation strength used in a UCollator.
+ * The strength influences how strings are compared.
+ * @param coll The UCollator to set.
+ * @param strength The desired collation strength; one of UCOL_PRIMARY, 
+ * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT
+ * @see ucol_getStrength
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucol_setStrength(UCollator *coll,
+                 UCollationStrength strength);
+
+/**
+ * Get the display name for a UCollator.
+ * The display name is suitable for presentation to a user.
+ * @param objLoc The locale of the collator in question.
+ * @param dispLoc The locale for display.
+ * @param result A pointer to a buffer to receive the attribute.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return The total buffer size needed; if greater than resultLength,
+ * the output was truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucol_getDisplayName(    const    char        *objLoc,
+            const    char        *dispLoc,
+            UChar             *result,
+            int32_t         resultLength,
+            UErrorCode        *status);
+
+/**
+ * Get a locale for which collation rules are available.
+ * A UCollator in a locale returned by this function will perform the correct
+ * collation for the locale.
+ * @param index The index of the desired locale.
+ * @return A locale for which collation rules are available, or 0 if none.
+ * @see ucol_countAvailable
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2 
+ucol_getAvailable(int32_t index);
+
+/**
+ * Determine how many locales have collation rules available.
+ * This function is most useful as determining the loop ending condition for
+ * calls to {@link #ucol_getAvailable }.
+ * @return The number of locales for which collation rules are available.
+ * @see ucol_getAvailable
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucol_countAvailable(void);
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * Create a string enumerator of all locales for which a valid
+ * collator may be opened.
+ * @param status input-output error code
+ * @return a string enumeration over locale strings. The caller is
+ * responsible for closing the result.
+ * @stable ICU 3.0
+ */
+U_STABLE UEnumeration* U_EXPORT2
+ucol_openAvailableLocales(UErrorCode *status);
+#endif
+
+/**
+ * Create a string enumerator of all possible keywords that are relevant to
+ * collation. At this point, the only recognized keyword for this
+ * service is "collation".
+ * @param status input-output error code
+ * @return a string enumeration over locale strings. The caller is
+ * responsible for closing the result.
+ * @stable ICU 3.0
+ */
+U_STABLE UEnumeration* U_EXPORT2
+ucol_getKeywords(UErrorCode *status);
+
+/**
+ * Given a keyword, create a string enumeration of all values
+ * for that keyword that are currently in use.
+ * @param keyword a particular keyword as enumerated by
+ * ucol_getKeywords. If any other keyword is passed in, *status is set
+ * to U_ILLEGAL_ARGUMENT_ERROR.
+ * @param status input-output error code
+ * @return a string enumeration over collation keyword values, or NULL
+ * upon error. The caller is responsible for closing the result.
+ * @stable ICU 3.0
+ */
+U_STABLE UEnumeration* U_EXPORT2
+ucol_getKeywordValues(const char *keyword, UErrorCode *status);
+
+/**
+ * Return the functionally equivalent locale for the given
+ * requested locale, with respect to given keyword, for the
+ * collation service.  If two locales return the same result, then
+ * collators instantiated for these locales will behave
+ * equivalently.  The converse is not always true; two collators
+ * may in fact be equivalent, but return different results, due to
+ * internal details.  The return result has no other meaning than
+ * that stated above, and implies nothing as to the relationship
+ * between the two locales.  This is intended for use by
+ * applications who wish to cache collators, or otherwise reuse
+ * collators when possible.  The functional equivalent may change
+ * over time.  For more information, please see the <a
+ * href="http://icu-project.org/userguide/locale.html#services">
+ * Locales and Services</a> section of the ICU User Guide.
+ * @param result fillin for the functionally equivalent locale
+ * @param resultCapacity capacity of the fillin buffer
+ * @param keyword a particular keyword as enumerated by
+ * ucol_getKeywords.
+ * @param locale the requested locale
+ * @param isAvailable if non-NULL, pointer to a fillin parameter that
+ * indicates whether the requested locale was 'available' to the
+ * collation service. A locale is defined as 'available' if it
+ * physically exists within the collation locale data.
+ * @param status pointer to input-output error code
+ * @return the actual buffer size needed for the locale.  If greater
+ * than resultCapacity, the returned full name will be truncated and
+ * an error code will be returned.
+ * @stable ICU 3.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
+                             const char* keyword, const char* locale,
+                             UBool* isAvailable, UErrorCode* status);
+
+/**
+ * Get the collation rules from a UCollator.
+ * The rules will follow the rule syntax.
+ * @param coll The UCollator to query.
+ * @param length 
+ * @return The collation rules.
+ * @stable ICU 2.0
+ */
+U_STABLE const UChar* U_EXPORT2 
+ucol_getRules(    const    UCollator    *coll, 
+        int32_t            *length);
+
+/** Get the short definition string for a collator. This API harvests the collator's
+ *  locale and the attribute set and produces a string that can be used for opening 
+ *  a collator with the same properties using the ucol_openFromShortString API.
+ *  This string will be normalized.
+ *  The structure and the syntax of the string is defined in the "Naming collators"
+ *  section of the users guide: 
+ *  http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators
+ *  This API supports preflighting.
+ *  @param coll a collator
+ *  @param locale a locale that will appear as a collators locale in the resulting
+ *                short string definition. If NULL, the locale will be harvested 
+ *                from the collator.
+ *  @param buffer space to hold the resulting string
+ *  @param capacity capacity of the buffer
+ *  @param status for returning errors. All the preflighting errors are featured
+ *  @return length of the resulting string
+ *  @see ucol_openFromShortString
+ *  @see ucol_normalizeShortDefinitionString
+ *  @stable ICU 3.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_getShortDefinitionString(const UCollator *coll,
+                              const char *locale,
+                              char *buffer,
+                              int32_t capacity,
+                              UErrorCode *status);
+
+/** Verifies and normalizes short definition string.
+ *  Normalized short definition string has all the option sorted by the argument name,
+ *  so that equivalent definition strings are the same. 
+ *  This API supports preflighting.
+ *  @param source definition string
+ *  @param destination space to hold the resulting string
+ *  @param capacity capacity of the buffer
+ *  @param parseError if not NULL, structure that will get filled with error's pre
+ *                   and post context in case of error.
+ *  @param status     Error code. This API will return an error if an invalid attribute 
+ *                    or attribute/value combination is specified. All the preflighting 
+ *                    errors are also featured
+ *  @return length of the resulting normalized string.
+ *
+ *  @see ucol_openFromShortString
+ *  @see ucol_getShortDefinitionString
+ * 
+ *  @stable ICU 3.0
+ */
+
+U_STABLE int32_t U_EXPORT2
+ucol_normalizeShortDefinitionString(const char *source,
+                                    char *destination,
+                                    int32_t capacity,
+                                    UParseError *parseError,
+                                    UErrorCode *status);
+
+
+/**
+ * Get a sort key for a string from a UCollator.
+ * Sort keys may be compared using <TT>strcmp</TT>.
+ * @param coll The UCollator containing the collation rules.
+ * @param source The string to transform.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param result A pointer to a buffer to receive the attribute.
+ * @param resultLength The maximum size of result.
+ * @return The size needed to fully store the sort key.
+ *      If there was an internal error generating the sort key,
+ *      a zero value is returned.
+ * @see ucol_keyHashCode
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucol_getSortKey(const    UCollator    *coll,
+        const    UChar        *source,
+        int32_t        sourceLength,
+        uint8_t        *result,
+        int32_t        resultLength);
+
+
+/** Gets the next count bytes of a sort key. Caller needs
+ *  to preserve state array between calls and to provide
+ *  the same type of UCharIterator set with the same string.
+ *  The destination buffer provided must be big enough to store
+ *  the number of requested bytes. Generated sortkey is not 
+ *  compatible with sortkeys generated using ucol_getSortKey
+ *  API, since we don't do any compression. If uncompressed
+ *  sortkeys are required, this API can be used.
+ *  @param coll The UCollator containing the collation rules.
+ *  @param iter UCharIterator containing the string we need 
+ *              the sort key to be calculated for.
+ *  @param state Opaque state of sortkey iteration.
+ *  @param dest Buffer to hold the resulting sortkey part
+ *  @param count number of sort key bytes required.
+ *  @param status error code indicator.
+ *  @return the actual number of bytes of a sortkey. It can be
+ *          smaller than count if we have reached the end of 
+ *          the sort key.
+ *  @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2 
+ucol_nextSortKeyPart(const UCollator *coll,
+                     UCharIterator *iter,
+                     uint32_t state[2],
+                     uint8_t *dest, int32_t count,
+                     UErrorCode *status);
+
+/** enum that is taken by ucol_getBound API 
+ * See below for explanation                
+ * do not change the values assigned to the 
+ * members of this enum. Underlying code    
+ * depends on them having these numbers     
+ * @stable ICU 2.0
+ */
+typedef enum {
+  /** lower bound */
+  UCOL_BOUND_LOWER = 0,
+  /** upper bound that will match strings of exact size */
+  UCOL_BOUND_UPPER = 1,
+  /** upper bound that will match all the strings that have the same initial substring as the given string */
+  UCOL_BOUND_UPPER_LONG = 2,
+  UCOL_BOUND_VALUE_COUNT
+} UColBoundMode;
+
+/**
+ * Produce a bound for a given sortkey and a number of levels.
+ * Return value is always the number of bytes needed, regardless of 
+ * whether the result buffer was big enough or even valid.<br>
+ * Resulting bounds can be used to produce a range of strings that are
+ * between upper and lower bounds. For example, if bounds are produced
+ * for a sortkey of string "smith", strings between upper and lower 
+ * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
+ * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
+ * is produced, strings matched would be as above. However, if bound
+ * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
+ * also match "Smithsonian" and similar.<br>
+ * For more on usage, see example in cintltst/capitst.c in procedure
+ * TestBounds.
+ * Sort keys may be compared using <TT>strcmp</TT>.
+ * @param source The source sortkey.
+ * @param sourceLength The length of source, or -1 if null-terminated. 
+ *                     (If an unmodified sortkey is passed, it is always null 
+ *                      terminated).
+ * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which 
+ *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, that 
+ *                  produces upper bound that matches strings of the same length 
+ *                  or UCOL_BOUND_UPPER_LONG that matches strings that have the 
+ *                  same starting substring as the source string.
+ * @param noOfLevels  Number of levels required in the resulting bound (for most 
+ *                    uses, the recommended value is 1). See users guide for 
+ *                    explanation on number of levels a sortkey can have.
+ * @param result A pointer to a buffer to receive the resulting sortkey.
+ * @param resultLength The maximum size of result.
+ * @param status Used for returning error code if something went wrong. If the 
+ *               number of levels requested is higher than the number of levels
+ *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 
+ *               issued.
+ * @return The size needed to fully store the bound. 
+ * @see ucol_keyHashCode
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2 
+ucol_getBound(const uint8_t       *source,
+        int32_t             sourceLength,
+        UColBoundMode       boundType,
+        uint32_t            noOfLevels,
+        uint8_t             *result,
+        int32_t             resultLength,
+        UErrorCode          *status);
+        
+/**
+ * Gets the version information for a Collator. Version is currently
+ * an opaque 32-bit number which depends, among other things, on major
+ * versions of the collator tailoring and UCA.
+ * @param coll The UCollator to query.
+ * @param info the version # information, the result will be filled in
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_getVersion(const UCollator* coll, UVersionInfo info);
+
+/**
+ * Gets the UCA version information for a Collator. Version is the
+ * UCA version number (3.1.1, 4.0).
+ * @param coll The UCollator to query.
+ * @param info the version # information, the result will be filled in
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
+
+/** 
+ * Merge two sort keys. The levels are merged with their corresponding counterparts
+ * (primaries with primaries, secondaries with secondaries etc.). Between the values
+ * from the same level a separator is inserted.
+ * example (uncompressed): 
+ * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00
+ * will be merged as 
+ * 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00
+ * This allows for concatenating of first and last names for sorting, among other things.
+ * If the destination buffer is not big enough, the results are undefined.
+ * If any of source lengths are zero or any of source pointers are NULL/undefined, 
+ * result is of size zero.
+ * @param src1 pointer to the first sortkey
+ * @param src1Length length of the first sortkey
+ * @param src2 pointer to the second sortkey
+ * @param src2Length length of the second sortkey
+ * @param dest buffer to hold the result
+ * @param destCapacity size of the buffer for the result
+ * @return size of the result. If the buffer is big enough size is always
+ *         src1Length+src2Length-1
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
+                   const uint8_t *src2, int32_t src2Length,
+                   uint8_t *dest, int32_t destCapacity);
+
+/**
+ * Universal attribute setter
+ * @param coll collator which attributes are to be changed
+ * @param attr attribute type 
+ * @param value attribute value
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @see UColAttribute
+ * @see UColAttributeValue
+ * @see ucol_getAttribute
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
+
+/**
+ * Universal attribute getter
+ * @param coll collator which attributes are to be changed
+ * @param attr attribute type
+ * @return attribute value
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @see UColAttribute
+ * @see UColAttributeValue
+ * @see ucol_setAttribute
+ * @stable ICU 2.0
+ */
+U_STABLE UColAttributeValue  U_EXPORT2 
+ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
+
+/** Variable top
+ * is a two byte primary value which causes all the codepoints with primary values that
+ * are less or equal than the variable top to be shifted when alternate handling is set
+ * to UCOL_SHIFTED.
+ * Sets the variable top to a collation element value of a string supplied. 
+ * @param coll collator which variable top needs to be changed
+ * @param varTop one or more (if contraction) UChars to which the variable top should be set
+ * @param len length of variable top string. If -1 it is considered to be zero terminated.
+ * @param status error code. If error code is set, the return value is undefined. 
+ *               Errors set by this function are: <br>
+ *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such 
+ *    a contraction<br>
+ *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
+ * @return a 32 bit value containing the value of the variable top in upper 16 bits. 
+ *         Lower 16 bits are undefined
+ * @see ucol_getVariableTop
+ * @see ucol_restoreVariableTop
+ * @stable ICU 2.0
+ */
+U_STABLE uint32_t U_EXPORT2 
+ucol_setVariableTop(UCollator *coll, 
+                    const UChar *varTop, int32_t len, 
+                    UErrorCode *status);
+
+/** 
+ * Gets the variable top value of a Collator. 
+ * Lower 16 bits are undefined and should be ignored.
+ * @param coll collator which variable top needs to be retrieved
+ * @param status error code (not changed by function). If error code is set, 
+ *               the return value is undefined.
+ * @return the variable top value of a Collator.
+ * @see ucol_setVariableTop
+ * @see ucol_restoreVariableTop
+ * @stable ICU 2.0
+ */
+U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
+
+/** 
+ * Sets the variable top to a collation element value supplied. Variable top is 
+ * set to the upper 16 bits. 
+ * Lower 16 bits are ignored.
+ * @param coll collator which variable top needs to be changed
+ * @param varTop CE value, as returned by ucol_setVariableTop or ucol)getVariableTop
+ * @param status error code (not changed by function)
+ * @see ucol_getVariableTop
+ * @see ucol_setVariableTop
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status);
+
+/**
+ * Thread safe cloning operation. The result is a clone of a given collator.
+ * @param coll collator to be cloned
+ * @param stackBuffer user allocated space for the new clone. 
+ * If NULL new memory will be allocated. 
+ *  If buffer is not large enough, new memory will be allocated.
+ *  Clients can use the U_COL_SAFECLONE_BUFFERSIZE. 
+ *  This will probably be enough to avoid memory allocations.
+ * @param pBufferSize pointer to size of allocated space. 
+ *  If *pBufferSize == 0, a sufficient size for use in cloning will 
+ *  be returned ('pre-flighting')
+ *  If *pBufferSize is not enough for a stack-based safe clone, 
+ *  new memory will be allocated.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ *    An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any
+ * allocations were necessary.
+ * @return pointer to the new clone
+ * @see ucol_open
+ * @see ucol_openRules
+ * @see ucol_close
+ * @stable ICU 2.0
+ */
+U_STABLE UCollator* U_EXPORT2 
+ucol_safeClone(const UCollator *coll,
+               void            *stackBuffer,
+               int32_t         *pBufferSize,
+               UErrorCode      *status);
+
+/** default memory size for the new clone. It needs to be this large for os/400 large pointers 
+ * @stable ICU 2.0
+ */
+#define U_COL_SAFECLONE_BUFFERSIZE 512
+
+/**
+ * Returns current rules. Delta defines whether full rules are returned or just the tailoring. 
+ * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough 
+ * to store rules, will store up to available space.
+ * @param coll collator to get the rules from
+ * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 
+ * @param buffer buffer to store the result in. If NULL, you'll get no rules.
+ * @param bufferLen lenght of buffer to store rules in. If less then needed you'll get only the part that fits in.
+ * @return current rules
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
+
+/**
+ * gets the locale name of the collator. If the collator
+ * is instantiated from the rules, then this function returns
+ * NULL.
+ * @param coll The UCollator for which the locale is needed
+ * @param type You can choose between requested, valid and actual
+ *             locale. For description see the definition of
+ *             ULocDataLocaleType in uloc.h
+ * @param status error code of the operation
+ * @return real locale name from which the collation data comes. 
+ *         If the collator was instantiated from rules, returns
+ *         NULL.
+ * @deprecated ICU 2.8 Use ucol_getLocaleByType instead
+ */
+U_DEPRECATED const char * U_EXPORT2
+ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
+
+
+/**
+ * gets the locale name of the collator. If the collator
+ * is instantiated from the rules, then this function returns
+ * NULL.
+ * @param coll The UCollator for which the locale is needed
+ * @param type You can choose between requested, valid and actual
+ *             locale. For description see the definition of
+ *             ULocDataLocaleType in uloc.h
+ * @param status error code of the operation
+ * @return real locale name from which the collation data comes. 
+ *         If the collator was instantiated from rules, returns
+ *         NULL.
+ * @stable ICU 2.8
+ */
+U_STABLE const char * U_EXPORT2
+ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
+
+/**
+ * Get an Unicode set that contains all the characters and sequences tailored in 
+ * this collator. The result must be disposed of by using uset_close.
+ * @param coll        The UCollator for which we want to get tailored chars
+ * @param status      error code of the operation
+ * @return a pointer to newly created USet. Must be be disposed by using uset_close
+ * @see ucol_openRules
+ * @see uset_close
+ * @stable ICU 2.4
+ */
+U_STABLE USet * U_EXPORT2
+ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
+
+/**
+ * Universal attribute getter that returns UCOL_DEFAULT if the value is default
+ * @param coll collator which attributes are to be changed
+ * @param attr attribute type
+ * @return attribute value or UCOL_DEFAULT if the value is default
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @see UColAttribute
+ * @see UColAttributeValue
+ * @see ucol_setAttribute
+ * @internal ICU 3.0
+ */
+U_INTERNAL UColAttributeValue  U_EXPORT2
+ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status);
+
+/** Check whether two collators are equal. Collators are considered equal if they
+ *  will sort strings the same. This means that both the current attributes and the
+ *  rules must be equivalent. Currently used for RuleBasedCollator::operator==.
+ *  @param source first collator
+ *  @param target second collator
+ *  @return TRUE or FALSE
+ *  @internal ICU 3.0
+ */
+U_INTERNAL UBool U_EXPORT2
+ucol_equals(const UCollator *source, const UCollator *target);
+
+/** Calculates the set of unsafe code points, given a collator.
+ *   A character is unsafe if you could append any character and cause the ordering to alter significantly.
+ *   Collation sorts in normalized order, so anything that rearranges in normalization can cause this.
+ *   Thus if you have a character like a_umlaut, and you add a lower_dot to it,
+ *   then it normalizes to a_lower_dot + umlaut, and sorts differently.
+ *  @param coll Collator
+ *  @param unsafe a fill-in set to receive the unsafe points
+ *  @param status for catching errors
+ *  @return number of elements in the set
+ *  @internal ICU 3.0
+ */
+U_INTERNAL int32_t U_EXPORT2
+ucol_getUnsafeSet( const UCollator *coll,
+                  USet *unsafe,
+                  UErrorCode *status);
+
+/** Reset UCA's static pointers. You don't want to use this, unless your static memory can go away.
+ * @internal ICU 3.2.1
+ */
+U_INTERNAL void U_EXPORT2
+ucol_forgetUCA(void);
+
+/** Touches all resources needed for instantiating a collator from a short string definition,
+ *  thus filling up the cache.
+ * @param definition A short string containing a locale and a set of attributes. 
+ *                   Attributes not explicitly mentioned are left at the default
+ *                   state for a locale.
+ * @param parseError if not NULL, structure that will get filled with error's pre
+ *                   and post context in case of error.
+ * @param forceDefaults if FALSE, the settings that are the same as the collator 
+ *                   default settings will not be applied (for example, setting
+ *                   French secondary on a French collator would not be executed). 
+ *                   If TRUE, all the settings will be applied regardless of the 
+ *                   collator default value. If the definition
+ *                   strings are to be cached, should be set to FALSE.
+ * @param status     Error code. Apart from regular error conditions connected to 
+ *                   instantiating collators (like out of memory or similar), this
+ *                   API will return an error if an invalid attribute or attribute/value
+ *                   combination is specified.
+ * @see ucol_openFromShortString
+ * @internal ICU 3.2.1
+ */
+U_INTERNAL void U_EXPORT2
+ucol_prepareShortStringOpen( const char *definition,
+                          UBool forceDefaults,
+                          UParseError *parseError,
+                          UErrorCode *status);
+
+/** Creates a binary image of a collator. This binary image can be stored and 
+ *  later used to instantiate a collator using ucol_openBinary.
+ *  This API supports preflighting.
+ *  @param coll Collator
+ *  @param buffer a fill-in buffer to receive the binary image
+ *  @param capacity capacity of the destination buffer
+ *  @param status for catching errors
+ *  @return size of the image
+ *  @see ucol_openBinary
+ *  @stable ICU 3.2
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_cloneBinary(const UCollator *coll,
+                 uint8_t *buffer, int32_t capacity,
+                 UErrorCode *status);
+
+/** Opens a collator from a collator binary image created using
+ *  ucol_cloneBinary. Binary image used in instantiation of the 
+ *  collator remains owned by the user and should stay around for 
+ *  the lifetime of the collator. The API also takes a base collator
+ *  which usualy should be UCA.
+ *  @param bin binary image owned by the user and required through the
+ *             lifetime of the collator
+ *  @param length size of the image. If negative, the API will try to
+ *                figure out the length of the image
+ *  @param base fallback collator, usually UCA. Base is required to be
+ *              present through the lifetime of the collator. Currently 
+ *              it cannot be NULL.
+ *  @param status for catching errors
+ *  @return newly created collator
+ *  @see ucol_cloneBinary
+ *  @stable ICU 3.2
+ */
+U_STABLE UCollator* U_EXPORT2
+ucol_openBinary(const uint8_t *bin, int32_t length, 
+                const UCollator *base, 
+                UErrorCode *status);
+
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif
+

Deleted: MacRuby/trunk/icu-1060/unicode/ucoleitr.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ucoleitr.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ucoleitr.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,315 +0,0 @@
-/*
-*******************************************************************************
-*   Copyright (C) 2001-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*******************************************************************************
-*
-* File ucoleitr.cpp
-*
-* Modification History:
-*
-* Date        Name        Description
-* 02/15/2001  synwee      Modified all methods to process its own function 
-*                         instead of calling the equivalent c++ api (coleitr.h)
-*******************************************************************************/
-
-#ifndef UCOLEITR_H
-#define UCOLEITR_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION
-
-/**  
- * This indicates an error has occured during processing or if no more CEs is 
- * to be returned.
- * @stable ICU 2.0
- */
-#define UCOL_NULLORDER        ((int32_t)0xFFFFFFFF)
-
-/**  
- * This indicates an error has occured during processing or there are no more CEs 
- * to be returned.
- *
- * @internal
- */
-#define UCOL_PROCESSED_NULLORDER        ((int64_t)U_INT64_MAX)
-
-#include "unicode/ucol.h"
-
-/** 
- * The UCollationElements struct.
- * For usage in C programs.
- * @stable ICU 2.0
- */
-typedef struct UCollationElements UCollationElements;
-
-/**
- * \file
- * \brief C API: UCollationElements
- *
- * The UCollationElements API is used as an iterator to walk through each 
- * character of an international string. Use the iterator to return the
- * ordering priority of the positioned character. The ordering priority of a 
- * character, which we refer to as a key, defines how a character is collated 
- * in the given collation object.
- * For example, consider the following in Spanish:
- * <pre>
- * .       "ca" -> the first key is key('c') and second key is key('a').
- * .       "cha" -> the first key is key('ch') and second key is key('a').
- * </pre>
- * And in German,
- * <pre>
- * .       "<ae ligature>b"-> the first key is key('a'), the second key is key('e'), and
- * .       the third key is key('b').
- * </pre>
- * <p>Example of the iterator usage: (without error checking)
- * <pre>
- * .  void CollationElementIterator_Example()
- * .  {
- * .      UChar *s;
- * .      t_int32 order, primaryOrder;
- * .      UCollationElements *c;
- * .      UCollatorOld *coll;
- * .      UErrorCode success = U_ZERO_ERROR;
- * .      s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) );
- * .      u_uastrcpy(s, "This is a test");
- * .      coll = ucol_open(NULL, &success);
- * .      c = ucol_openElements(coll, str, u_strlen(str), &status);
- * .      order = ucol_next(c, &success);
- * .      ucol_reset(c);
- * .      order = ucol_prev(c, &success);
- * .      free(s);
- * .      ucol_close(coll);
- * .      ucol_closeElements(c);
- * .  }
- * </pre>
- * <p>
- * ucol_next() returns the collation order of the next.
- * ucol_prev() returns the collation order of the previous character.
- * The Collation Element Iterator moves only in one direction between calls to
- * ucol_reset. That is, ucol_next() and ucol_prev can not be inter-used. 
- * Whenever ucol_prev is to be called after ucol_next() or vice versa, 
- * ucol_reset has to be called first to reset the status, shifting pointers to 
- * either the end or the start of the string. Hence at the next call of 
- * ucol_prev or ucol_next, the first or last collation order will be returned. 
- * If a change of direction is done without a ucol_reset, the result is 
- * undefined.
- * The result of a forward iterate (ucol_next) and reversed result of the  
- * backward iterate (ucol_prev) on the same string are equivalent, if 
- * collation orders with the value UCOL_IGNORABLE are ignored.
- * Character based on the comparison level of the collator.  A collation order 
- * consists of primary order, secondary order and tertiary order.  The data 
- * type of the collation order is <strong>t_int32</strong>. 
- *
- * @see UCollator
- */
-
-/**
- * Open the collation elements for a string.
- *
- * @param coll The collator containing the desired collation rules.
- * @param text The text to iterate over.
- * @param textLength The number of characters in text, or -1 if null-terminated
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return a struct containing collation element information
- * @stable ICU 2.0
- */
-U_STABLE UCollationElements* U_EXPORT2 
-ucol_openElements(const UCollator  *coll,
-                  const UChar      *text,
-                        int32_t    textLength,
-                        UErrorCode *status);
-
-/**
- * get a hash code for a key... Not very useful!
- * @param key    the given key.
- * @param length the size of the key array.
- * @return       the hash code.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucol_keyHashCode(const uint8_t* key, int32_t length);
-
-/**
- * Close a UCollationElements.
- * Once closed, a UCollationElements may no longer be used.
- * @param elems The UCollationElements to close.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucol_closeElements(UCollationElements *elems);
-
-/**
- * Reset the collation elements to their initial state.
- * This will move the 'cursor' to the beginning of the text.
- * Property settings for collation will be reset to the current status.
- * @param elems The UCollationElements to reset.
- * @see ucol_next
- * @see ucol_previous
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucol_reset(UCollationElements *elems);
-
-/**
- * Get the ordering priority of the next collation element in the text.
- * A single character may contain more than one collation element.
- * @param elems The UCollationElements containing the text.
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return The next collation elements ordering, otherwise returns NULLORDER 
- *         if an error has occured or if the end of string has been reached
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucol_next(UCollationElements *elems, UErrorCode *status);
-
-/**
- * Get the ordering priority of the previous collation element in the text.
- * A single character may contain more than one collation element.
- * Note that internally a stack is used to store buffered collation elements. 
- * It is very rare that the stack will overflow, however if such a case is 
- * encountered, the problem can be solved by increasing the size 
- * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
- * @param elems The UCollationElements containing the text.
- * @param status A pointer to an UErrorCode to receive any errors. Noteably 
- *               a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
- *               buffer has been exhausted.
- * @return The previous collation elements ordering, otherwise returns 
- *         NULLORDER if an error has occured or if the start of string has 
- *         been reached.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucol_previous(UCollationElements *elems, UErrorCode *status);
-
-/**
- * Get the processed ordering priority of the next collation element in the text.
- * A single character may contain more than one collation element.
- *
- * @param elems The UCollationElements containing the text.
- * @param ixLow a pointer to an int32_t to receive the iterator index before fetching the CE.
- * @param ixHigh a pointer to an int32_t to receive the iterator index after fetching the CE.
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return The next collation elements ordering, otherwise returns UCOL_PROCESSED_NULLORDER 
- *         if an error has occured or if the end of string has been reached
- *
- * @internal
- */
-U_INTERNAL int64_t U_EXPORT2
-ucol_nextProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
-
-/**
- * Get the processed ordering priority of the previous collation element in the text.
- * A single character may contain more than one collation element.
- * Note that internally a stack is used to store buffered collation elements. 
- * It is very rare that the stack will overflow, however if such a case is 
- * encountered, the problem can be solved by increasing the size 
- * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
- *
- * @param elems The UCollationElements containing the text.
- * @param ixLow A pointer to an int32_t to receive the iterator index after fetching the CE
- * @param ixHigh A pointer to an int32_t to receiver the iterator index before fetching the CE
- * @param status A pointer to an UErrorCode to receive any errors. Noteably 
- *               a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
- *               buffer has been exhausted.
- * @return The previous collation elements ordering, otherwise returns 
- *         UCOL_PROCESSED_NULLORDER if an error has occured or if the start of
- *         string has been reached.
- *
- * @internal
- */
-U_INTERNAL int64_t U_EXPORT2
-ucol_previousProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
-
-/**
- * Get the maximum length of any expansion sequences that end with the 
- * specified comparison order.
- * This is useful for .... ?
- * @param elems The UCollationElements containing the text.
- * @param order A collation order returned by previous or next.
- * @return maximum size of the expansion sequences ending with the collation 
- *         element or 1 if collation element does not occur at the end of any 
- *         expansion sequence
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucol_getMaxExpansion(const UCollationElements *elems, int32_t order);
-
-/**
- * Set the text containing the collation elements.
- * Property settings for collation will remain the same.
- * In order to reset the iterator to the current collation property settings,
- * the API reset() has to be called.
- * @param elems The UCollationElements to set.
- * @param text The source text containing the collation elements.
- * @param textLength The length of text, or -1 if null-terminated.
- * @param status A pointer to an UErrorCode to receive any errors.
- * @see ucol_getText
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucol_setText(      UCollationElements *elems, 
-             const UChar              *text,
-                   int32_t            textLength,
-                   UErrorCode         *status);
-
-/**
- * Get the offset of the current source character.
- * This is an offset into the text of the character containing the current
- * collation elements.
- * @param elems The UCollationElements to query.
- * @return The offset of the current source character.
- * @see ucol_setOffset
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ucol_getOffset(const UCollationElements *elems);
-
-/**
- * Set the offset of the current source character.
- * This is an offset into the text of the character to be processed.
- * Property settings for collation will remain the same.
- * In order to reset the iterator to the current collation property settings,
- * the API reset() has to be called.
- * @param elems The UCollationElements to set.
- * @param offset The desired character offset.
- * @param status A pointer to an UErrorCode to receive any errors.
- * @see ucol_getOffset
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ucol_setOffset(UCollationElements *elems,
-               int32_t        offset,
-               UErrorCode         *status);
-
-/**
-* Get the primary order of a collation order.
-* @param order the collation order
-* @return the primary order of a collation order.
-* @stable ICU 2.6
-*/
-U_STABLE int32_t U_EXPORT2
-ucol_primaryOrder (int32_t order); 
-
-/**
-* Get the secondary order of a collation order.
-* @param order the collation order
-* @return the secondary order of a collation order.
-* @stable ICU 2.6
-*/
-U_STABLE int32_t U_EXPORT2
-ucol_secondaryOrder (int32_t order); 
-
-/**
-* Get the tertiary order of a collation order.
-* @param order the collation order
-* @return the tertiary order of a collation order.
-* @stable ICU 2.6
-*/
-U_STABLE int32_t U_EXPORT2
-ucol_tertiaryOrder (int32_t order); 
-
-#endif /* #if !UCONFIG_NO_COLLATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/ucoleitr.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ucoleitr.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ucoleitr.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ucoleitr.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,315 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2001-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*
+* File ucoleitr.cpp
+*
+* Modification History:
+*
+* Date        Name        Description
+* 02/15/2001  synwee      Modified all methods to process its own function 
+*                         instead of calling the equivalent c++ api (coleitr.h)
+*******************************************************************************/
+
+#ifndef UCOLEITR_H
+#define UCOLEITR_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+/**  
+ * This indicates an error has occured during processing or if no more CEs is 
+ * to be returned.
+ * @stable ICU 2.0
+ */
+#define UCOL_NULLORDER        ((int32_t)0xFFFFFFFF)
+
+/**  
+ * This indicates an error has occured during processing or there are no more CEs 
+ * to be returned.
+ *
+ * @internal
+ */
+#define UCOL_PROCESSED_NULLORDER        ((int64_t)U_INT64_MAX)
+
+#include "unicode/ucol.h"
+
+/** 
+ * The UCollationElements struct.
+ * For usage in C programs.
+ * @stable ICU 2.0
+ */
+typedef struct UCollationElements UCollationElements;
+
+/**
+ * \file
+ * \brief C API: UCollationElements
+ *
+ * The UCollationElements API is used as an iterator to walk through each 
+ * character of an international string. Use the iterator to return the
+ * ordering priority of the positioned character. The ordering priority of a 
+ * character, which we refer to as a key, defines how a character is collated 
+ * in the given collation object.
+ * For example, consider the following in Spanish:
+ * <pre>
+ * .       "ca" -> the first key is key('c') and second key is key('a').
+ * .       "cha" -> the first key is key('ch') and second key is key('a').
+ * </pre>
+ * And in German,
+ * <pre>
+ * .       "<ae ligature>b"-> the first key is key('a'), the second key is key('e'), and
+ * .       the third key is key('b').
+ * </pre>
+ * <p>Example of the iterator usage: (without error checking)
+ * <pre>
+ * .  void CollationElementIterator_Example()
+ * .  {
+ * .      UChar *s;
+ * .      t_int32 order, primaryOrder;
+ * .      UCollationElements *c;
+ * .      UCollatorOld *coll;
+ * .      UErrorCode success = U_ZERO_ERROR;
+ * .      s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) );
+ * .      u_uastrcpy(s, "This is a test");
+ * .      coll = ucol_open(NULL, &success);
+ * .      c = ucol_openElements(coll, str, u_strlen(str), &status);
+ * .      order = ucol_next(c, &success);
+ * .      ucol_reset(c);
+ * .      order = ucol_prev(c, &success);
+ * .      free(s);
+ * .      ucol_close(coll);
+ * .      ucol_closeElements(c);
+ * .  }
+ * </pre>
+ * <p>
+ * ucol_next() returns the collation order of the next.
+ * ucol_prev() returns the collation order of the previous character.
+ * The Collation Element Iterator moves only in one direction between calls to
+ * ucol_reset. That is, ucol_next() and ucol_prev can not be inter-used. 
+ * Whenever ucol_prev is to be called after ucol_next() or vice versa, 
+ * ucol_reset has to be called first to reset the status, shifting pointers to 
+ * either the end or the start of the string. Hence at the next call of 
+ * ucol_prev or ucol_next, the first or last collation order will be returned. 
+ * If a change of direction is done without a ucol_reset, the result is 
+ * undefined.
+ * The result of a forward iterate (ucol_next) and reversed result of the  
+ * backward iterate (ucol_prev) on the same string are equivalent, if 
+ * collation orders with the value UCOL_IGNORABLE are ignored.
+ * Character based on the comparison level of the collator.  A collation order 
+ * consists of primary order, secondary order and tertiary order.  The data 
+ * type of the collation order is <strong>t_int32</strong>. 
+ *
+ * @see UCollator
+ */
+
+/**
+ * Open the collation elements for a string.
+ *
+ * @param coll The collator containing the desired collation rules.
+ * @param text The text to iterate over.
+ * @param textLength The number of characters in text, or -1 if null-terminated
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return a struct containing collation element information
+ * @stable ICU 2.0
+ */
+U_STABLE UCollationElements* U_EXPORT2 
+ucol_openElements(const UCollator  *coll,
+                  const UChar      *text,
+                        int32_t    textLength,
+                        UErrorCode *status);
+
+/**
+ * get a hash code for a key... Not very useful!
+ * @param key    the given key.
+ * @param length the size of the key array.
+ * @return       the hash code.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucol_keyHashCode(const uint8_t* key, int32_t length);
+
+/**
+ * Close a UCollationElements.
+ * Once closed, a UCollationElements may no longer be used.
+ * @param elems The UCollationElements to close.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucol_closeElements(UCollationElements *elems);
+
+/**
+ * Reset the collation elements to their initial state.
+ * This will move the 'cursor' to the beginning of the text.
+ * Property settings for collation will be reset to the current status.
+ * @param elems The UCollationElements to reset.
+ * @see ucol_next
+ * @see ucol_previous
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucol_reset(UCollationElements *elems);
+
+/**
+ * Get the ordering priority of the next collation element in the text.
+ * A single character may contain more than one collation element.
+ * @param elems The UCollationElements containing the text.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The next collation elements ordering, otherwise returns NULLORDER 
+ *         if an error has occured or if the end of string has been reached
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucol_next(UCollationElements *elems, UErrorCode *status);
+
+/**
+ * Get the ordering priority of the previous collation element in the text.
+ * A single character may contain more than one collation element.
+ * Note that internally a stack is used to store buffered collation elements. 
+ * It is very rare that the stack will overflow, however if such a case is 
+ * encountered, the problem can be solved by increasing the size 
+ * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
+ * @param elems The UCollationElements containing the text.
+ * @param status A pointer to an UErrorCode to receive any errors. Noteably 
+ *               a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
+ *               buffer has been exhausted.
+ * @return The previous collation elements ordering, otherwise returns 
+ *         NULLORDER if an error has occured or if the start of string has 
+ *         been reached.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucol_previous(UCollationElements *elems, UErrorCode *status);
+
+/**
+ * Get the processed ordering priority of the next collation element in the text.
+ * A single character may contain more than one collation element.
+ *
+ * @param elems The UCollationElements containing the text.
+ * @param ixLow a pointer to an int32_t to receive the iterator index before fetching the CE.
+ * @param ixHigh a pointer to an int32_t to receive the iterator index after fetching the CE.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The next collation elements ordering, otherwise returns UCOL_PROCESSED_NULLORDER 
+ *         if an error has occured or if the end of string has been reached
+ *
+ * @internal
+ */
+U_INTERNAL int64_t U_EXPORT2
+ucol_nextProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
+
+/**
+ * Get the processed ordering priority of the previous collation element in the text.
+ * A single character may contain more than one collation element.
+ * Note that internally a stack is used to store buffered collation elements. 
+ * It is very rare that the stack will overflow, however if such a case is 
+ * encountered, the problem can be solved by increasing the size 
+ * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
+ *
+ * @param elems The UCollationElements containing the text.
+ * @param ixLow A pointer to an int32_t to receive the iterator index after fetching the CE
+ * @param ixHigh A pointer to an int32_t to receiver the iterator index before fetching the CE
+ * @param status A pointer to an UErrorCode to receive any errors. Noteably 
+ *               a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
+ *               buffer has been exhausted.
+ * @return The previous collation elements ordering, otherwise returns 
+ *         UCOL_PROCESSED_NULLORDER if an error has occured or if the start of
+ *         string has been reached.
+ *
+ * @internal
+ */
+U_INTERNAL int64_t U_EXPORT2
+ucol_previousProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
+
+/**
+ * Get the maximum length of any expansion sequences that end with the 
+ * specified comparison order.
+ * This is useful for .... ?
+ * @param elems The UCollationElements containing the text.
+ * @param order A collation order returned by previous or next.
+ * @return maximum size of the expansion sequences ending with the collation 
+ *         element or 1 if collation element does not occur at the end of any 
+ *         expansion sequence
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucol_getMaxExpansion(const UCollationElements *elems, int32_t order);
+
+/**
+ * Set the text containing the collation elements.
+ * Property settings for collation will remain the same.
+ * In order to reset the iterator to the current collation property settings,
+ * the API reset() has to be called.
+ * @param elems The UCollationElements to set.
+ * @param text The source text containing the collation elements.
+ * @param textLength The length of text, or -1 if null-terminated.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @see ucol_getText
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucol_setText(      UCollationElements *elems, 
+             const UChar              *text,
+                   int32_t            textLength,
+                   UErrorCode         *status);
+
+/**
+ * Get the offset of the current source character.
+ * This is an offset into the text of the character containing the current
+ * collation elements.
+ * @param elems The UCollationElements to query.
+ * @return The offset of the current source character.
+ * @see ucol_setOffset
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ucol_getOffset(const UCollationElements *elems);
+
+/**
+ * Set the offset of the current source character.
+ * This is an offset into the text of the character to be processed.
+ * Property settings for collation will remain the same.
+ * In order to reset the iterator to the current collation property settings,
+ * the API reset() has to be called.
+ * @param elems The UCollationElements to set.
+ * @param offset The desired character offset.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @see ucol_getOffset
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ucol_setOffset(UCollationElements *elems,
+               int32_t        offset,
+               UErrorCode         *status);
+
+/**
+* Get the primary order of a collation order.
+* @param order the collation order
+* @return the primary order of a collation order.
+* @stable ICU 2.6
+*/
+U_STABLE int32_t U_EXPORT2
+ucol_primaryOrder (int32_t order); 
+
+/**
+* Get the secondary order of a collation order.
+* @param order the collation order
+* @return the secondary order of a collation order.
+* @stable ICU 2.6
+*/
+U_STABLE int32_t U_EXPORT2
+ucol_secondaryOrder (int32_t order); 
+
+/**
+* Get the tertiary order of a collation order.
+* @param order the collation order
+* @return the tertiary order of a collation order.
+* @stable ICU 2.6
+*/
+U_STABLE int32_t U_EXPORT2
+ucol_tertiaryOrder (int32_t order); 
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/uconfig.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uconfig.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uconfig.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,228 +0,0 @@
-/*  
-**********************************************************************
-*   Copyright (C) 2002-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*   file name:  uconfig.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2002sep19
-*   created by: Markus W. Scherer
-*/
-
-#ifndef __UCONFIG_H__
-#define __UCONFIG_H__
-
-
-/*!
- * \file
- * \brief Switches for excluding parts of ICU library code modules.
- *
- * Allows to build partial, smaller libraries for special purposes.
- * By default, all modules are built.
- * The switches are fairly coarse, controlling large modules.
- * Basic services cannot be turned off.
- *
- * Building with any of these options does not guarantee that the
- * ICU build process will completely work. It is recommended that
- * the ICU libraries and data be built using the normal build.
- * At that time you should remove the data used by those services.
- * After building the ICU data library, you should rebuild the ICU
- * libraries with these switches customized to your needs.
- *
- * @stable ICU 2.4
- */
-
-/**
- * \def UCONFIG_USE_LOCAL
- * If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h"
- * prior to determining default settings for uconfig variables.
- * 
- * @internal ICU 4.0
- * 
- */
-#if defined(UCONFIG_USE_LOCAL)
-#include "uconfig_local.h"
-#endif
-
-/**
- * \def UCONFIG_ONLY_COLLATION
- * This switch turns off modules that are not needed for collation.
- *
- * It does not turn off legacy conversion because that is necessary
- * for ICU to work on EBCDIC platforms (for the default converter).
- * If you want "only collation" and do not build for EBCDIC,
- * then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well.
- *
- * @stable ICU 2.4
- */
-#ifndef UCONFIG_ONLY_COLLATION
-#   define UCONFIG_ONLY_COLLATION 0
-#endif
-
-#if UCONFIG_ONLY_COLLATION
-    /* common library */
-#   define UCONFIG_NO_BREAK_ITERATION 1
-#   define UCONFIG_NO_IDNA 1
-
-    /* i18n library */
-#   if UCONFIG_NO_COLLATION
-#       error Contradictory collation switches in uconfig.h.
-#   endif
-#   define UCONFIG_NO_FORMATTING 1
-#   define UCONFIG_NO_TRANSLITERATION 1
-#   define UCONFIG_NO_REGULAR_EXPRESSIONS 1
-#endif
-
-/* common library switches -------------------------------------------------- */
-
-/**
- * \def UCONFIG_NO_FILE_IO
- * This switch turns off all file access in the common library
- * where file access is only used for data loading.
- * ICU data must then be provided in the form of a data DLL (or with an
- * equivalent way to link to the data residing in an executable,
- * as in building a combined library with both the common library's code and
- * the data), or via udata_setCommonData().
- * Application data must be provided via udata_setAppData() or by using
- * "open" functions that take pointers to data, for example ucol_openBinary().
- *
- * File access is not used at all in the i18n library.
- *
- * File access cannot be turned off for the icuio library or for the ICU
- * test suites and ICU tools.
- *
- * @stable ICU 3.6
- */
-#ifndef UCONFIG_NO_FILE_IO
-#   define UCONFIG_NO_FILE_IO 0
-#endif
-
-/**
- * \def UCONFIG_NO_CONVERSION
- * ICU will not completely build with this switch turned on.
- * This switch turns off all converters.
- *
- * @stable ICU 3.2
- */
-#ifndef UCONFIG_NO_CONVERSION
-#   define UCONFIG_NO_CONVERSION 0
-#endif
-
-#if UCONFIG_NO_CONVERSION
-#   define UCONFIG_NO_LEGACY_CONVERSION 1
-#endif
-
-/**
- * \def UCONFIG_NO_LEGACY_CONVERSION
- * This switch turns off all converters except for
- * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
- * - US-ASCII
- * - ISO-8859-1
- *
- * Turning off legacy conversion is not possible on EBCDIC platforms
- * because they need ibm-37 or ibm-1047 default converters.
- *
- * @stable ICU 2.4
- */
-#ifndef UCONFIG_NO_LEGACY_CONVERSION
-#   define UCONFIG_NO_LEGACY_CONVERSION 0
-#endif
-
-/**
- * \def UCONFIG_NO_NORMALIZATION
- * This switch turns off normalization.
- * It implies turning off several other services as well, for example
- * collation and IDNA.
- *
- * @stable ICU 2.6
- */
-#ifndef UCONFIG_NO_NORMALIZATION
-#   define UCONFIG_NO_NORMALIZATION 0
-#elif UCONFIG_NO_NORMALIZATION
-    /* common library */
-#   define UCONFIG_NO_IDNA 1
-
-    /* i18n library */
-#   if UCONFIG_ONLY_COLLATION
-#       error Contradictory collation switches in uconfig.h.
-#   endif
-#   define UCONFIG_NO_COLLATION 1
-#   define UCONFIG_NO_TRANSLITERATION 1
-#endif
-
-/**
- * \def UCONFIG_NO_BREAK_ITERATION
- * This switch turns off break iteration.
- *
- * @stable ICU 2.4
- */
-#ifndef UCONFIG_NO_BREAK_ITERATION
-#   define UCONFIG_NO_BREAK_ITERATION 0
-#endif
-
-/**
- * \def UCONFIG_NO_IDNA
- * This switch turns off IDNA.
- *
- * @stable ICU 2.6
- */
-#ifndef UCONFIG_NO_IDNA
-#   define UCONFIG_NO_IDNA 0
-#endif
-
-/* i18n library switches ---------------------------------------------------- */
-
-/**
- * \def UCONFIG_NO_COLLATION
- * This switch turns off collation and collation-based string search.
- *
- * @stable ICU 2.4
- */
-#ifndef UCONFIG_NO_COLLATION
-#   define UCONFIG_NO_COLLATION 0
-#endif
-
-/**
- * \def UCONFIG_NO_FORMATTING
- * This switch turns off formatting and calendar/timezone services.
- *
- * @stable ICU 2.4
- */
-#ifndef UCONFIG_NO_FORMATTING
-#   define UCONFIG_NO_FORMATTING 0
-#endif
-
-/**
- * \def UCONFIG_NO_TRANSLITERATION
- * This switch turns off transliteration.
- *
- * @stable ICU 2.4
- */
-#ifndef UCONFIG_NO_TRANSLITERATION
-#   define UCONFIG_NO_TRANSLITERATION 0
-#endif
-
-/**
- * \def UCONFIG_NO_REGULAR_EXPRESSIONS
- * This switch turns off regular expressions.
- *
- * @stable ICU 2.4
- */
-#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
-#   define UCONFIG_NO_REGULAR_EXPRESSIONS 0
-#endif
-
-/**
- * \def UCONFIG_NO_SERVICE
- * This switch turns off service registration.
- *
- * @stable ICU 3.2
- */
-#ifndef UCONFIG_NO_SERVICE
-#   define UCONFIG_NO_SERVICE 1
-#endif
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/uconfig.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uconfig.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uconfig.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uconfig.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,228 @@
+/*  
+**********************************************************************
+*   Copyright (C) 2002-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  uconfig.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002sep19
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UCONFIG_H__
+#define __UCONFIG_H__
+
+
+/*!
+ * \file
+ * \brief Switches for excluding parts of ICU library code modules.
+ *
+ * Allows to build partial, smaller libraries for special purposes.
+ * By default, all modules are built.
+ * The switches are fairly coarse, controlling large modules.
+ * Basic services cannot be turned off.
+ *
+ * Building with any of these options does not guarantee that the
+ * ICU build process will completely work. It is recommended that
+ * the ICU libraries and data be built using the normal build.
+ * At that time you should remove the data used by those services.
+ * After building the ICU data library, you should rebuild the ICU
+ * libraries with these switches customized to your needs.
+ *
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def UCONFIG_USE_LOCAL
+ * If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h"
+ * prior to determining default settings for uconfig variables.
+ * 
+ * @internal ICU 4.0
+ * 
+ */
+#if defined(UCONFIG_USE_LOCAL)
+#include "uconfig_local.h"
+#endif
+
+/**
+ * \def UCONFIG_ONLY_COLLATION
+ * This switch turns off modules that are not needed for collation.
+ *
+ * It does not turn off legacy conversion because that is necessary
+ * for ICU to work on EBCDIC platforms (for the default converter).
+ * If you want "only collation" and do not build for EBCDIC,
+ * then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_ONLY_COLLATION
+#   define UCONFIG_ONLY_COLLATION 0
+#endif
+
+#if UCONFIG_ONLY_COLLATION
+    /* common library */
+#   define UCONFIG_NO_BREAK_ITERATION 1
+#   define UCONFIG_NO_IDNA 1
+
+    /* i18n library */
+#   if UCONFIG_NO_COLLATION
+#       error Contradictory collation switches in uconfig.h.
+#   endif
+#   define UCONFIG_NO_FORMATTING 1
+#   define UCONFIG_NO_TRANSLITERATION 1
+#   define UCONFIG_NO_REGULAR_EXPRESSIONS 1
+#endif
+
+/* common library switches -------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_FILE_IO
+ * This switch turns off all file access in the common library
+ * where file access is only used for data loading.
+ * ICU data must then be provided in the form of a data DLL (or with an
+ * equivalent way to link to the data residing in an executable,
+ * as in building a combined library with both the common library's code and
+ * the data), or via udata_setCommonData().
+ * Application data must be provided via udata_setAppData() or by using
+ * "open" functions that take pointers to data, for example ucol_openBinary().
+ *
+ * File access is not used at all in the i18n library.
+ *
+ * File access cannot be turned off for the icuio library or for the ICU
+ * test suites and ICU tools.
+ *
+ * @stable ICU 3.6
+ */
+#ifndef UCONFIG_NO_FILE_IO
+#   define UCONFIG_NO_FILE_IO 0
+#endif
+
+/**
+ * \def UCONFIG_NO_CONVERSION
+ * ICU will not completely build with this switch turned on.
+ * This switch turns off all converters.
+ *
+ * @stable ICU 3.2
+ */
+#ifndef UCONFIG_NO_CONVERSION
+#   define UCONFIG_NO_CONVERSION 0
+#endif
+
+#if UCONFIG_NO_CONVERSION
+#   define UCONFIG_NO_LEGACY_CONVERSION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_LEGACY_CONVERSION
+ * This switch turns off all converters except for
+ * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
+ * - US-ASCII
+ * - ISO-8859-1
+ *
+ * Turning off legacy conversion is not possible on EBCDIC platforms
+ * because they need ibm-37 or ibm-1047 default converters.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_LEGACY_CONVERSION
+#   define UCONFIG_NO_LEGACY_CONVERSION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_NORMALIZATION
+ * This switch turns off normalization.
+ * It implies turning off several other services as well, for example
+ * collation and IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_NORMALIZATION
+#   define UCONFIG_NO_NORMALIZATION 0
+#elif UCONFIG_NO_NORMALIZATION
+    /* common library */
+#   define UCONFIG_NO_IDNA 1
+
+    /* i18n library */
+#   if UCONFIG_ONLY_COLLATION
+#       error Contradictory collation switches in uconfig.h.
+#   endif
+#   define UCONFIG_NO_COLLATION 1
+#   define UCONFIG_NO_TRANSLITERATION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_BREAK_ITERATION
+ * This switch turns off break iteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_BREAK_ITERATION
+#   define UCONFIG_NO_BREAK_ITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_IDNA
+ * This switch turns off IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_IDNA
+#   define UCONFIG_NO_IDNA 0
+#endif
+
+/* i18n library switches ---------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_COLLATION
+ * This switch turns off collation and collation-based string search.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_COLLATION
+#   define UCONFIG_NO_COLLATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_FORMATTING
+ * This switch turns off formatting and calendar/timezone services.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_FORMATTING
+#   define UCONFIG_NO_FORMATTING 0
+#endif
+
+/**
+ * \def UCONFIG_NO_TRANSLITERATION
+ * This switch turns off transliteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_TRANSLITERATION
+#   define UCONFIG_NO_TRANSLITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_REGULAR_EXPRESSIONS
+ * This switch turns off regular expressions.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
+#   define UCONFIG_NO_REGULAR_EXPRESSIONS 0
+#endif
+
+/**
+ * \def UCONFIG_NO_SERVICE
+ * This switch turns off service registration.
+ *
+ * @stable ICU 3.2
+ */
+#ifndef UCONFIG_NO_SERVICE
+#   define UCONFIG_NO_SERVICE 1
+#endif
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/ucsdet.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ucsdet.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ucsdet.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,349 +0,0 @@
-/*
- **********************************************************************
- *   Copyright (C) 2005-2007, International Business Machines
- *   Corporation and others.  All Rights Reserved.
- **********************************************************************
- *   file name:  ucsdet.h
- *   encoding:   US-ASCII
- *   indentation:4
- *
- *   created on: 2005Aug04
- *   created by: Andy Heninger
- *
- *   ICU Character Set Detection, API for C
- *
- *   Draft version 18 Oct 2005
- *
- */
-
-#ifndef __UCSDET_H
-#define __UCSDET_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-#include "unicode/uenum.h"
-
-/**
- * \file 
- * \brief C API: Charset Detection API
- *
- * This API provides a facility for detecting the
- * charset or encoding of character data in an unknown text format.
- * The input data can be from an array of bytes.
- * <p>
- * Character set detection is at best an imprecise operation.  The detection
- * process will attempt to identify the charset that best matches the characteristics
- * of the byte data, but the process is partly statistical in nature, and
- * the results can not be guaranteed to always be correct.
- * <p>
- * For best accuracy in charset detection, the input data should be primarily
- * in a single language, and a minimum of a few hundred bytes worth of plain text
- * in the language are needed.  The detection process will attempt to
- * ignore html or xml style markup that could otherwise obscure the content.
- */
- 
-
-struct UCharsetDetector;
-/**
-  * Structure representing a charset detector
-  * @stable ICU 3.6
-  */
-typedef struct UCharsetDetector UCharsetDetector;
-
-struct UCharsetMatch;
-/**
-  *  Opaque structure representing a match that was identified
-  *  from a charset detection operation.
-  *  @stable ICU 3.6
-  */
-typedef struct UCharsetMatch UCharsetMatch;
-
-/**
-  *  Open a charset detector.
-  *
-  *  @param status Any error conditions occurring during the open
-  *                operation are reported back in this variable.
-  *  @return the newly opened charset detector.
-  *  @stable ICU 3.6
-  */
-U_STABLE UCharsetDetector * U_EXPORT2
-ucsdet_open(UErrorCode   *status);
-
-/**
-  * Close a charset detector.  All storage and any other resources
-  *   owned by this charset detector will be released.  Failure to
-  *   close a charset detector when finished with it can result in
-  *   memory leaks in the application.
-  *
-  *  @param ucsd  The charset detector to be closed.
-  *  @stable ICU 3.6
-  */
-U_STABLE void U_EXPORT2
-ucsdet_close(UCharsetDetector *ucsd);
-
-/**
-  * Set the input byte data whose charset is to detected.
-  *
-  * Ownership of the input  text byte array remains with the caller.
-  * The input string must not be altered or deleted until the charset
-  * detector is either closed or reset to refer to different input text.
-  *
-  * @param ucsd   the charset detector to be used.
-  * @param textIn the input text of unknown encoding.   .
-  * @param len    the length of the input text, or -1 if the text
-  *               is NUL terminated.
-  * @param status any error conditions are reported back in this variable.
-  *
-  * @stable ICU 3.6
-  */
-U_STABLE void U_EXPORT2
-ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
-
-
-/** Set the declared encoding for charset detection.
- *  The declared encoding of an input text is an encoding obtained
- *  by the user from an http header or xml declaration or similar source that
- *  can be provided as an additional hint to the charset detector.
- *
- *  How and whether the declared encoding will be used during the
- *  detection process is TBD.
- *
- * @param ucsd      the charset detector to be used.
- * @param encoding  an encoding for the current data obtained from
- *                  a header or declaration or other source outside
- *                  of the byte data itself.
- * @param length    the length of the encoding name, or -1 if the name string
- *                  is NUL terminated.
- * @param status    any error conditions are reported back in this variable.
- *
- * @stable ICU 3.6
- */
-U_STABLE void U_EXPORT2
-ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status);
-
-
-/**
- * Return the charset that best matches the supplied input data.
- * 
- * Note though, that because the detection 
- * only looks at the start of the input data,
- * there is a possibility that the returned charset will fail to handle
- * the full set of input data.
- * <p>
- * The returned UCharsetMatch object is owned by the UCharsetDetector.
- * It will remain valid until the detector input is reset, or until
- * the detector is closed.
- * <p>
- * The function will fail if
- *  <ul>
- *    <li>no charset appears to match the data.</li>
- *    <li>no input text has been provided</li>
- *  </ul>
- *
- * @param ucsd      the charset detector to be used.
- * @param status    any error conditions are reported back in this variable.
- * @return          a UCharsetMatch  representing the best matching charset,
- *                  or NULL if no charset matches the byte data.
- *
- * @stable ICU 3.6
- */
-U_STABLE const UCharsetMatch * U_EXPORT2
-ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status);
-    
-
-/**
- *  Find all charset matches that appear to be consistent with the input,
- *  returning an array of results.  The results are ordered with the
- *  best quality match first.
- *
- *  Because the detection only looks at a limited amount of the
- *  input byte data, some of the returned charsets may fail to handle
- *  the all of input data.
- *  <p>
- *  The returned UCharsetMatch objects are owned by the UCharsetDetector.
- *  They will remain valid until the detector is closed or modified
- *  
- * <p>
- * Return an error if 
- *  <ul>
- *    <li>no charsets appear to match the input data.</li>
- *    <li>no input text has been provided</li>
- *  </ul>
- * 
- * @param ucsd          the charset detector to be used.
- * @param matchesFound  pointer to a variable that will be set to the
- *                      number of charsets identified that are consistent with
- *                      the input data.  Output only.
- * @param status        any error conditions are reported back in this variable.
- * @return              A pointer to an array of pointers to UCharSetMatch objects.
- *                      This array, and the UCharSetMatch instances to which it refers,
- *                      are owned by the UCharsetDetector, and will remain valid until
- *                      the detector is closed or modified.
- * @stable ICU 3.6
- */
-U_STABLE const UCharsetMatch ** U_EXPORT2
-ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status);
-
-
-
-/**
- *  Get the name of the charset represented by a UCharsetMatch.
- *
- *  The storage for the returned name string is owned by the
- *  UCharsetMatch, and will remain valid while the UCharsetMatch
- *  is valid.
- *
- *  The name returned is suitable for use with the ICU conversion APIs.
- *
- *  @param ucsm    The charset match object.
- *  @param status  Any error conditions are reported back in this variable.
- *  @return        The name of the matching charset.
- *
- *  @stable ICU 3.6
- */
-U_STABLE const char * U_EXPORT2
-ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status);
-
-/**
- *  Get a confidence number for the quality of the match of the byte
- *  data with the charset.  Confidence numbers range from zero to 100,
- *  with 100 representing complete confidence and zero representing
- *  no confidence.
- *
- *  The confidence values are somewhat arbitrary.  They define an
- *  an ordering within the results for any single detection operation
- *  but are not generally comparable between the results for different input.
- *
- *  A confidence value of ten does have a general meaning - it is used
- *  for charsets that can represent the input data, but for which there
- *  is no other indication that suggests that the charset is the correct one.
- *  Pure 7 bit ASCII data, for example, is compatible with a
- *  great many charsets, most of which will appear as possible matches
- *  with a confidence of 10.
- *
- *  @param ucsm    The charset match object.
- *  @param status  Any error conditions are reported back in this variable.
- *  @return        A confidence number for the charset match.
- *
- *  @stable ICU 3.6
- */
-U_STABLE int32_t U_EXPORT2
-ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status);
-
-/**
- *  Get the RFC 3066 code for the language of the input data.
- *
- *  The Charset Detection service is intended primarily for detecting
- *  charsets, not language.  For some, but not all, charsets, a language is
- *  identified as a byproduct of the detection process, and that is what
- *  is returned by this function.
- *
- *  CAUTION:
- *    1.  Language information is not available for input data encoded in
- *        all charsets. In particular, no language is identified
- *        for UTF-8 input data.
- *
- *    2.  Closely related languages may sometimes be confused.
- *
- *  If more accurate language detection is required, a linguistic
- *  analysis package should be used.
- *
- *  The storage for the returned name string is owned by the
- *  UCharsetMatch, and will remain valid while the UCharsetMatch
- *  is valid.
- *
- *  @param ucsm    The charset match object.
- *  @param status  Any error conditions are reported back in this variable.
- *  @return        The RFC 3066 code for the language of the input data, or
- *                 an empty string if the language could not be determined.
- *
- *  @stable ICU 3.6
- */
-U_STABLE const char * U_EXPORT2
-ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status);
-
-
-/**
-  *  Get the entire input text as a UChar string, placing it into
-  *  a caller-supplied buffer.  A terminating
-  *  NUL character will be appended to the buffer if space is available.
-  *
-  *  The number of UChars in the output string, not including the terminating
-  *  NUL, is returned. 
-  *
-  *  If the supplied buffer is smaller than required to hold the output,
-  *  the contents of the buffer are undefined.  The full output string length
-  *  (in UChars) is returned as always, and can be used to allocate a buffer
-  *  of the correct size.
-  *
-  *
-  * @param ucsm    The charset match object.
-  * @param buf     A UChar buffer to be filled with the converted text data.
-  * @param cap     The capacity of the buffer in UChars.
-  * @param status  Any error conditions are reported back in this variable.
-  * @return        The number of UChars in the output string.
-  *
-  * @stable ICU 3.6
-  */
-U_STABLE  int32_t U_EXPORT2
-ucsdet_getUChars(const UCharsetMatch *ucsm,
-                 UChar *buf, int32_t cap, UErrorCode *status);
-
-
-
-/**
-  *  Get an iterator over the set of all detectable charsets - 
-  *  over the charsets that are known to the charset detection
-  *  service.
-  *
-  *  The returned UEnumeration provides access to the names of
-  *  the charsets.
-  *
-  *  The state of the Charset detector that is passed in does not
-  *  affect the result of this function, but requiring a valid, open
-  *  charset detector as a parameter insures that the charset detection
-  *  service has been safely initialized and that the required detection
-  *  data is available.
-  *
-  *  @param ucsd a Charset detector.
-  *  @param status  Any error conditions are reported back in this variable.
-  *  @return an iterator providing access to the detectable charset names.
-  *  @stable ICU 3.6
-  */
-U_STABLE  UEnumeration * U_EXPORT2
-ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status);
-
-
-/**
-  *  Test whether input filtering is enabled for this charset detector.
-  *  Input filtering removes text that appears to be HTML or xml
-  *  markup from the input before applying the code page detection
-  *  heuristics.
-  *
-  *  @param ucsd  The charset detector to check.
-  *  @return TRUE if filtering is enabled.
-  *  @stable ICU 3.6
-  */
-U_STABLE  UBool U_EXPORT2
-ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
-
-
-/**
- * Enable filtering of input text. If filtering is enabled,
- * text within angle brackets ("<" and ">") will be removed
- * before detection, which will remove most HTML or xml markup.
- *
- * @param ucsd   the charset detector to be modified.
- * @param filter <code>true</code> to enable input text filtering.
- * @return The previous setting.
- *
- * @stable ICU 3.6
- */
-U_STABLE  UBool U_EXPORT2
-ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter);
-
-#endif
-#endif   /* __UCSDET_H */
-
-

Copied: MacRuby/trunk/icu-1060/unicode/ucsdet.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ucsdet.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ucsdet.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ucsdet.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,349 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 2005-2007, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ *   file name:  ucsdet.h
+ *   encoding:   US-ASCII
+ *   indentation:4
+ *
+ *   created on: 2005Aug04
+ *   created by: Andy Heninger
+ *
+ *   ICU Character Set Detection, API for C
+ *
+ *   Draft version 18 Oct 2005
+ *
+ */
+
+#ifndef __UCSDET_H
+#define __UCSDET_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+#include "unicode/uenum.h"
+
+/**
+ * \file 
+ * \brief C API: Charset Detection API
+ *
+ * This API provides a facility for detecting the
+ * charset or encoding of character data in an unknown text format.
+ * The input data can be from an array of bytes.
+ * <p>
+ * Character set detection is at best an imprecise operation.  The detection
+ * process will attempt to identify the charset that best matches the characteristics
+ * of the byte data, but the process is partly statistical in nature, and
+ * the results can not be guaranteed to always be correct.
+ * <p>
+ * For best accuracy in charset detection, the input data should be primarily
+ * in a single language, and a minimum of a few hundred bytes worth of plain text
+ * in the language are needed.  The detection process will attempt to
+ * ignore html or xml style markup that could otherwise obscure the content.
+ */
+ 
+
+struct UCharsetDetector;
+/**
+  * Structure representing a charset detector
+  * @stable ICU 3.6
+  */
+typedef struct UCharsetDetector UCharsetDetector;
+
+struct UCharsetMatch;
+/**
+  *  Opaque structure representing a match that was identified
+  *  from a charset detection operation.
+  *  @stable ICU 3.6
+  */
+typedef struct UCharsetMatch UCharsetMatch;
+
+/**
+  *  Open a charset detector.
+  *
+  *  @param status Any error conditions occurring during the open
+  *                operation are reported back in this variable.
+  *  @return the newly opened charset detector.
+  *  @stable ICU 3.6
+  */
+U_STABLE UCharsetDetector * U_EXPORT2
+ucsdet_open(UErrorCode   *status);
+
+/**
+  * Close a charset detector.  All storage and any other resources
+  *   owned by this charset detector will be released.  Failure to
+  *   close a charset detector when finished with it can result in
+  *   memory leaks in the application.
+  *
+  *  @param ucsd  The charset detector to be closed.
+  *  @stable ICU 3.6
+  */
+U_STABLE void U_EXPORT2
+ucsdet_close(UCharsetDetector *ucsd);
+
+/**
+  * Set the input byte data whose charset is to detected.
+  *
+  * Ownership of the input  text byte array remains with the caller.
+  * The input string must not be altered or deleted until the charset
+  * detector is either closed or reset to refer to different input text.
+  *
+  * @param ucsd   the charset detector to be used.
+  * @param textIn the input text of unknown encoding.   .
+  * @param len    the length of the input text, or -1 if the text
+  *               is NUL terminated.
+  * @param status any error conditions are reported back in this variable.
+  *
+  * @stable ICU 3.6
+  */
+U_STABLE void U_EXPORT2
+ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
+
+
+/** Set the declared encoding for charset detection.
+ *  The declared encoding of an input text is an encoding obtained
+ *  by the user from an http header or xml declaration or similar source that
+ *  can be provided as an additional hint to the charset detector.
+ *
+ *  How and whether the declared encoding will be used during the
+ *  detection process is TBD.
+ *
+ * @param ucsd      the charset detector to be used.
+ * @param encoding  an encoding for the current data obtained from
+ *                  a header or declaration or other source outside
+ *                  of the byte data itself.
+ * @param length    the length of the encoding name, or -1 if the name string
+ *                  is NUL terminated.
+ * @param status    any error conditions are reported back in this variable.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status);
+
+
+/**
+ * Return the charset that best matches the supplied input data.
+ * 
+ * Note though, that because the detection 
+ * only looks at the start of the input data,
+ * there is a possibility that the returned charset will fail to handle
+ * the full set of input data.
+ * <p>
+ * The returned UCharsetMatch object is owned by the UCharsetDetector.
+ * It will remain valid until the detector input is reset, or until
+ * the detector is closed.
+ * <p>
+ * The function will fail if
+ *  <ul>
+ *    <li>no charset appears to match the data.</li>
+ *    <li>no input text has been provided</li>
+ *  </ul>
+ *
+ * @param ucsd      the charset detector to be used.
+ * @param status    any error conditions are reported back in this variable.
+ * @return          a UCharsetMatch  representing the best matching charset,
+ *                  or NULL if no charset matches the byte data.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE const UCharsetMatch * U_EXPORT2
+ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status);
+    
+
+/**
+ *  Find all charset matches that appear to be consistent with the input,
+ *  returning an array of results.  The results are ordered with the
+ *  best quality match first.
+ *
+ *  Because the detection only looks at a limited amount of the
+ *  input byte data, some of the returned charsets may fail to handle
+ *  the all of input data.
+ *  <p>
+ *  The returned UCharsetMatch objects are owned by the UCharsetDetector.
+ *  They will remain valid until the detector is closed or modified
+ *  
+ * <p>
+ * Return an error if 
+ *  <ul>
+ *    <li>no charsets appear to match the input data.</li>
+ *    <li>no input text has been provided</li>
+ *  </ul>
+ * 
+ * @param ucsd          the charset detector to be used.
+ * @param matchesFound  pointer to a variable that will be set to the
+ *                      number of charsets identified that are consistent with
+ *                      the input data.  Output only.
+ * @param status        any error conditions are reported back in this variable.
+ * @return              A pointer to an array of pointers to UCharSetMatch objects.
+ *                      This array, and the UCharSetMatch instances to which it refers,
+ *                      are owned by the UCharsetDetector, and will remain valid until
+ *                      the detector is closed or modified.
+ * @stable ICU 3.6
+ */
+U_STABLE const UCharsetMatch ** U_EXPORT2
+ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status);
+
+
+
+/**
+ *  Get the name of the charset represented by a UCharsetMatch.
+ *
+ *  The storage for the returned name string is owned by the
+ *  UCharsetMatch, and will remain valid while the UCharsetMatch
+ *  is valid.
+ *
+ *  The name returned is suitable for use with the ICU conversion APIs.
+ *
+ *  @param ucsm    The charset match object.
+ *  @param status  Any error conditions are reported back in this variable.
+ *  @return        The name of the matching charset.
+ *
+ *  @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status);
+
+/**
+ *  Get a confidence number for the quality of the match of the byte
+ *  data with the charset.  Confidence numbers range from zero to 100,
+ *  with 100 representing complete confidence and zero representing
+ *  no confidence.
+ *
+ *  The confidence values are somewhat arbitrary.  They define an
+ *  an ordering within the results for any single detection operation
+ *  but are not generally comparable between the results for different input.
+ *
+ *  A confidence value of ten does have a general meaning - it is used
+ *  for charsets that can represent the input data, but for which there
+ *  is no other indication that suggests that the charset is the correct one.
+ *  Pure 7 bit ASCII data, for example, is compatible with a
+ *  great many charsets, most of which will appear as possible matches
+ *  with a confidence of 10.
+ *
+ *  @param ucsm    The charset match object.
+ *  @param status  Any error conditions are reported back in this variable.
+ *  @return        A confidence number for the charset match.
+ *
+ *  @stable ICU 3.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status);
+
+/**
+ *  Get the RFC 3066 code for the language of the input data.
+ *
+ *  The Charset Detection service is intended primarily for detecting
+ *  charsets, not language.  For some, but not all, charsets, a language is
+ *  identified as a byproduct of the detection process, and that is what
+ *  is returned by this function.
+ *
+ *  CAUTION:
+ *    1.  Language information is not available for input data encoded in
+ *        all charsets. In particular, no language is identified
+ *        for UTF-8 input data.
+ *
+ *    2.  Closely related languages may sometimes be confused.
+ *
+ *  If more accurate language detection is required, a linguistic
+ *  analysis package should be used.
+ *
+ *  The storage for the returned name string is owned by the
+ *  UCharsetMatch, and will remain valid while the UCharsetMatch
+ *  is valid.
+ *
+ *  @param ucsm    The charset match object.
+ *  @param status  Any error conditions are reported back in this variable.
+ *  @return        The RFC 3066 code for the language of the input data, or
+ *                 an empty string if the language could not be determined.
+ *
+ *  @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status);
+
+
+/**
+  *  Get the entire input text as a UChar string, placing it into
+  *  a caller-supplied buffer.  A terminating
+  *  NUL character will be appended to the buffer if space is available.
+  *
+  *  The number of UChars in the output string, not including the terminating
+  *  NUL, is returned. 
+  *
+  *  If the supplied buffer is smaller than required to hold the output,
+  *  the contents of the buffer are undefined.  The full output string length
+  *  (in UChars) is returned as always, and can be used to allocate a buffer
+  *  of the correct size.
+  *
+  *
+  * @param ucsm    The charset match object.
+  * @param buf     A UChar buffer to be filled with the converted text data.
+  * @param cap     The capacity of the buffer in UChars.
+  * @param status  Any error conditions are reported back in this variable.
+  * @return        The number of UChars in the output string.
+  *
+  * @stable ICU 3.6
+  */
+U_STABLE  int32_t U_EXPORT2
+ucsdet_getUChars(const UCharsetMatch *ucsm,
+                 UChar *buf, int32_t cap, UErrorCode *status);
+
+
+
+/**
+  *  Get an iterator over the set of all detectable charsets - 
+  *  over the charsets that are known to the charset detection
+  *  service.
+  *
+  *  The returned UEnumeration provides access to the names of
+  *  the charsets.
+  *
+  *  The state of the Charset detector that is passed in does not
+  *  affect the result of this function, but requiring a valid, open
+  *  charset detector as a parameter insures that the charset detection
+  *  service has been safely initialized and that the required detection
+  *  data is available.
+  *
+  *  @param ucsd a Charset detector.
+  *  @param status  Any error conditions are reported back in this variable.
+  *  @return an iterator providing access to the detectable charset names.
+  *  @stable ICU 3.6
+  */
+U_STABLE  UEnumeration * U_EXPORT2
+ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status);
+
+
+/**
+  *  Test whether input filtering is enabled for this charset detector.
+  *  Input filtering removes text that appears to be HTML or xml
+  *  markup from the input before applying the code page detection
+  *  heuristics.
+  *
+  *  @param ucsd  The charset detector to check.
+  *  @return TRUE if filtering is enabled.
+  *  @stable ICU 3.6
+  */
+U_STABLE  UBool U_EXPORT2
+ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
+
+
+/**
+ * Enable filtering of input text. If filtering is enabled,
+ * text within angle brackets ("<" and ">") will be removed
+ * before detection, which will remove most HTML or xml markup.
+ *
+ * @param ucsd   the charset detector to be modified.
+ * @param filter <code>true</code> to enable input text filtering.
+ * @return The previous setting.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE  UBool U_EXPORT2
+ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter);
+
+#endif
+#endif   /* __UCSDET_H */
+
+

Deleted: MacRuby/trunk/icu-1060/unicode/ucurr.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ucurr.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ucurr.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,270 +0,0 @@
-/*
-**********************************************************************
-* Copyright (c) 2002-2008, International Business Machines
-* Corporation and others.  All Rights Reserved.
-**********************************************************************
-*/
-#ifndef _UCURR_H_
-#define _UCURR_H_
-
-#include "unicode/utypes.h"
-#include "unicode/uenum.h"
-
-/**
- * \file 
- * \brief C API: Encapsulates information about a currency.
- */
-
-#if !UCONFIG_NO_FORMATTING
-
-/**
- * The ucurr API encapsulates information about a currency, as defined by
- * ISO 4217.  A currency is represented by a 3-character string
- * containing its ISO 4217 code.  This API can return various data
- * necessary the proper display of a currency:
- *
- * <ul><li>A display symbol, for a specific locale
- * <li>The number of fraction digits to display
- * <li>A rounding increment
- * </ul>
- *
- * The <tt>DecimalFormat</tt> class uses these data to display
- * currencies.
- * @author Alan Liu
- * @since ICU 2.2
- */
-
-/**
- * Finds a currency code for the given locale.
- * @param locale the locale for which to retrieve a currency code. 
- *               Currency can be specified by the "currency" keyword
- *               in which case it overrides the default currency code
- * @param buff   fill in buffer. Can be NULL for preflighting.
- * @param buffCapacity capacity of the fill in buffer. Can be 0 for
- *               preflighting. If it is non-zero, the buff parameter
- *               must not be NULL.
- * @param ec error code
- * @return length of the currency string. It should always be 3. If 0,
- *                currency couldn't be found or the input values are 
- *                invalid. 
- * @stable ICU 2.8
- */
-U_STABLE int32_t U_EXPORT2
-ucurr_forLocale(const char* locale,
-                UChar* buff,
-                int32_t buffCapacity,
-                UErrorCode* ec);
-
-/**
- * Selector constants for ucurr_getName().
- *
- * @see ucurr_getName
- * @stable ICU 2.6
- */
-typedef enum UCurrNameStyle {
-    /**
-     * Selector for ucurr_getName indicating a symbolic name for a
-     * currency, such as "$" for USD.
-     * @stable ICU 2.6
-     */
-    UCURR_SYMBOL_NAME,
-
-    /**
-     * Selector for ucurr_getName indicating the long name for a
-     * currency, such as "US Dollar" for USD.
-     * @stable ICU 2.6
-     */
-    UCURR_LONG_NAME
-} UCurrNameStyle;
-
-#if !UCONFIG_NO_SERVICE
-/**
- * @stable ICU 2.6
- */
-typedef const void* UCurrRegistryKey;
-
-/**
- * Register an (existing) ISO 4217 currency code for the given locale.
- * Only the country code and the two variants EURO and PRE_EURO are
- * recognized.
- * @param isoCode the three-letter ISO 4217 currency code
- * @param locale  the locale for which to register this currency code
- * @param status the in/out status code
- * @return a registry key that can be used to unregister this currency code, or NULL
- * if there was an error.
- * @stable ICU 2.6
- */
-U_STABLE UCurrRegistryKey U_EXPORT2
-ucurr_register(const UChar* isoCode, 
-                   const char* locale,  
-                   UErrorCode* status);
-/**
- * Unregister the previously-registered currency definitions using the
- * URegistryKey returned from ucurr_register.  Key becomes invalid after
- * a successful call and should not be used again.  Any currency 
- * that might have been hidden by the original ucurr_register call is 
- * restored.
- * @param key the registry key returned by a previous call to ucurr_register
- * @param status the in/out status code, no special meanings are assigned
- * @return TRUE if the currency for this key was successfully unregistered
- * @stable ICU 2.6
- */
-U_STABLE UBool U_EXPORT2
-ucurr_unregister(UCurrRegistryKey key, UErrorCode* status);
-#endif /* UCONFIG_NO_SERVICE */
-
-/**
- * Returns the display name for the given currency in the
- * given locale.  For example, the display name for the USD
- * currency object in the en_US locale is "$".
- * @param currency null-terminated 3-letter ISO 4217 code
- * @param locale locale in which to display currency
- * @param nameStyle selector for which kind of name to return
- * @param isChoiceFormat fill-in set to TRUE if the returned value
- * is a ChoiceFormat pattern; otherwise it is a static string
- * @param len fill-in parameter to receive length of result
- * @param ec error code
- * @return pointer to display string of 'len' UChars.  If the resource
- * data contains no entry for 'currency', then 'currency' itself is
- * returned.  If *isChoiceFormat is TRUE, then the result is a
- * ChoiceFormat pattern.  Otherwise it is a static string.
- * @stable ICU 2.6
- */
-U_STABLE const UChar* U_EXPORT2
-ucurr_getName(const UChar* currency,
-              const char* locale,
-              UCurrNameStyle nameStyle,
-              UBool* isChoiceFormat,
-              int32_t* len,
-              UErrorCode* ec);
-
-/**
- * Returns the number of the number of fraction digits that should
- * be displayed for the given currency.
- * @param currency null-terminated 3-letter ISO 4217 code
- * @param ec input-output error code
- * @return a non-negative number of fraction digits to be
- * displayed, or 0 if there is an error
- * @stable ICU 3.0
- */
-U_STABLE int32_t U_EXPORT2
-ucurr_getDefaultFractionDigits(const UChar* currency,
-                               UErrorCode* ec);
-
-/**
- * Returns the rounding increment for the given currency, or 0.0 if no
- * rounding is done by the currency.
- * @param currency null-terminated 3-letter ISO 4217 code
- * @param ec input-output error code
- * @return the non-negative rounding increment, or 0.0 if none,
- * or 0.0 if there is an error
- * @stable ICU 3.0
- */
-U_STABLE double U_EXPORT2
-ucurr_getRoundingIncrement(const UChar* currency,
-                           UErrorCode* ec);
-
-/**
- * Selector constants for ucurr_openCurrencies().
- *
- * @see ucurr_openCurrencies
- * @stable ICU 3.2
- */
-typedef enum UCurrCurrencyType {
-    /**
-     * Select all ISO-4217 currency codes.
-     * @stable ICU 3.2
-     */
-    UCURR_ALL = INT32_MAX,
-    /**
-     * Select only ISO-4217 commonly used currency codes.
-     * These currencies can be found in common use, and they usually have
-     * bank notes or coins associated with the currency code.
-     * This does not include fund codes, precious metals and other
-     * various ISO-4217 codes limited to special financial products.
-     * @stable ICU 3.2
-     */
-    UCURR_COMMON = 1,
-    /**
-     * Select ISO-4217 uncommon currency codes.
-     * These codes respresent fund codes, precious metals and other
-     * various ISO-4217 codes limited to special financial products.
-     * A fund code is a monetary resource associated with a currency.
-     * @stable ICU 3.2
-     */
-    UCURR_UNCOMMON = 2,
-    /**
-     * Select only deprecated ISO-4217 codes.
-     * These codes are no longer in general public use.
-     * @stable ICU 3.2
-     */
-    UCURR_DEPRECATED = 4,
-    /**
-     * Select only non-deprecated ISO-4217 codes.
-     * These codes are in general public use.
-     * @stable ICU 3.2
-     */
-    UCURR_NON_DEPRECATED = 8
-} UCurrCurrencyType;
-
-/**
- * Provides a UEnumeration object for listing ISO-4217 codes.
- * @param currType You can use one of several UCurrCurrencyType values for this
- *      variable. You can also | (or) them together to get a specific list of
- *      currencies. Most people will want to use the (UCURR_CURRENCY|UCURR_NON_DEPRECATED) value to
- *      get a list of current currencies.
- * @param pErrorCode Error code
- * @stable ICU 3.2
- */
-U_STABLE UEnumeration * U_EXPORT2
-ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode);
-
-/** 
- * Finds the number of valid currency codes for the
- * given locale and date.
- * @param locale the locale for which to retrieve the
- *               currency count.
- * @param date   the date for which to retrieve the
- *               currency count for the given locale.
- * @param ec     error code
- * @return       the number of currency codes for the
- *               given locale and date.  If 0, currency
- *               codes couldn't be found for the input
- *               values are invalid.
- * @draft ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-ucurr_countCurrencies(const char* locale, 
-                 UDate date, 
-                 UErrorCode* ec); 
-
-/** 
- * Finds a currency code for the given locale and date 
- * @param locale the locale for which to retrieve a currency code.  
- *               Currency can be specified by the "currency" keyword 
- *               in which case it overrides the default currency code 
- * @param date   the date for which to retrieve a currency code for 
- *               the given locale. 
- * @param index  the index within the available list of currency codes
- *               for the given locale on the given date.
- * @param buff   fill in buffer. Can be NULL for preflighting. 
- * @param buffCapacity capacity of the fill in buffer. Can be 0 for 
- *               preflighting. If it is non-zero, the buff parameter 
- *               must not be NULL. 
- * @param ec     error code 
- * @return       length of the currency string. It should always be 3. 
- *               If 0, currency couldn't be found or the input values are  
- *               invalid.  
- * @draft ICU 4.0 
- */ 
-U_DRAFT int32_t U_EXPORT2 
-ucurr_forLocaleAndDate(const char* locale, 
-                UDate date, 
-                int32_t index,
-                UChar* buff, 
-                int32_t buffCapacity, 
-                UErrorCode* ec); 
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/ucurr.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ucurr.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ucurr.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ucurr.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,270 @@
+/*
+**********************************************************************
+* Copyright (c) 2002-2008, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+*/
+#ifndef _UCURR_H_
+#define _UCURR_H_
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file 
+ * \brief C API: Encapsulates information about a currency.
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * The ucurr API encapsulates information about a currency, as defined by
+ * ISO 4217.  A currency is represented by a 3-character string
+ * containing its ISO 4217 code.  This API can return various data
+ * necessary the proper display of a currency:
+ *
+ * <ul><li>A display symbol, for a specific locale
+ * <li>The number of fraction digits to display
+ * <li>A rounding increment
+ * </ul>
+ *
+ * The <tt>DecimalFormat</tt> class uses these data to display
+ * currencies.
+ * @author Alan Liu
+ * @since ICU 2.2
+ */
+
+/**
+ * Finds a currency code for the given locale.
+ * @param locale the locale for which to retrieve a currency code. 
+ *               Currency can be specified by the "currency" keyword
+ *               in which case it overrides the default currency code
+ * @param buff   fill in buffer. Can be NULL for preflighting.
+ * @param buffCapacity capacity of the fill in buffer. Can be 0 for
+ *               preflighting. If it is non-zero, the buff parameter
+ *               must not be NULL.
+ * @param ec error code
+ * @return length of the currency string. It should always be 3. If 0,
+ *                currency couldn't be found or the input values are 
+ *                invalid. 
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+ucurr_forLocale(const char* locale,
+                UChar* buff,
+                int32_t buffCapacity,
+                UErrorCode* ec);
+
+/**
+ * Selector constants for ucurr_getName().
+ *
+ * @see ucurr_getName
+ * @stable ICU 2.6
+ */
+typedef enum UCurrNameStyle {
+    /**
+     * Selector for ucurr_getName indicating a symbolic name for a
+     * currency, such as "$" for USD.
+     * @stable ICU 2.6
+     */
+    UCURR_SYMBOL_NAME,
+
+    /**
+     * Selector for ucurr_getName indicating the long name for a
+     * currency, such as "US Dollar" for USD.
+     * @stable ICU 2.6
+     */
+    UCURR_LONG_NAME
+} UCurrNameStyle;
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * @stable ICU 2.6
+ */
+typedef const void* UCurrRegistryKey;
+
+/**
+ * Register an (existing) ISO 4217 currency code for the given locale.
+ * Only the country code and the two variants EURO and PRE_EURO are
+ * recognized.
+ * @param isoCode the three-letter ISO 4217 currency code
+ * @param locale  the locale for which to register this currency code
+ * @param status the in/out status code
+ * @return a registry key that can be used to unregister this currency code, or NULL
+ * if there was an error.
+ * @stable ICU 2.6
+ */
+U_STABLE UCurrRegistryKey U_EXPORT2
+ucurr_register(const UChar* isoCode, 
+                   const char* locale,  
+                   UErrorCode* status);
+/**
+ * Unregister the previously-registered currency definitions using the
+ * URegistryKey returned from ucurr_register.  Key becomes invalid after
+ * a successful call and should not be used again.  Any currency 
+ * that might have been hidden by the original ucurr_register call is 
+ * restored.
+ * @param key the registry key returned by a previous call to ucurr_register
+ * @param status the in/out status code, no special meanings are assigned
+ * @return TRUE if the currency for this key was successfully unregistered
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+ucurr_unregister(UCurrRegistryKey key, UErrorCode* status);
+#endif /* UCONFIG_NO_SERVICE */
+
+/**
+ * Returns the display name for the given currency in the
+ * given locale.  For example, the display name for the USD
+ * currency object in the en_US locale is "$".
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param locale locale in which to display currency
+ * @param nameStyle selector for which kind of name to return
+ * @param isChoiceFormat fill-in set to TRUE if the returned value
+ * is a ChoiceFormat pattern; otherwise it is a static string
+ * @param len fill-in parameter to receive length of result
+ * @param ec error code
+ * @return pointer to display string of 'len' UChars.  If the resource
+ * data contains no entry for 'currency', then 'currency' itself is
+ * returned.  If *isChoiceFormat is TRUE, then the result is a
+ * ChoiceFormat pattern.  Otherwise it is a static string.
+ * @stable ICU 2.6
+ */
+U_STABLE const UChar* U_EXPORT2
+ucurr_getName(const UChar* currency,
+              const char* locale,
+              UCurrNameStyle nameStyle,
+              UBool* isChoiceFormat,
+              int32_t* len,
+              UErrorCode* ec);
+
+/**
+ * Returns the number of the number of fraction digits that should
+ * be displayed for the given currency.
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param ec input-output error code
+ * @return a non-negative number of fraction digits to be
+ * displayed, or 0 if there is an error
+ * @stable ICU 3.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucurr_getDefaultFractionDigits(const UChar* currency,
+                               UErrorCode* ec);
+
+/**
+ * Returns the rounding increment for the given currency, or 0.0 if no
+ * rounding is done by the currency.
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param ec input-output error code
+ * @return the non-negative rounding increment, or 0.0 if none,
+ * or 0.0 if there is an error
+ * @stable ICU 3.0
+ */
+U_STABLE double U_EXPORT2
+ucurr_getRoundingIncrement(const UChar* currency,
+                           UErrorCode* ec);
+
+/**
+ * Selector constants for ucurr_openCurrencies().
+ *
+ * @see ucurr_openCurrencies
+ * @stable ICU 3.2
+ */
+typedef enum UCurrCurrencyType {
+    /**
+     * Select all ISO-4217 currency codes.
+     * @stable ICU 3.2
+     */
+    UCURR_ALL = INT32_MAX,
+    /**
+     * Select only ISO-4217 commonly used currency codes.
+     * These currencies can be found in common use, and they usually have
+     * bank notes or coins associated with the currency code.
+     * This does not include fund codes, precious metals and other
+     * various ISO-4217 codes limited to special financial products.
+     * @stable ICU 3.2
+     */
+    UCURR_COMMON = 1,
+    /**
+     * Select ISO-4217 uncommon currency codes.
+     * These codes respresent fund codes, precious metals and other
+     * various ISO-4217 codes limited to special financial products.
+     * A fund code is a monetary resource associated with a currency.
+     * @stable ICU 3.2
+     */
+    UCURR_UNCOMMON = 2,
+    /**
+     * Select only deprecated ISO-4217 codes.
+     * These codes are no longer in general public use.
+     * @stable ICU 3.2
+     */
+    UCURR_DEPRECATED = 4,
+    /**
+     * Select only non-deprecated ISO-4217 codes.
+     * These codes are in general public use.
+     * @stable ICU 3.2
+     */
+    UCURR_NON_DEPRECATED = 8
+} UCurrCurrencyType;
+
+/**
+ * Provides a UEnumeration object for listing ISO-4217 codes.
+ * @param currType You can use one of several UCurrCurrencyType values for this
+ *      variable. You can also | (or) them together to get a specific list of
+ *      currencies. Most people will want to use the (UCURR_CURRENCY|UCURR_NON_DEPRECATED) value to
+ *      get a list of current currencies.
+ * @param pErrorCode Error code
+ * @stable ICU 3.2
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode);
+
+/** 
+ * Finds the number of valid currency codes for the
+ * given locale and date.
+ * @param locale the locale for which to retrieve the
+ *               currency count.
+ * @param date   the date for which to retrieve the
+ *               currency count for the given locale.
+ * @param ec     error code
+ * @return       the number of currency codes for the
+ *               given locale and date.  If 0, currency
+ *               codes couldn't be found for the input
+ *               values are invalid.
+ * @draft ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+ucurr_countCurrencies(const char* locale, 
+                 UDate date, 
+                 UErrorCode* ec); 
+
+/** 
+ * Finds a currency code for the given locale and date 
+ * @param locale the locale for which to retrieve a currency code.  
+ *               Currency can be specified by the "currency" keyword 
+ *               in which case it overrides the default currency code 
+ * @param date   the date for which to retrieve a currency code for 
+ *               the given locale. 
+ * @param index  the index within the available list of currency codes
+ *               for the given locale on the given date.
+ * @param buff   fill in buffer. Can be NULL for preflighting. 
+ * @param buffCapacity capacity of the fill in buffer. Can be 0 for 
+ *               preflighting. If it is non-zero, the buff parameter 
+ *               must not be NULL. 
+ * @param ec     error code 
+ * @return       length of the currency string. It should always be 3. 
+ *               If 0, currency couldn't be found or the input values are  
+ *               invalid.  
+ * @draft ICU 4.0 
+ */ 
+U_DRAFT int32_t U_EXPORT2 
+ucurr_forLocaleAndDate(const char* locale, 
+                UDate date, 
+                int32_t index,
+                UChar* buff, 
+                int32_t buffCapacity, 
+                UErrorCode* ec); 
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/udat.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/udat.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/udat.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,962 +0,0 @@
-/*
- *******************************************************************************
- * Copyright (C) 1996-2009, International Business Machines
- * Corporation and others. All Rights Reserved.
- *******************************************************************************
-*/
-
-#ifndef UDAT_H
-#define UDAT_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/ucal.h"
-#include "unicode/unum.h"
-/**
- * \file
- * \brief C API: DateFormat
- *
- * <h2> Date Format C API</h2>
- *
- * Date Format C API  consists of functions that convert dates and
- * times from their internal representations to textual form and back again in a
- * language-independent manner. Converting from the internal representation (milliseconds
- * since midnight, January 1, 1970) to text is known as "formatting," and converting
- * from text to millis is known as "parsing."  We currently define only one concrete
- * structure UDateFormat, which can handle pretty much all normal
- * date formatting and parsing actions.
- * <P>
- * Date Format helps you to format and parse dates for any locale. Your code can
- * be completely independent of the locale conventions for months, days of the
- * week, or even the calendar format: lunar vs. solar.
- * <P>
- * To format a date for the current Locale with default time and date style,
- * use one of the static factory methods:
- * <pre>
- * \code
- *  UErrorCode status = U_ZERO_ERROR;
- *  UChar *myString;
- *  int32_t myStrlen = 0;
- *  UDateFormat* dfmt = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, -1, &status);
- *  myStrlen = udat_format(dfmt, myDate, NULL, myStrlen, NULL, &status);
- *  if (status==U_BUFFER_OVERFLOW_ERROR){
- *      status=U_ZERO_ERROR;
- *      myString=(UChar*)malloc(sizeof(UChar) * (myStrlen+1) );
- *      udat_format(dfmt, myDate, myString, myStrlen+1, NULL, &status);
- *  }
- * \endcode
- * </pre>
- * If you are formatting multiple numbers, it is more efficient to get the
- * format and use it multiple times so that the system doesn't have to fetch the
- * information about the local language and country conventions multiple times.
- * <pre>
- * \code
- *  UErrorCode status = U_ZERO_ERROR;
- *  int32_t i, myStrlen = 0;
- *  UChar* myString;
- *  char buffer[1024];
- *  UDate myDateArr[] = { 0.0, 100000000.0, 2000000000.0 }; // test values
- *  UDateFormat* df = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, 0, &status);
- *  for (i = 0; i < 3; i++) {
- *      myStrlen = udat_format(df, myDateArr[i], NULL, myStrlen, NULL, &status);
- *      if(status == U_BUFFER_OVERFLOW_ERROR){
- *          status = U_ZERO_ERROR;
- *          myString = (UChar*)malloc(sizeof(UChar) * (myStrlen+1) );
- *          udat_format(df, myDateArr[i], myString, myStrlen+1, NULL, &status);
- *          printf("%s\n", u_austrcpy(buffer, myString) );
- *          free(myString);
- *      }
- *  }
- * \endcode
- * </pre>
- * To get specific fields of a date, you can use UFieldPosition to
- * get specific fields.
- * <pre>
- * \code
- *  UErrorCode status = U_ZERO_ERROR;
- *  UFieldPosition pos;
- *  UChar *myString;
- *  int32_t myStrlen = 0;
- *  char buffer[1024];
- *
- *  pos.field = 1;  // Same as the DateFormat::EField enum
- *  UDateFormat* dfmt = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, -1, NULL, 0, &status);
- *  myStrlen = udat_format(dfmt, myDate, NULL, myStrlen, &pos, &status);
- *  if (status==U_BUFFER_OVERFLOW_ERROR){
- *      status=U_ZERO_ERROR;
- *      myString=(UChar*)malloc(sizeof(UChar) * (myStrlen+1) );
- *      udat_format(dfmt, myDate, myString, myStrlen+1, &pos, &status);
- *  }
- *  printf("date format: %s\n", u_austrcpy(buffer, myString));
- *  buffer[pos.endIndex] = 0;   // NULL terminate the string.
- *  printf("UFieldPosition position equals %s\n", &buffer[pos.beginIndex]);
- * \endcode
- * </pre>
- * To format a date for a different Locale, specify it in the call to
- * udat_open()
- * <pre>
- * \code
- *        UDateFormat* df = udat_open(UDAT_SHORT, UDAT_SHORT, "fr_FR", NULL, -1, NULL, 0, &status);
- * \endcode
- * </pre>
- * You can use a DateFormat API udat_parse() to parse.
- * <pre>
- * \code
- *  UErrorCode status = U_ZERO_ERROR;
- *  int32_t parsepos=0;
- *  UDate myDate = udat_parse(df, myString, u_strlen(myString), &parsepos, &status);
- * \endcode
- * </pre>
- *  You can pass in different options for the arguments for date and time style
- *  to control the length of the result; from SHORT to MEDIUM to LONG to FULL.
- *  The exact result depends on the locale, but generally:
- *  see UDateFormatStyle for more details
- * <ul type=round>
- *   <li>   UDAT_SHORT is completely numeric, such as 12/13/52 or 3:30pm
- *   <li>   UDAT_MEDIUM is longer, such as Jan 12, 1952
- *   <li>   UDAT_LONG is longer, such as January 12, 1952 or 3:30:32pm
- *   <li>   UDAT_FULL is pretty completely specified, such as
- *          Tuesday, April 12, 1952 AD or 3:30:42pm PST.
- * </ul>
- * You can also set the time zone on the format if you wish.
- * <P>
- * You can also use forms of the parse and format methods with Parse Position and
- * UFieldPosition to allow you to
- * <ul type=round>
- *   <li>   Progressively parse through pieces of a string.
- *   <li>   Align any particular field, or find out where it is for selection
- *          on the screen.
- * </ul>
- */
-
-/** A date formatter.
- *  For usage in C programs.
- *  @stable ICU 2.6
- */
-typedef void* UDateFormat;
-
-/** The possible date/time format styles 
- *  @stable ICU 2.6
- */
-typedef enum UDateFormatStyle {
-    /** Full style */
-    UDAT_FULL,
-    /** Long style */
-    UDAT_LONG,
-    /** Medium style */
-    UDAT_MEDIUM,
-    /** Short style */
-    UDAT_SHORT,
-    /** Default style */
-    UDAT_DEFAULT = UDAT_MEDIUM,
-
-    /** Bitfield for relative date */
-    UDAT_RELATIVE = (1 << 7),
-    
-    UDAT_FULL_RELATIVE = UDAT_FULL | UDAT_RELATIVE,
-        
-    UDAT_LONG_RELATIVE = UDAT_LONG | UDAT_RELATIVE,
-    
-    UDAT_MEDIUM_RELATIVE = UDAT_MEDIUM | UDAT_RELATIVE,
-    
-    UDAT_SHORT_RELATIVE = UDAT_SHORT | UDAT_RELATIVE,
-    
-    
-    /** No style */
-    UDAT_NONE = -1,
-    /** for internal API use only */
-    UDAT_IGNORE = -2
-
-} UDateFormatStyle;
-
-
-/**
- * @{
- * Below are a set of pre-defined skeletons.
- *
- * <P>
- * A skeleton 
- * <ol>
- * <li>
- *    only keeps the field pattern letter and ignores all other parts 
- *    in a pattern, such as space, punctuations, and string literals.
- * </li>
- * <li>
- *    hides the order of fields. 
- * </li>
- * <li>
- *    might hide a field's pattern letter length.
- *
- *    For those non-digit calendar fields, the pattern letter length is 
- *    important, such as MMM, MMMM, and MMMMM; EEE and EEEE, 
- *    and the field's pattern letter length is honored.
- *    
- *    For the digit calendar fields,  such as M or MM, d or dd, yy or yyyy, 
- *    the field pattern length is ignored and the best match, which is defined 
- *    in date time patterns, will be returned without honor the field pattern
- *    letter length in skeleton.
- * </li>
- * </ol>
- *
- * @stable ICU 4.0
- */
-
-#define UDAT_MINUTE_SECOND              "ms"
-#define UDAT_HOUR24_MINUTE              "Hm"
-#define UDAT_HOUR24_MINUTE_SECOND       "Hms"      
-#define UDAT_HOUR_MINUTE_SECOND         "hms"
-#define UDAT_STANDALONE_MONTH           "LLLL"
-#define UDAT_ABBR_STANDALONE_MONTH      "LLL"
-#define UDAT_YEAR_QUARTER               "yQQQ"
-#define UDAT_YEAR_ABBR_QUARTER          "yQ"
-
-/** @} */
-
-/**
- * @{
- * Below are a set of pre-defined skeletons that 
- * have pre-defined interval patterns in resource files.
- * Users are encouraged to use them in date interval format factory methods.
- *
- */
-#define UDAT_HOUR_MINUTE                "hm"
-#define UDAT_YEAR                       "y"
-#define UDAT_DAY                        "d"
-#define UDAT_NUM_MONTH_WEEKDAY_DAY      "MEd"
-#define UDAT_YEAR_NUM_MONTH             "yM"              
-#define UDAT_NUM_MONTH_DAY              "Md"
-#define UDAT_YEAR_NUM_MONTH_WEEKDAY_DAY "yMEd"
-#define UDAT_ABBR_MONTH_WEEKDAY_DAY     "MMMEd"
-#define UDAT_YEAR_MONTH                 "yMMMM"
-#define UDAT_YEAR_ABBR_MONTH            "yMMM"
-#define UDAT_MONTH_DAY                  "MMMMd"
-#define UDAT_ABBR_MONTH_DAY             "MMMd" 
-#define UDAT_MONTH_WEEKDAY_DAY          "MMMMEEEEd"
-#define UDAT_YEAR_ABBR_MONTH_WEEKDAY_DAY "yMMMEd" 
-#define UDAT_YEAR_MONTH_WEEKDAY_DAY     "yMMMMEEEEd"
-#define UDAT_YEAR_MONTH_DAY             "yMMMMd"
-#define UDAT_YEAR_ABBR_MONTH_DAY        "yMMMd"
-#define UDAT_YEAR_NUM_MONTH_DAY         "yMd"
-#define UDAT_NUM_MONTH                  "M"
-#define UDAT_ABBR_MONTH                 "MMM"
-#define UDAT_MONTH                      "MMMM"
-#define UDAT_HOUR_MINUTE_GENERIC_TZ     "hmv"
-#define UDAT_HOUR_MINUTE_TZ             "hmz"
-#define UDAT_HOUR                       "h"
-#define UDAT_HOUR_GENERIC_TZ            "hv"
-#define UDAT_HOUR_TZ                    "hz"
-
-/** @} */
-
-
-/**
- * FieldPosition and UFieldPosition selectors for format fields
- * defined by DateFormat and UDateFormat.
- * @stable ICU 3.0
- */
-typedef enum UDateFormatField {
-    /**
-     * FieldPosition and UFieldPosition selector for 'G' field alignment,
-     * corresponding to the UCAL_ERA field.
-     * @stable ICU 3.0
-     */
-    UDAT_ERA_FIELD = 0,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'y' field alignment,
-     * corresponding to the UCAL_YEAR field.
-     * @stable ICU 3.0
-     */
-    UDAT_YEAR_FIELD = 1,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'M' field alignment,
-     * corresponding to the UCAL_MONTH field.
-     * @stable ICU 3.0
-     */
-    UDAT_MONTH_FIELD = 2,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'd' field alignment,
-     * corresponding to the UCAL_DATE field.
-     * @stable ICU 3.0
-     */
-    UDAT_DATE_FIELD = 3,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'k' field alignment,
-     * corresponding to the UCAL_HOUR_OF_DAY field.
-     * UDAT_HOUR_OF_DAY1_FIELD is used for the one-based 24-hour clock.
-     * For example, 23:59 + 01:00 results in 24:59.
-     * @stable ICU 3.0
-     */
-    UDAT_HOUR_OF_DAY1_FIELD = 4,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'H' field alignment,
-     * corresponding to the UCAL_HOUR_OF_DAY field.
-     * UDAT_HOUR_OF_DAY0_FIELD is used for the zero-based 24-hour clock.
-     * For example, 23:59 + 01:00 results in 00:59.
-     * @stable ICU 3.0
-     */
-    UDAT_HOUR_OF_DAY0_FIELD = 5,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'm' field alignment,
-     * corresponding to the UCAL_MINUTE field.
-     * @stable ICU 3.0
-     */
-    UDAT_MINUTE_FIELD = 6,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 's' field alignment,
-     * corresponding to the UCAL_SECOND field.
-     * @stable ICU 3.0
-     */
-    UDAT_SECOND_FIELD = 7,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'S' field alignment,
-     * corresponding to the UCAL_MILLISECOND field.
-     * @stable ICU 3.0
-     */
-    UDAT_FRACTIONAL_SECOND_FIELD = 8,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'E' field alignment,
-     * corresponding to the UCAL_DAY_OF_WEEK field.
-     * @stable ICU 3.0
-     */
-    UDAT_DAY_OF_WEEK_FIELD = 9,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'D' field alignment,
-     * corresponding to the UCAL_DAY_OF_YEAR field.
-     * @stable ICU 3.0
-     */
-    UDAT_DAY_OF_YEAR_FIELD = 10,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'F' field alignment,
-     * corresponding to the UCAL_DAY_OF_WEEK_IN_MONTH field.
-     * @stable ICU 3.0
-     */
-    UDAT_DAY_OF_WEEK_IN_MONTH_FIELD = 11,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'w' field alignment,
-     * corresponding to the UCAL_WEEK_OF_YEAR field.
-     * @stable ICU 3.0
-     */
-    UDAT_WEEK_OF_YEAR_FIELD = 12,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'W' field alignment,
-     * corresponding to the UCAL_WEEK_OF_MONTH field.
-     * @stable ICU 3.0
-     */
-    UDAT_WEEK_OF_MONTH_FIELD = 13,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'a' field alignment,
-     * corresponding to the UCAL_AM_PM field.
-     * @stable ICU 3.0
-     */
-    UDAT_AM_PM_FIELD = 14,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'h' field alignment,
-     * corresponding to the UCAL_HOUR field.
-     * UDAT_HOUR1_FIELD is used for the one-based 12-hour clock.
-     * For example, 11:30 PM + 1 hour results in 12:30 AM.
-     * @stable ICU 3.0
-     */
-    UDAT_HOUR1_FIELD = 15,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'K' field alignment,
-     * corresponding to the UCAL_HOUR field.
-     * UDAT_HOUR0_FIELD is used for the zero-based 12-hour clock.
-     * For example, 11:30 PM + 1 hour results in 00:30 AM.
-     * @stable ICU 3.0
-     */
-    UDAT_HOUR0_FIELD = 16,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'z' field alignment,
-     * corresponding to the UCAL_ZONE_OFFSET and
-     * UCAL_DST_OFFSET fields.
-     * @stable ICU 3.0
-     */
-    UDAT_TIMEZONE_FIELD = 17,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'Y' field alignment,
-     * corresponding to the UCAL_YEAR_WOY field.
-     * @stable ICU 3.0
-     */
-    UDAT_YEAR_WOY_FIELD = 18,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'e' field alignment,
-     * corresponding to the UCAL_DOW_LOCAL field.
-     * @stable ICU 3.0
-     */
-    UDAT_DOW_LOCAL_FIELD = 19,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'u' field alignment,
-     * corresponding to the UCAL_EXTENDED_YEAR field.
-     * @stable ICU 3.0
-     */
-    UDAT_EXTENDED_YEAR_FIELD = 20,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'g' field alignment,
-     * corresponding to the UCAL_JULIAN_DAY field.
-     * @stable ICU 3.0
-     */
-    UDAT_JULIAN_DAY_FIELD = 21,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'A' field alignment,
-     * corresponding to the UCAL_MILLISECONDS_IN_DAY field.
-     * @stable ICU 3.0
-     */
-    UDAT_MILLISECONDS_IN_DAY_FIELD = 22,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'Z' field alignment,
-     * corresponding to the UCAL_ZONE_OFFSET and
-     * UCAL_DST_OFFSET fields.
-     * @stable ICU 3.0
-     */
-    UDAT_TIMEZONE_RFC_FIELD = 23,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'v' field alignment,
-     * corresponding to the UCAL_ZONE_OFFSET field.
-     * @stable ICU 3.4
-     */
-    UDAT_TIMEZONE_GENERIC_FIELD = 24,
-    /**
-     * FieldPosition selector for 'c' field alignment,
-     * corresponding to the {@link #UCAL_DATE} field. 
-     * This displays the stand alone day name, if available.
-     * @stable ICU 3.4
-     */
-    UDAT_STANDALONE_DAY_FIELD = 25,
-    
-    /**
-     * FieldPosition selector for 'L' field alignment,
-     * corresponding to the {@link #UCAL_MONTH} field.  
-     * This displays the stand alone month name, if available.
-     * @stable ICU 3.4
-     */
-    UDAT_STANDALONE_MONTH_FIELD = 26,
-
-    /**
-     * FieldPosition selector for "Q" field alignment,
-     * corresponding to quarters. This is implemented
-     * using the {@link #UCAL_MONTH} field. This
-     * displays the quarter.
-     * @stable ICU 3.6
-     */
-    UDAT_QUARTER_FIELD = 27,
-
-    /**
-     * FieldPosition selector for the "q" field alignment,
-     * corresponding to stand-alone quarters. This is
-     * implemented using the {@link #UCAL_MONTH} field.
-     * This displays the stand-alone quarter.
-     * @stable ICU 3.6
-     */
-    UDAT_STANDALONE_QUARTER_FIELD = 28,
-
-    /**
-     * FieldPosition and UFieldPosition selector for 'V' field alignment,
-     * corresponding to the UCAL_ZONE_OFFSET field.
-     * @stable ICU 3.8
-     */
-    UDAT_TIMEZONE_SPECIAL_FIELD = 29,
-
-   /**
-     * Number of FieldPosition and UFieldPosition selectors for 
-     * DateFormat and UDateFormat.
-     * Valid selectors range from 0 to UDAT_FIELD_COUNT-1.
-     * This value is subject to change if new fields are defined
-     * in the future.
-     * @stable ICU 3.0
-     */
-    UDAT_FIELD_COUNT = 30
-
-} UDateFormatField;
-
-/**
- * Open a new UDateFormat for formatting and parsing dates and times.
- * A UDateFormat may be used to format dates in calls to {@link #udat_format },
- * and to parse dates in calls to {@link #udat_parse }.
- * @param timeStyle The style used to format times; one of UDAT_FULL, UDAT_LONG,
- * UDAT_MEDIUM, UDAT_SHORT, UDAT_DEFAULT, or UDAT_NONE (relative time styles
- * are not currently supported)
- * @param dateStyle The style used to format dates; one of UDAT_FULL, UDAT_LONG,
- * UDAT_MEDIUM, UDAT_SHORT, UDAT_DEFAULT, UDAT_FULL_RELATIVE, UDAT_LONG_RELATIVE,
- * UDAT_MEDIUM_RELATIVE, UDAT_SHORT_RELATIVE, or UDAT_NONE
- * @param locale The locale specifying the formatting conventions
- * @param tzID A timezone ID specifying the timezone to use.  If 0, use
- * the default timezone.
- * @param tzIDLength The length of tzID, or -1 if null-terminated.
- * @param pattern A pattern specifying the format to use.
- * @param patternLength The number of characters in the pattern, or -1 if null-terminated.
- * @param status A pointer to an UErrorCode to receive any errors
- * @return A pointer to a UDateFormat to use for formatting dates and times, or 0 if
- * an error occurred.
- * @stable ICU 2.0
- */
-U_STABLE UDateFormat* U_EXPORT2 
-udat_open(UDateFormatStyle  timeStyle,
-          UDateFormatStyle  dateStyle,
-          const char        *locale,
-          const UChar       *tzID,
-          int32_t           tzIDLength,
-          const UChar       *pattern,
-          int32_t           patternLength,
-          UErrorCode        *status);
-
-
-/**
-* Close a UDateFormat.
-* Once closed, a UDateFormat may no longer be used.
-* @param format The formatter to close.
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2 
-udat_close(UDateFormat* format);
-
-/**
- * Open a copy of a UDateFormat.
- * This function performs a deep copy.
- * @param fmt The format to copy
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return A pointer to a UDateFormat identical to fmt.
- * @stable ICU 2.0
- */
-U_STABLE UDateFormat* U_EXPORT2 
-udat_clone(const UDateFormat *fmt,
-       UErrorCode *status);
-
-/**
-* Format a date using an UDateFormat.
-* The date will be formatted using the conventions specified in {@link #udat_open }
-* @param format The formatter to use
-* @param dateToFormat The date to format
-* @param result A pointer to a buffer to receive the formatted number.
-* @param resultLength The maximum size of result.
-* @param position A pointer to a UFieldPosition.  On input, position->field
-* is read.  On output, position->beginIndex and position->endIndex indicate
-* the beginning and ending indices of field number position->field, if such
-* a field exists.  This parameter may be NULL, in which case no field
-* position data is returned.
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The total buffer size needed; if greater than resultLength, the output was truncated.
-* @see udat_parse
-* @see UFieldPosition
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2 
-udat_format(    const    UDateFormat*    format,
-                        UDate           dateToFormat,
-                        UChar*          result,
-                        int32_t         resultLength,
-                        UFieldPosition* position,
-                        UErrorCode*     status);
-
-/**
-* Parse a string into an date/time using a UDateFormat.
-* The date will be parsed using the conventions specified in {@link #udat_open }
-* @param format The formatter to use.
-* @param text The text to parse.
-* @param textLength The length of text, or -1 if null-terminated.
-* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
-* to begin parsing.  If not 0, on output the offset at which parsing ended.
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The value of the parsed date/time
-* @see udat_format
-* @stable ICU 2.0
-*/
-U_STABLE UDate U_EXPORT2 
-udat_parse(    const    UDateFormat*    format,
-            const    UChar*          text,
-                    int32_t         textLength,
-                    int32_t         *parsePos,
-                    UErrorCode      *status);
-
-/**
-* Parse a string into an date/time using a UDateFormat.
-* The date will be parsed using the conventions specified in {@link #udat_open }
-* @param format The formatter to use.
-* @param calendar The calendar in which to store the parsed data.
-* @param text The text to parse.
-* @param textLength The length of text, or -1 if null-terminated.
-* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
-* to begin parsing.  If not 0, on output the offset at which parsing ended.
-* @param status A pointer to an UErrorCode to receive any errors
-* @see udat_format
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2 
-udat_parseCalendar(const    UDateFormat*    format,
-                            UCalendar*      calendar,
-                   const    UChar*          text,
-                            int32_t         textLength,
-                            int32_t         *parsePos,
-                            UErrorCode      *status);
-
-/**
-* Determine if an UDateFormat will perform lenient parsing.
-* With lenient parsing, the parser may use heuristics to interpret inputs that do not
-* precisely match the pattern. With strict parsing, inputs must match the pattern.
-* @param fmt The formatter to query
-* @return TRUE if fmt is set to perform lenient parsing, FALSE otherwise.
-* @see udat_setLenient
-* @stable ICU 2.0
-*/
-U_STABLE UBool U_EXPORT2 
-udat_isLenient(const UDateFormat* fmt);
-
-/**
-* Specify whether an UDateFormat will perform lenient parsing.
-* With lenient parsing, the parser may use heuristics to interpret inputs that do not
-* precisely match the pattern. With strict parsing, inputs must match the pattern.
-* @param fmt The formatter to set
-* @param isLenient TRUE if fmt should perform lenient parsing, FALSE otherwise.
-* @see dat_isLenient
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2 
-udat_setLenient(    UDateFormat*    fmt,
-                    UBool          isLenient);
-
-/**
-* Get the UCalendar associated with an UDateFormat.
-* A UDateFormat uses a UCalendar to convert a raw value to, for example,
-* the day of the week.
-* @param fmt The formatter to query.
-* @return A pointer to the UCalendar used by fmt.
-* @see udat_setCalendar
-* @stable ICU 2.0
-*/
-U_STABLE const UCalendar* U_EXPORT2 
-udat_getCalendar(const UDateFormat* fmt);
-
-/**
-* Set the UCalendar associated with an UDateFormat.
-* A UDateFormat uses a UCalendar to convert a raw value to, for example,
-* the day of the week.
-* @param fmt The formatter to set.
-* @param calendarToSet A pointer to an UCalendar to be used by fmt.
-* @see udat_setCalendar
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2 
-udat_setCalendar(            UDateFormat*    fmt,
-                    const   UCalendar*      calendarToSet);
-
-/**
-* Get the UNumberFormat associated with an UDateFormat.
-* A UDateFormat uses a UNumberFormat to format numbers within a date,
-* for example the day number.
-* @param fmt The formatter to query.
-* @return A pointer to the UNumberFormat used by fmt to format numbers.
-* @see udat_setNumberFormat
-* @stable ICU 2.0
-*/
-U_STABLE const UNumberFormat* U_EXPORT2 
-udat_getNumberFormat(const UDateFormat* fmt);
-
-/**
-* Set the UNumberFormat associated with an UDateFormat.
-* A UDateFormat uses a UNumberFormat to format numbers within a date,
-* for example the day number.
-* @param fmt The formatter to set.
-* @param numberFormatToSet A pointer to the UNumberFormat to be used by fmt to format numbers.
-* @see udat_getNumberFormat
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2 
-udat_setNumberFormat(            UDateFormat*    fmt,
-                        const   UNumberFormat*  numberFormatToSet);
-
-/**
-* Get a locale for which date/time formatting patterns are available.
-* A UDateFormat in a locale returned by this function will perform the correct
-* formatting and parsing for the locale.
-* @param localeIndex The index of the desired locale.
-* @return A locale for which date/time formatting patterns are available, or 0 if none.
-* @see udat_countAvailable
-* @stable ICU 2.0
-*/
-U_STABLE const char* U_EXPORT2 
-udat_getAvailable(int32_t localeIndex);
-
-/**
-* Determine how many locales have date/time  formatting patterns available.
-* This function is most useful as determining the loop ending condition for
-* calls to {@link #udat_getAvailable }.
-* @return The number of locales for which date/time formatting patterns are available.
-* @see udat_getAvailable
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2 
-udat_countAvailable(void);
-
-/**
-* Get the year relative to which all 2-digit years are interpreted.
-* For example, if the 2-digit start year is 2100, the year 99 will be
-* interpreted as 2199.
-* @param fmt The formatter to query.
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The year relative to which all 2-digit years are interpreted.
-* @see udat_Set2DigitYearStart
-* @stable ICU 2.0
-*/
-U_STABLE UDate U_EXPORT2 
-udat_get2DigitYearStart(    const   UDateFormat     *fmt,
-                                    UErrorCode      *status);
-
-/**
-* Set the year relative to which all 2-digit years will be interpreted.
-* For example, if the 2-digit start year is 2100, the year 99 will be
-* interpreted as 2199.
-* @param fmt The formatter to set.
-* @param d The year relative to which all 2-digit years will be interpreted.
-* @param status A pointer to an UErrorCode to receive any errors
-* @see udat_Set2DigitYearStart
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2 
-udat_set2DigitYearStart(    UDateFormat     *fmt,
-                            UDate           d,
-                            UErrorCode      *status);
-
-/**
-* Extract the pattern from a UDateFormat.
-* The pattern will follow the pattern syntax rules.
-* @param fmt The formatter to query.
-* @param localized TRUE if the pattern should be localized, FALSE otherwise.
-* @param result A pointer to a buffer to receive the pattern.
-* @param resultLength The maximum size of result.
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The total buffer size needed; if greater than resultLength, the output was truncated.
-* @see udat_applyPattern
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2 
-udat_toPattern(    const   UDateFormat     *fmt,
-                        UBool          localized,
-                        UChar           *result,
-                        int32_t         resultLength,
-                        UErrorCode      *status);
-
-/**
-* Set the pattern used by an UDateFormat.
-* The pattern should follow the pattern syntax rules.
-* @param format The formatter to set.
-* @param localized TRUE if the pattern is localized, FALSE otherwise.
-* @param pattern The new pattern
-* @param patternLength The length of pattern, or -1 if null-terminated.
-* @see udat_toPattern
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2 
-udat_applyPattern(            UDateFormat     *format,
-                            UBool          localized,
-                    const   UChar           *pattern,
-                            int32_t         patternLength);
-
-/** 
- * The possible types of date format symbols 
- * @stable ICU 2.6
- */
-typedef enum UDateFormatSymbolType {
-    /** The era names, for example AD */
-    UDAT_ERAS,
-    /** The month names, for example February */
-    UDAT_MONTHS,
-    /** The short month names, for example Feb. */
-    UDAT_SHORT_MONTHS,
-    /** The weekday names, for example Monday */
-    UDAT_WEEKDAYS,
-    /** The short weekday names, for example Mon. */
-    UDAT_SHORT_WEEKDAYS,
-    /** The AM/PM names, for example AM */
-    UDAT_AM_PMS,
-    /** The localized characters */
-    UDAT_LOCALIZED_CHARS,
-    /** The long era names, for example Anno Domini */
-    UDAT_ERA_NAMES,
-    /** The narrow month names, for example F */
-    UDAT_NARROW_MONTHS,
-    /** The narrow weekday names, for example N */
-    UDAT_NARROW_WEEKDAYS,
-    /** Standalone context versions of months */
-    UDAT_STANDALONE_MONTHS,
-    UDAT_STANDALONE_SHORT_MONTHS,
-    UDAT_STANDALONE_NARROW_MONTHS,
-    /** Standalone context versions of weekdays */
-    UDAT_STANDALONE_WEEKDAYS,
-    UDAT_STANDALONE_SHORT_WEEKDAYS,
-    UDAT_STANDALONE_NARROW_WEEKDAYS,
-    /** The quarters, for example 1st Quarter */
-    UDAT_QUARTERS,
-    /** The short quarter names, for example Q1 */
-    UDAT_SHORT_QUARTERS,
-    /** Standalone context versions of quarters */
-    UDAT_STANDALONE_QUARTERS,
-    UDAT_STANDALONE_SHORT_QUARTERS
-
-} UDateFormatSymbolType;
-
-struct UDateFormatSymbols;
-/** Date format symbols.
- *  For usage in C programs.
- *  @stable ICU 2.6
- */
-typedef struct UDateFormatSymbols UDateFormatSymbols;
-
-/**
-* Get the symbols associated with an UDateFormat.
-* The symbols are what a UDateFormat uses to represent locale-specific data,
-* for example month or day names.
-* @param fmt The formatter to query.
-* @param type The type of symbols to get.  One of UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS,
-* UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS, UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS
-* @param symbolIndex The desired symbol of type type.
-* @param result A pointer to a buffer to receive the pattern.
-* @param resultLength The maximum size of result.
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The total buffer size needed; if greater than resultLength, the output was truncated.
-* @see udat_countSymbols
-* @see udat_setSymbols
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2 
-udat_getSymbols(const   UDateFormat             *fmt,
-                        UDateFormatSymbolType   type,
-                        int32_t                 symbolIndex,
-                        UChar                   *result,
-                        int32_t                 resultLength,
-                        UErrorCode              *status);
-
-/**
-* Count the number of particular symbols for an UDateFormat.
-* This function is most useful as for detemining the loop termination condition
-* for calls to {@link #udat_getSymbols }.
-* @param fmt The formatter to query.
-* @param type The type of symbols to count.  One of UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS,
-* UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS, UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS
-* @return The number of symbols of type type.
-* @see udat_getSymbols
-* @see udat_setSymbols
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2 
-udat_countSymbols(    const    UDateFormat                *fmt,
-                            UDateFormatSymbolType    type);
-
-/**
-* Set the symbols associated with an UDateFormat.
-* The symbols are what a UDateFormat uses to represent locale-specific data,
-* for example month or day names.
-* @param format The formatter to set
-* @param type The type of symbols to set.  One of UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS,
-* UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS, UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS
-* @param symbolIndex The index of the symbol to set of type type.
-* @param value The new value
-* @param valueLength The length of value, or -1 if null-terminated
-* @param status A pointer to an UErrorCode to receive any errors
-* @see udat_getSymbols
-* @see udat_countSymbols
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2 
-udat_setSymbols(    UDateFormat             *format,
-                    UDateFormatSymbolType   type,
-                    int32_t                 symbolIndex,
-                    UChar                   *value,
-                    int32_t                 valueLength,
-                    UErrorCode              *status);
-
-/**
- * Get the locale for this date format object.
- * You can choose between valid and actual locale.
- * @param fmt The formatter to get the locale from
- * @param type type of the locale we're looking for (valid or actual) 
- * @param status error code for the operation
- * @return the locale name
- * @stable ICU 2.8
- */
-U_STABLE const char* U_EXPORT2
-udat_getLocaleByType(const UDateFormat *fmt,
-                     ULocDataLocaleType type,
-                     UErrorCode* status); 
-
-/**
-* Extract the date pattern from a UDateFormat set for relative date formatting.
-* The pattern will follow the pattern syntax rules.
-* @param fmt The formatter to query.
-* @param result A pointer to a buffer to receive the pattern.
-* @param resultLength The maximum size of result.
-* @param status A pointer to a UErrorCode to receive any errors
-* @return The total buffer size needed; if greater than resultLength, the output was truncated.
-* @see udat_applyPatternRelative
-* @internal ICU 4.2 technology preview
-*/
-U_INTERNAL int32_t U_EXPORT2 
-udat_toPatternRelativeDate(const UDateFormat *fmt,
-                           UChar             *result,
-                           int32_t           resultLength,
-                           UErrorCode        *status);
-
-/**
-* Extract the time pattern from a UDateFormat set for relative date formatting.
-* The pattern will follow the pattern syntax rules.
-* @param fmt The formatter to query.
-* @param result A pointer to a buffer to receive the pattern.
-* @param resultLength The maximum size of result.
-* @param status A pointer to a UErrorCode to receive any errors
-* @return The total buffer size needed; if greater than resultLength, the output was truncated.
-* @see udat_applyPatternRelative
-* @internal ICU 4.2 technology preview
-*/
-U_INTERNAL int32_t U_EXPORT2 
-udat_toPatternRelativeTime(const UDateFormat *fmt,
-                           UChar             *result,
-                           int32_t           resultLength,
-                           UErrorCode        *status);
-
-/**
-* Set the date & time patterns used by a UDateFormat set for relative date formatting.
-* The patterns should follow the pattern syntax rules.
-* @param format The formatter to set.
-* @param datePattern The new date pattern
-* @param datePatternLength The length of datePattern, or -1 if null-terminated.
-* @param timePattern The new time pattern
-* @param timePatternLength The length of timePattern, or -1 if null-terminated.
-* @param status A pointer to a UErrorCode to receive any errors
-* @see udat_toPatternRelativeDate, udat_toPatternRelativeTime
-* @internal ICU 4.2 technology preview
-*/
-U_INTERNAL void U_EXPORT2 
-udat_applyPatternRelative(UDateFormat *format,
-                          const UChar *datePattern,
-                          int32_t     datePatternLength,
-                          const UChar *timePattern,
-                          int32_t     timePatternLength,
-                          UErrorCode  *status);
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/udat.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/udat.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/udat.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/udat.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,962 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2009, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *******************************************************************************
+*/
+
+#ifndef UDAT_H
+#define UDAT_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/ucal.h"
+#include "unicode/unum.h"
+/**
+ * \file
+ * \brief C API: DateFormat
+ *
+ * <h2> Date Format C API</h2>
+ *
+ * Date Format C API  consists of functions that convert dates and
+ * times from their internal representations to textual form and back again in a
+ * language-independent manner. Converting from the internal representation (milliseconds
+ * since midnight, January 1, 1970) to text is known as "formatting," and converting
+ * from text to millis is known as "parsing."  We currently define only one concrete
+ * structure UDateFormat, which can handle pretty much all normal
+ * date formatting and parsing actions.
+ * <P>
+ * Date Format helps you to format and parse dates for any locale. Your code can
+ * be completely independent of the locale conventions for months, days of the
+ * week, or even the calendar format: lunar vs. solar.
+ * <P>
+ * To format a date for the current Locale with default time and date style,
+ * use one of the static factory methods:
+ * <pre>
+ * \code
+ *  UErrorCode status = U_ZERO_ERROR;
+ *  UChar *myString;
+ *  int32_t myStrlen = 0;
+ *  UDateFormat* dfmt = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, -1, &status);
+ *  myStrlen = udat_format(dfmt, myDate, NULL, myStrlen, NULL, &status);
+ *  if (status==U_BUFFER_OVERFLOW_ERROR){
+ *      status=U_ZERO_ERROR;
+ *      myString=(UChar*)malloc(sizeof(UChar) * (myStrlen+1) );
+ *      udat_format(dfmt, myDate, myString, myStrlen+1, NULL, &status);
+ *  }
+ * \endcode
+ * </pre>
+ * If you are formatting multiple numbers, it is more efficient to get the
+ * format and use it multiple times so that the system doesn't have to fetch the
+ * information about the local language and country conventions multiple times.
+ * <pre>
+ * \code
+ *  UErrorCode status = U_ZERO_ERROR;
+ *  int32_t i, myStrlen = 0;
+ *  UChar* myString;
+ *  char buffer[1024];
+ *  UDate myDateArr[] = { 0.0, 100000000.0, 2000000000.0 }; // test values
+ *  UDateFormat* df = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, 0, &status);
+ *  for (i = 0; i < 3; i++) {
+ *      myStrlen = udat_format(df, myDateArr[i], NULL, myStrlen, NULL, &status);
+ *      if(status == U_BUFFER_OVERFLOW_ERROR){
+ *          status = U_ZERO_ERROR;
+ *          myString = (UChar*)malloc(sizeof(UChar) * (myStrlen+1) );
+ *          udat_format(df, myDateArr[i], myString, myStrlen+1, NULL, &status);
+ *          printf("%s\n", u_austrcpy(buffer, myString) );
+ *          free(myString);
+ *      }
+ *  }
+ * \endcode
+ * </pre>
+ * To get specific fields of a date, you can use UFieldPosition to
+ * get specific fields.
+ * <pre>
+ * \code
+ *  UErrorCode status = U_ZERO_ERROR;
+ *  UFieldPosition pos;
+ *  UChar *myString;
+ *  int32_t myStrlen = 0;
+ *  char buffer[1024];
+ *
+ *  pos.field = 1;  // Same as the DateFormat::EField enum
+ *  UDateFormat* dfmt = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, -1, NULL, 0, &status);
+ *  myStrlen = udat_format(dfmt, myDate, NULL, myStrlen, &pos, &status);
+ *  if (status==U_BUFFER_OVERFLOW_ERROR){
+ *      status=U_ZERO_ERROR;
+ *      myString=(UChar*)malloc(sizeof(UChar) * (myStrlen+1) );
+ *      udat_format(dfmt, myDate, myString, myStrlen+1, &pos, &status);
+ *  }
+ *  printf("date format: %s\n", u_austrcpy(buffer, myString));
+ *  buffer[pos.endIndex] = 0;   // NULL terminate the string.
+ *  printf("UFieldPosition position equals %s\n", &buffer[pos.beginIndex]);
+ * \endcode
+ * </pre>
+ * To format a date for a different Locale, specify it in the call to
+ * udat_open()
+ * <pre>
+ * \code
+ *        UDateFormat* df = udat_open(UDAT_SHORT, UDAT_SHORT, "fr_FR", NULL, -1, NULL, 0, &status);
+ * \endcode
+ * </pre>
+ * You can use a DateFormat API udat_parse() to parse.
+ * <pre>
+ * \code
+ *  UErrorCode status = U_ZERO_ERROR;
+ *  int32_t parsepos=0;
+ *  UDate myDate = udat_parse(df, myString, u_strlen(myString), &parsepos, &status);
+ * \endcode
+ * </pre>
+ *  You can pass in different options for the arguments for date and time style
+ *  to control the length of the result; from SHORT to MEDIUM to LONG to FULL.
+ *  The exact result depends on the locale, but generally:
+ *  see UDateFormatStyle for more details
+ * <ul type=round>
+ *   <li>   UDAT_SHORT is completely numeric, such as 12/13/52 or 3:30pm
+ *   <li>   UDAT_MEDIUM is longer, such as Jan 12, 1952
+ *   <li>   UDAT_LONG is longer, such as January 12, 1952 or 3:30:32pm
+ *   <li>   UDAT_FULL is pretty completely specified, such as
+ *          Tuesday, April 12, 1952 AD or 3:30:42pm PST.
+ * </ul>
+ * You can also set the time zone on the format if you wish.
+ * <P>
+ * You can also use forms of the parse and format methods with Parse Position and
+ * UFieldPosition to allow you to
+ * <ul type=round>
+ *   <li>   Progressively parse through pieces of a string.
+ *   <li>   Align any particular field, or find out where it is for selection
+ *          on the screen.
+ * </ul>
+ */
+
+/** A date formatter.
+ *  For usage in C programs.
+ *  @stable ICU 2.6
+ */
+typedef void* UDateFormat;
+
+/** The possible date/time format styles 
+ *  @stable ICU 2.6
+ */
+typedef enum UDateFormatStyle {
+    /** Full style */
+    UDAT_FULL,
+    /** Long style */
+    UDAT_LONG,
+    /** Medium style */
+    UDAT_MEDIUM,
+    /** Short style */
+    UDAT_SHORT,
+    /** Default style */
+    UDAT_DEFAULT = UDAT_MEDIUM,
+
+    /** Bitfield for relative date */
+    UDAT_RELATIVE = (1 << 7),
+    
+    UDAT_FULL_RELATIVE = UDAT_FULL | UDAT_RELATIVE,
+        
+    UDAT_LONG_RELATIVE = UDAT_LONG | UDAT_RELATIVE,
+    
+    UDAT_MEDIUM_RELATIVE = UDAT_MEDIUM | UDAT_RELATIVE,
+    
+    UDAT_SHORT_RELATIVE = UDAT_SHORT | UDAT_RELATIVE,
+    
+    
+    /** No style */
+    UDAT_NONE = -1,
+    /** for internal API use only */
+    UDAT_IGNORE = -2
+
+} UDateFormatStyle;
+
+
+/**
+ * @{
+ * Below are a set of pre-defined skeletons.
+ *
+ * <P>
+ * A skeleton 
+ * <ol>
+ * <li>
+ *    only keeps the field pattern letter and ignores all other parts 
+ *    in a pattern, such as space, punctuations, and string literals.
+ * </li>
+ * <li>
+ *    hides the order of fields. 
+ * </li>
+ * <li>
+ *    might hide a field's pattern letter length.
+ *
+ *    For those non-digit calendar fields, the pattern letter length is 
+ *    important, such as MMM, MMMM, and MMMMM; EEE and EEEE, 
+ *    and the field's pattern letter length is honored.
+ *    
+ *    For the digit calendar fields,  such as M or MM, d or dd, yy or yyyy, 
+ *    the field pattern length is ignored and the best match, which is defined 
+ *    in date time patterns, will be returned without honor the field pattern
+ *    letter length in skeleton.
+ * </li>
+ * </ol>
+ *
+ * @stable ICU 4.0
+ */
+
+#define UDAT_MINUTE_SECOND              "ms"
+#define UDAT_HOUR24_MINUTE              "Hm"
+#define UDAT_HOUR24_MINUTE_SECOND       "Hms"      
+#define UDAT_HOUR_MINUTE_SECOND         "hms"
+#define UDAT_STANDALONE_MONTH           "LLLL"
+#define UDAT_ABBR_STANDALONE_MONTH      "LLL"
+#define UDAT_YEAR_QUARTER               "yQQQ"
+#define UDAT_YEAR_ABBR_QUARTER          "yQ"
+
+/** @} */
+
+/**
+ * @{
+ * Below are a set of pre-defined skeletons that 
+ * have pre-defined interval patterns in resource files.
+ * Users are encouraged to use them in date interval format factory methods.
+ *
+ */
+#define UDAT_HOUR_MINUTE                "hm"
+#define UDAT_YEAR                       "y"
+#define UDAT_DAY                        "d"
+#define UDAT_NUM_MONTH_WEEKDAY_DAY      "MEd"
+#define UDAT_YEAR_NUM_MONTH             "yM"              
+#define UDAT_NUM_MONTH_DAY              "Md"
+#define UDAT_YEAR_NUM_MONTH_WEEKDAY_DAY "yMEd"
+#define UDAT_ABBR_MONTH_WEEKDAY_DAY     "MMMEd"
+#define UDAT_YEAR_MONTH                 "yMMMM"
+#define UDAT_YEAR_ABBR_MONTH            "yMMM"
+#define UDAT_MONTH_DAY                  "MMMMd"
+#define UDAT_ABBR_MONTH_DAY             "MMMd" 
+#define UDAT_MONTH_WEEKDAY_DAY          "MMMMEEEEd"
+#define UDAT_YEAR_ABBR_MONTH_WEEKDAY_DAY "yMMMEd" 
+#define UDAT_YEAR_MONTH_WEEKDAY_DAY     "yMMMMEEEEd"
+#define UDAT_YEAR_MONTH_DAY             "yMMMMd"
+#define UDAT_YEAR_ABBR_MONTH_DAY        "yMMMd"
+#define UDAT_YEAR_NUM_MONTH_DAY         "yMd"
+#define UDAT_NUM_MONTH                  "M"
+#define UDAT_ABBR_MONTH                 "MMM"
+#define UDAT_MONTH                      "MMMM"
+#define UDAT_HOUR_MINUTE_GENERIC_TZ     "hmv"
+#define UDAT_HOUR_MINUTE_TZ             "hmz"
+#define UDAT_HOUR                       "h"
+#define UDAT_HOUR_GENERIC_TZ            "hv"
+#define UDAT_HOUR_TZ                    "hz"
+
+/** @} */
+
+
+/**
+ * FieldPosition and UFieldPosition selectors for format fields
+ * defined by DateFormat and UDateFormat.
+ * @stable ICU 3.0
+ */
+typedef enum UDateFormatField {
+    /**
+     * FieldPosition and UFieldPosition selector for 'G' field alignment,
+     * corresponding to the UCAL_ERA field.
+     * @stable ICU 3.0
+     */
+    UDAT_ERA_FIELD = 0,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'y' field alignment,
+     * corresponding to the UCAL_YEAR field.
+     * @stable ICU 3.0
+     */
+    UDAT_YEAR_FIELD = 1,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'M' field alignment,
+     * corresponding to the UCAL_MONTH field.
+     * @stable ICU 3.0
+     */
+    UDAT_MONTH_FIELD = 2,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'd' field alignment,
+     * corresponding to the UCAL_DATE field.
+     * @stable ICU 3.0
+     */
+    UDAT_DATE_FIELD = 3,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'k' field alignment,
+     * corresponding to the UCAL_HOUR_OF_DAY field.
+     * UDAT_HOUR_OF_DAY1_FIELD is used for the one-based 24-hour clock.
+     * For example, 23:59 + 01:00 results in 24:59.
+     * @stable ICU 3.0
+     */
+    UDAT_HOUR_OF_DAY1_FIELD = 4,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'H' field alignment,
+     * corresponding to the UCAL_HOUR_OF_DAY field.
+     * UDAT_HOUR_OF_DAY0_FIELD is used for the zero-based 24-hour clock.
+     * For example, 23:59 + 01:00 results in 00:59.
+     * @stable ICU 3.0
+     */
+    UDAT_HOUR_OF_DAY0_FIELD = 5,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'm' field alignment,
+     * corresponding to the UCAL_MINUTE field.
+     * @stable ICU 3.0
+     */
+    UDAT_MINUTE_FIELD = 6,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 's' field alignment,
+     * corresponding to the UCAL_SECOND field.
+     * @stable ICU 3.0
+     */
+    UDAT_SECOND_FIELD = 7,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'S' field alignment,
+     * corresponding to the UCAL_MILLISECOND field.
+     * @stable ICU 3.0
+     */
+    UDAT_FRACTIONAL_SECOND_FIELD = 8,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'E' field alignment,
+     * corresponding to the UCAL_DAY_OF_WEEK field.
+     * @stable ICU 3.0
+     */
+    UDAT_DAY_OF_WEEK_FIELD = 9,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'D' field alignment,
+     * corresponding to the UCAL_DAY_OF_YEAR field.
+     * @stable ICU 3.0
+     */
+    UDAT_DAY_OF_YEAR_FIELD = 10,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'F' field alignment,
+     * corresponding to the UCAL_DAY_OF_WEEK_IN_MONTH field.
+     * @stable ICU 3.0
+     */
+    UDAT_DAY_OF_WEEK_IN_MONTH_FIELD = 11,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'w' field alignment,
+     * corresponding to the UCAL_WEEK_OF_YEAR field.
+     * @stable ICU 3.0
+     */
+    UDAT_WEEK_OF_YEAR_FIELD = 12,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'W' field alignment,
+     * corresponding to the UCAL_WEEK_OF_MONTH field.
+     * @stable ICU 3.0
+     */
+    UDAT_WEEK_OF_MONTH_FIELD = 13,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'a' field alignment,
+     * corresponding to the UCAL_AM_PM field.
+     * @stable ICU 3.0
+     */
+    UDAT_AM_PM_FIELD = 14,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'h' field alignment,
+     * corresponding to the UCAL_HOUR field.
+     * UDAT_HOUR1_FIELD is used for the one-based 12-hour clock.
+     * For example, 11:30 PM + 1 hour results in 12:30 AM.
+     * @stable ICU 3.0
+     */
+    UDAT_HOUR1_FIELD = 15,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'K' field alignment,
+     * corresponding to the UCAL_HOUR field.
+     * UDAT_HOUR0_FIELD is used for the zero-based 12-hour clock.
+     * For example, 11:30 PM + 1 hour results in 00:30 AM.
+     * @stable ICU 3.0
+     */
+    UDAT_HOUR0_FIELD = 16,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'z' field alignment,
+     * corresponding to the UCAL_ZONE_OFFSET and
+     * UCAL_DST_OFFSET fields.
+     * @stable ICU 3.0
+     */
+    UDAT_TIMEZONE_FIELD = 17,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'Y' field alignment,
+     * corresponding to the UCAL_YEAR_WOY field.
+     * @stable ICU 3.0
+     */
+    UDAT_YEAR_WOY_FIELD = 18,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'e' field alignment,
+     * corresponding to the UCAL_DOW_LOCAL field.
+     * @stable ICU 3.0
+     */
+    UDAT_DOW_LOCAL_FIELD = 19,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'u' field alignment,
+     * corresponding to the UCAL_EXTENDED_YEAR field.
+     * @stable ICU 3.0
+     */
+    UDAT_EXTENDED_YEAR_FIELD = 20,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'g' field alignment,
+     * corresponding to the UCAL_JULIAN_DAY field.
+     * @stable ICU 3.0
+     */
+    UDAT_JULIAN_DAY_FIELD = 21,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'A' field alignment,
+     * corresponding to the UCAL_MILLISECONDS_IN_DAY field.
+     * @stable ICU 3.0
+     */
+    UDAT_MILLISECONDS_IN_DAY_FIELD = 22,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'Z' field alignment,
+     * corresponding to the UCAL_ZONE_OFFSET and
+     * UCAL_DST_OFFSET fields.
+     * @stable ICU 3.0
+     */
+    UDAT_TIMEZONE_RFC_FIELD = 23,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'v' field alignment,
+     * corresponding to the UCAL_ZONE_OFFSET field.
+     * @stable ICU 3.4
+     */
+    UDAT_TIMEZONE_GENERIC_FIELD = 24,
+    /**
+     * FieldPosition selector for 'c' field alignment,
+     * corresponding to the {@link #UCAL_DATE} field. 
+     * This displays the stand alone day name, if available.
+     * @stable ICU 3.4
+     */
+    UDAT_STANDALONE_DAY_FIELD = 25,
+    
+    /**
+     * FieldPosition selector for 'L' field alignment,
+     * corresponding to the {@link #UCAL_MONTH} field.  
+     * This displays the stand alone month name, if available.
+     * @stable ICU 3.4
+     */
+    UDAT_STANDALONE_MONTH_FIELD = 26,
+
+    /**
+     * FieldPosition selector for "Q" field alignment,
+     * corresponding to quarters. This is implemented
+     * using the {@link #UCAL_MONTH} field. This
+     * displays the quarter.
+     * @stable ICU 3.6
+     */
+    UDAT_QUARTER_FIELD = 27,
+
+    /**
+     * FieldPosition selector for the "q" field alignment,
+     * corresponding to stand-alone quarters. This is
+     * implemented using the {@link #UCAL_MONTH} field.
+     * This displays the stand-alone quarter.
+     * @stable ICU 3.6
+     */
+    UDAT_STANDALONE_QUARTER_FIELD = 28,
+
+    /**
+     * FieldPosition and UFieldPosition selector for 'V' field alignment,
+     * corresponding to the UCAL_ZONE_OFFSET field.
+     * @stable ICU 3.8
+     */
+    UDAT_TIMEZONE_SPECIAL_FIELD = 29,
+
+   /**
+     * Number of FieldPosition and UFieldPosition selectors for 
+     * DateFormat and UDateFormat.
+     * Valid selectors range from 0 to UDAT_FIELD_COUNT-1.
+     * This value is subject to change if new fields are defined
+     * in the future.
+     * @stable ICU 3.0
+     */
+    UDAT_FIELD_COUNT = 30
+
+} UDateFormatField;
+
+/**
+ * Open a new UDateFormat for formatting and parsing dates and times.
+ * A UDateFormat may be used to format dates in calls to {@link #udat_format },
+ * and to parse dates in calls to {@link #udat_parse }.
+ * @param timeStyle The style used to format times; one of UDAT_FULL, UDAT_LONG,
+ * UDAT_MEDIUM, UDAT_SHORT, UDAT_DEFAULT, or UDAT_NONE (relative time styles
+ * are not currently supported)
+ * @param dateStyle The style used to format dates; one of UDAT_FULL, UDAT_LONG,
+ * UDAT_MEDIUM, UDAT_SHORT, UDAT_DEFAULT, UDAT_FULL_RELATIVE, UDAT_LONG_RELATIVE,
+ * UDAT_MEDIUM_RELATIVE, UDAT_SHORT_RELATIVE, or UDAT_NONE
+ * @param locale The locale specifying the formatting conventions
+ * @param tzID A timezone ID specifying the timezone to use.  If 0, use
+ * the default timezone.
+ * @param tzIDLength The length of tzID, or -1 if null-terminated.
+ * @param pattern A pattern specifying the format to use.
+ * @param patternLength The number of characters in the pattern, or -1 if null-terminated.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return A pointer to a UDateFormat to use for formatting dates and times, or 0 if
+ * an error occurred.
+ * @stable ICU 2.0
+ */
+U_STABLE UDateFormat* U_EXPORT2 
+udat_open(UDateFormatStyle  timeStyle,
+          UDateFormatStyle  dateStyle,
+          const char        *locale,
+          const UChar       *tzID,
+          int32_t           tzIDLength,
+          const UChar       *pattern,
+          int32_t           patternLength,
+          UErrorCode        *status);
+
+
+/**
+* Close a UDateFormat.
+* Once closed, a UDateFormat may no longer be used.
+* @param format The formatter to close.
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2 
+udat_close(UDateFormat* format);
+
+/**
+ * Open a copy of a UDateFormat.
+ * This function performs a deep copy.
+ * @param fmt The format to copy
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UDateFormat identical to fmt.
+ * @stable ICU 2.0
+ */
+U_STABLE UDateFormat* U_EXPORT2 
+udat_clone(const UDateFormat *fmt,
+       UErrorCode *status);
+
+/**
+* Format a date using an UDateFormat.
+* The date will be formatted using the conventions specified in {@link #udat_open }
+* @param format The formatter to use
+* @param dateToFormat The date to format
+* @param result A pointer to a buffer to receive the formatted number.
+* @param resultLength The maximum size of result.
+* @param position A pointer to a UFieldPosition.  On input, position->field
+* is read.  On output, position->beginIndex and position->endIndex indicate
+* the beginning and ending indices of field number position->field, if such
+* a field exists.  This parameter may be NULL, in which case no field
+* position data is returned.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see udat_parse
+* @see UFieldPosition
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2 
+udat_format(    const    UDateFormat*    format,
+                        UDate           dateToFormat,
+                        UChar*          result,
+                        int32_t         resultLength,
+                        UFieldPosition* position,
+                        UErrorCode*     status);
+
+/**
+* Parse a string into an date/time using a UDateFormat.
+* The date will be parsed using the conventions specified in {@link #udat_open }
+* @param format The formatter to use.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
+* to begin parsing.  If not 0, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The value of the parsed date/time
+* @see udat_format
+* @stable ICU 2.0
+*/
+U_STABLE UDate U_EXPORT2 
+udat_parse(    const    UDateFormat*    format,
+            const    UChar*          text,
+                    int32_t         textLength,
+                    int32_t         *parsePos,
+                    UErrorCode      *status);
+
+/**
+* Parse a string into an date/time using a UDateFormat.
+* The date will be parsed using the conventions specified in {@link #udat_open }
+* @param format The formatter to use.
+* @param calendar The calendar in which to store the parsed data.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
+* to begin parsing.  If not 0, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @see udat_format
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2 
+udat_parseCalendar(const    UDateFormat*    format,
+                            UCalendar*      calendar,
+                   const    UChar*          text,
+                            int32_t         textLength,
+                            int32_t         *parsePos,
+                            UErrorCode      *status);
+
+/**
+* Determine if an UDateFormat will perform lenient parsing.
+* With lenient parsing, the parser may use heuristics to interpret inputs that do not
+* precisely match the pattern. With strict parsing, inputs must match the pattern.
+* @param fmt The formatter to query
+* @return TRUE if fmt is set to perform lenient parsing, FALSE otherwise.
+* @see udat_setLenient
+* @stable ICU 2.0
+*/
+U_STABLE UBool U_EXPORT2 
+udat_isLenient(const UDateFormat* fmt);
+
+/**
+* Specify whether an UDateFormat will perform lenient parsing.
+* With lenient parsing, the parser may use heuristics to interpret inputs that do not
+* precisely match the pattern. With strict parsing, inputs must match the pattern.
+* @param fmt The formatter to set
+* @param isLenient TRUE if fmt should perform lenient parsing, FALSE otherwise.
+* @see dat_isLenient
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2 
+udat_setLenient(    UDateFormat*    fmt,
+                    UBool          isLenient);
+
+/**
+* Get the UCalendar associated with an UDateFormat.
+* A UDateFormat uses a UCalendar to convert a raw value to, for example,
+* the day of the week.
+* @param fmt The formatter to query.
+* @return A pointer to the UCalendar used by fmt.
+* @see udat_setCalendar
+* @stable ICU 2.0
+*/
+U_STABLE const UCalendar* U_EXPORT2 
+udat_getCalendar(const UDateFormat* fmt);
+
+/**
+* Set the UCalendar associated with an UDateFormat.
+* A UDateFormat uses a UCalendar to convert a raw value to, for example,
+* the day of the week.
+* @param fmt The formatter to set.
+* @param calendarToSet A pointer to an UCalendar to be used by fmt.
+* @see udat_setCalendar
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2 
+udat_setCalendar(            UDateFormat*    fmt,
+                    const   UCalendar*      calendarToSet);
+
+/**
+* Get the UNumberFormat associated with an UDateFormat.
+* A UDateFormat uses a UNumberFormat to format numbers within a date,
+* for example the day number.
+* @param fmt The formatter to query.
+* @return A pointer to the UNumberFormat used by fmt to format numbers.
+* @see udat_setNumberFormat
+* @stable ICU 2.0
+*/
+U_STABLE const UNumberFormat* U_EXPORT2 
+udat_getNumberFormat(const UDateFormat* fmt);
+
+/**
+* Set the UNumberFormat associated with an UDateFormat.
+* A UDateFormat uses a UNumberFormat to format numbers within a date,
+* for example the day number.
+* @param fmt The formatter to set.
+* @param numberFormatToSet A pointer to the UNumberFormat to be used by fmt to format numbers.
+* @see udat_getNumberFormat
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2 
+udat_setNumberFormat(            UDateFormat*    fmt,
+                        const   UNumberFormat*  numberFormatToSet);
+
+/**
+* Get a locale for which date/time formatting patterns are available.
+* A UDateFormat in a locale returned by this function will perform the correct
+* formatting and parsing for the locale.
+* @param localeIndex The index of the desired locale.
+* @return A locale for which date/time formatting patterns are available, or 0 if none.
+* @see udat_countAvailable
+* @stable ICU 2.0
+*/
+U_STABLE const char* U_EXPORT2 
+udat_getAvailable(int32_t localeIndex);
+
+/**
+* Determine how many locales have date/time  formatting patterns available.
+* This function is most useful as determining the loop ending condition for
+* calls to {@link #udat_getAvailable }.
+* @return The number of locales for which date/time formatting patterns are available.
+* @see udat_getAvailable
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2 
+udat_countAvailable(void);
+
+/**
+* Get the year relative to which all 2-digit years are interpreted.
+* For example, if the 2-digit start year is 2100, the year 99 will be
+* interpreted as 2199.
+* @param fmt The formatter to query.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The year relative to which all 2-digit years are interpreted.
+* @see udat_Set2DigitYearStart
+* @stable ICU 2.0
+*/
+U_STABLE UDate U_EXPORT2 
+udat_get2DigitYearStart(    const   UDateFormat     *fmt,
+                                    UErrorCode      *status);
+
+/**
+* Set the year relative to which all 2-digit years will be interpreted.
+* For example, if the 2-digit start year is 2100, the year 99 will be
+* interpreted as 2199.
+* @param fmt The formatter to set.
+* @param d The year relative to which all 2-digit years will be interpreted.
+* @param status A pointer to an UErrorCode to receive any errors
+* @see udat_Set2DigitYearStart
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2 
+udat_set2DigitYearStart(    UDateFormat     *fmt,
+                            UDate           d,
+                            UErrorCode      *status);
+
+/**
+* Extract the pattern from a UDateFormat.
+* The pattern will follow the pattern syntax rules.
+* @param fmt The formatter to query.
+* @param localized TRUE if the pattern should be localized, FALSE otherwise.
+* @param result A pointer to a buffer to receive the pattern.
+* @param resultLength The maximum size of result.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see udat_applyPattern
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2 
+udat_toPattern(    const   UDateFormat     *fmt,
+                        UBool          localized,
+                        UChar           *result,
+                        int32_t         resultLength,
+                        UErrorCode      *status);
+
+/**
+* Set the pattern used by an UDateFormat.
+* The pattern should follow the pattern syntax rules.
+* @param format The formatter to set.
+* @param localized TRUE if the pattern is localized, FALSE otherwise.
+* @param pattern The new pattern
+* @param patternLength The length of pattern, or -1 if null-terminated.
+* @see udat_toPattern
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2 
+udat_applyPattern(            UDateFormat     *format,
+                            UBool          localized,
+                    const   UChar           *pattern,
+                            int32_t         patternLength);
+
+/** 
+ * The possible types of date format symbols 
+ * @stable ICU 2.6
+ */
+typedef enum UDateFormatSymbolType {
+    /** The era names, for example AD */
+    UDAT_ERAS,
+    /** The month names, for example February */
+    UDAT_MONTHS,
+    /** The short month names, for example Feb. */
+    UDAT_SHORT_MONTHS,
+    /** The weekday names, for example Monday */
+    UDAT_WEEKDAYS,
+    /** The short weekday names, for example Mon. */
+    UDAT_SHORT_WEEKDAYS,
+    /** The AM/PM names, for example AM */
+    UDAT_AM_PMS,
+    /** The localized characters */
+    UDAT_LOCALIZED_CHARS,
+    /** The long era names, for example Anno Domini */
+    UDAT_ERA_NAMES,
+    /** The narrow month names, for example F */
+    UDAT_NARROW_MONTHS,
+    /** The narrow weekday names, for example N */
+    UDAT_NARROW_WEEKDAYS,
+    /** Standalone context versions of months */
+    UDAT_STANDALONE_MONTHS,
+    UDAT_STANDALONE_SHORT_MONTHS,
+    UDAT_STANDALONE_NARROW_MONTHS,
+    /** Standalone context versions of weekdays */
+    UDAT_STANDALONE_WEEKDAYS,
+    UDAT_STANDALONE_SHORT_WEEKDAYS,
+    UDAT_STANDALONE_NARROW_WEEKDAYS,
+    /** The quarters, for example 1st Quarter */
+    UDAT_QUARTERS,
+    /** The short quarter names, for example Q1 */
+    UDAT_SHORT_QUARTERS,
+    /** Standalone context versions of quarters */
+    UDAT_STANDALONE_QUARTERS,
+    UDAT_STANDALONE_SHORT_QUARTERS
+
+} UDateFormatSymbolType;
+
+struct UDateFormatSymbols;
+/** Date format symbols.
+ *  For usage in C programs.
+ *  @stable ICU 2.6
+ */
+typedef struct UDateFormatSymbols UDateFormatSymbols;
+
+/**
+* Get the symbols associated with an UDateFormat.
+* The symbols are what a UDateFormat uses to represent locale-specific data,
+* for example month or day names.
+* @param fmt The formatter to query.
+* @param type The type of symbols to get.  One of UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS,
+* UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS, UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS
+* @param symbolIndex The desired symbol of type type.
+* @param result A pointer to a buffer to receive the pattern.
+* @param resultLength The maximum size of result.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see udat_countSymbols
+* @see udat_setSymbols
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2 
+udat_getSymbols(const   UDateFormat             *fmt,
+                        UDateFormatSymbolType   type,
+                        int32_t                 symbolIndex,
+                        UChar                   *result,
+                        int32_t                 resultLength,
+                        UErrorCode              *status);
+
+/**
+* Count the number of particular symbols for an UDateFormat.
+* This function is most useful as for detemining the loop termination condition
+* for calls to {@link #udat_getSymbols }.
+* @param fmt The formatter to query.
+* @param type The type of symbols to count.  One of UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS,
+* UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS, UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS
+* @return The number of symbols of type type.
+* @see udat_getSymbols
+* @see udat_setSymbols
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2 
+udat_countSymbols(    const    UDateFormat                *fmt,
+                            UDateFormatSymbolType    type);
+
+/**
+* Set the symbols associated with an UDateFormat.
+* The symbols are what a UDateFormat uses to represent locale-specific data,
+* for example month or day names.
+* @param format The formatter to set
+* @param type The type of symbols to set.  One of UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS,
+* UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS, UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS
+* @param symbolIndex The index of the symbol to set of type type.
+* @param value The new value
+* @param valueLength The length of value, or -1 if null-terminated
+* @param status A pointer to an UErrorCode to receive any errors
+* @see udat_getSymbols
+* @see udat_countSymbols
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2 
+udat_setSymbols(    UDateFormat             *format,
+                    UDateFormatSymbolType   type,
+                    int32_t                 symbolIndex,
+                    UChar                   *value,
+                    int32_t                 valueLength,
+                    UErrorCode              *status);
+
+/**
+ * Get the locale for this date format object.
+ * You can choose between valid and actual locale.
+ * @param fmt The formatter to get the locale from
+ * @param type type of the locale we're looking for (valid or actual) 
+ * @param status error code for the operation
+ * @return the locale name
+ * @stable ICU 2.8
+ */
+U_STABLE const char* U_EXPORT2
+udat_getLocaleByType(const UDateFormat *fmt,
+                     ULocDataLocaleType type,
+                     UErrorCode* status); 
+
+/**
+* Extract the date pattern from a UDateFormat set for relative date formatting.
+* The pattern will follow the pattern syntax rules.
+* @param fmt The formatter to query.
+* @param result A pointer to a buffer to receive the pattern.
+* @param resultLength The maximum size of result.
+* @param status A pointer to a UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see udat_applyPatternRelative
+* @internal ICU 4.2 technology preview
+*/
+U_INTERNAL int32_t U_EXPORT2 
+udat_toPatternRelativeDate(const UDateFormat *fmt,
+                           UChar             *result,
+                           int32_t           resultLength,
+                           UErrorCode        *status);
+
+/**
+* Extract the time pattern from a UDateFormat set for relative date formatting.
+* The pattern will follow the pattern syntax rules.
+* @param fmt The formatter to query.
+* @param result A pointer to a buffer to receive the pattern.
+* @param resultLength The maximum size of result.
+* @param status A pointer to a UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see udat_applyPatternRelative
+* @internal ICU 4.2 technology preview
+*/
+U_INTERNAL int32_t U_EXPORT2 
+udat_toPatternRelativeTime(const UDateFormat *fmt,
+                           UChar             *result,
+                           int32_t           resultLength,
+                           UErrorCode        *status);
+
+/**
+* Set the date & time patterns used by a UDateFormat set for relative date formatting.
+* The patterns should follow the pattern syntax rules.
+* @param format The formatter to set.
+* @param datePattern The new date pattern
+* @param datePatternLength The length of datePattern, or -1 if null-terminated.
+* @param timePattern The new time pattern
+* @param timePatternLength The length of timePattern, or -1 if null-terminated.
+* @param status A pointer to a UErrorCode to receive any errors
+* @see udat_toPatternRelativeDate, udat_toPatternRelativeTime
+* @internal ICU 4.2 technology preview
+*/
+U_INTERNAL void U_EXPORT2 
+udat_applyPatternRelative(UDateFormat *format,
+                          const UChar *datePattern,
+                          int32_t     datePatternLength,
+                          const UChar *timePattern,
+                          int32_t     timePatternLength,
+                          UErrorCode  *status);
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/udata.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/udata.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/udata.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,389 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 1999-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-******************************************************************************
-*   file name:  udata.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 1999oct25
-*   created by: Markus W. Scherer
-*/
-
-#ifndef __UDATA_H__
-#define __UDATA_H__
-
-#include "unicode/utypes.h"
-
-U_CDECL_BEGIN
-
-/**
- * \file
- * \brief C API: Data loading interface
- *
- * <h2>Information about data loading interface</h2>
- *
- * This API is used to find and efficiently load data for ICU and applications
- * using ICU. It provides an abstract interface that specifies a data type and
- * name to find and load the data. Normally this API is used by other ICU APIs
- * to load required data out of the ICU data library, but it can be used to
- * load data out of other places.
- *
- * See the User Guide Data Management chapter.
- */
- 
-#ifndef U_HIDE_INTERNAL_API
-/**
- * Character used to separate package names from tree names 
- * @internal ICU 3.0
- */
-#define U_TREE_SEPARATOR '-'
-
-/**
- * String used to separate package names from tree names 
- * @internal ICU 3.0
- */
-#define U_TREE_SEPARATOR_STRING "-"
-
-/**
- * Character used to separate parts of entry names
- * @internal ICU 3.0
- */
-#define U_TREE_ENTRY_SEP_CHAR '/'
-
-/**
- * String used to separate parts of entry names
- * @internal ICU 3.0
- */
-#define U_TREE_ENTRY_SEP_STRING "/"
-
-/**
- * Alias for standard ICU data 
- * @internal ICU 3.0
- */
-#define U_ICUDATA_ALIAS "ICUDATA"
-
-#endif /* U_HIDE_INTERNAL_API */
-
-/**
- * UDataInfo contains the properties about the requested data.
- * This is meta data.
- *
- * <p>This structure may grow in the future, indicated by the
- * <code>size</code> field.</p>
- *
- * <p>The platform data property fields help determine if a data
- * file can be efficiently used on a given machine.
- * The particular fields are of importance only if the data
- * is affected by the properties - if there is integer data
- * with word sizes > 1 byte, char* text, or UChar* text.</p>
- *
- * <p>The implementation for the <code>udata_open[Choice]()</code>
- * functions may reject data based on the value in <code>isBigEndian</code>.
- * No other field is used by the <code>udata</code> API implementation.</p>
- *
- * <p>The <code>dataFormat</code> may be used to identify
- * the kind of data, e.g. a converter table.</p>
- *
- * <p>The <code>formatVersion</code> field should be used to
- * make sure that the format can be interpreted.
- * I may be a good idea to check only for the one or two highest
- * of the version elements to allow the data memory to
- * get more or somewhat rearranged contents, for as long
- * as the using code can still interpret the older contents.</p>
- *
- * <p>The <code>dataVersion</code> field is intended to be a
- * common place to store the source version of the data;
- * for data from the Unicode character database, this could
- * reflect the Unicode version.</p>
- * @stable ICU 2.0
- */
-typedef struct {
-    /** sizeof(UDataInfo)
-     *  @stable ICU 2.0 */
-    uint16_t size;
-
-    /** unused, set to 0 
-     *  @stable ICU 2.0*/
-    uint16_t reservedWord;
-
-    /* platform data properties */
-    /** 0 for little-endian machine, 1 for big-endian
-     *  @stable ICU 2.0 */
-    uint8_t isBigEndian;
-
-    /** see U_CHARSET_FAMILY values in utypes.h 
-     *  @stable ICU 2.0*/
-    uint8_t charsetFamily;
-
-    /** sizeof(UChar), one of { 1, 2, 4 } 
-     *  @stable ICU 2.0*/
-    uint8_t sizeofUChar;
-
-    /** unused, set to 0 
-     *  @stable ICU 2.0*/
-    uint8_t reservedByte;
-
-    /** data format identifier 
-     *  @stable ICU 2.0*/
-    uint8_t dataFormat[4];
-
-    /** versions: [0] major [1] minor [2] milli [3] micro 
-     *  @stable ICU 2.0*/
-    uint8_t formatVersion[4];
-
-    /** versions: [0] major [1] minor [2] milli [3] micro 
-     *  @stable ICU 2.0*/
-    uint8_t dataVersion[4];
-} UDataInfo;
-
-/* API for reading data -----------------------------------------------------*/
-
-/**
- * Forward declaration of the data memory type.
- * @stable ICU 2.0
- */
-typedef struct UDataMemory UDataMemory;
-
-/**
- * Callback function for udata_openChoice().
- * @param context parameter passed into <code>udata_openChoice()</code>.
- * @param type The type of the data as passed into <code>udata_openChoice()</code>.
- *             It may be <code>NULL</code>.
- * @param name The name of the data as passed into <code>udata_openChoice()</code>.
- * @param pInfo A pointer to the <code>UDataInfo</code> structure
- *              of data that has been loaded and will be returned
- *              by <code>udata_openChoice()</code> if this function
- *              returns <code>TRUE</code>.
- * @return TRUE if the current data memory is acceptable
- * @stable ICU 2.0
- */
-typedef UBool U_CALLCONV
-UDataMemoryIsAcceptable(void *context,
-                        const char *type, const char *name,
-                        const UDataInfo *pInfo);
-
-
-/**
- * Convenience function.
- * This function works the same as <code>udata_openChoice</code>
- * except that any data that matches the type and name
- * is assumed to be acceptable.
- * @param path Specifies an absolute path and/or a basename for the
- *             finding of the data in the file system.
- *             <code>NULL</code> for ICU data.
- * @param type A string that specifies the type of data to be loaded.
- *             For example, resource bundles are loaded with type "res",
- *             conversion tables with type "cnv".
- *             This may be <code>NULL</code> or empty.
- * @param name A string that specifies the name of the data.
- * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
- * @return A pointer (handle) to a data memory object, or <code>NULL</code>
- *         if an error occurs. Call <code>udata_getMemory()</code>
- *         to get a pointer to the actual data.
- *
- * @see udata_openChoice
- * @stable ICU 2.0
- */
-U_STABLE UDataMemory * U_EXPORT2
-udata_open(const char *path, const char *type, const char *name,
-           UErrorCode *pErrorCode);
-
-/**
- * Data loading function.
- * This function is used to find and load efficiently data for
- * ICU and applications using ICU.
- * It provides an abstract interface that allows to specify a data
- * type and name to find and load the data.
- *
- * <p>The implementation depends on platform properties and user preferences
- * and may involve loading shared libraries (DLLs), mapping
- * files into memory, or fopen()/fread() files.
- * It may also involve using static memory or database queries etc.
- * Several or all data items may be combined into one entity
- * (DLL, memory-mappable file).</p>
- *
- * <p>The data is always preceded by a header that includes
- * a <code>UDataInfo</code> structure.
- * The caller's <code>isAcceptable()</code> function is called to make
- * sure that the data is useful. It may be called several times if it
- * rejects the data and there is more than one location with data
- * matching the type and name.</p>
- *
- * <p>If <code>path==NULL</code>, then ICU data is loaded.
- * Otherwise, it is separated into a basename and a basename-less directory string.
- * The basename is used as the data package name, and the directory is
- * logically prepended to the ICU data directory string.</p>
- *
- * <p>For details about ICU data loading see the User Guide
- * Data Management chapter. (http://icu-project.org/userguide/icudata.html)</p>
- *
- * @param path Specifies an absolute path and/or a basename for the
- *             finding of the data in the file system.
- *             <code>NULL</code> for ICU data.
- * @param type A string that specifies the type of data to be loaded.
- *             For example, resource bundles are loaded with type "res",
- *             conversion tables with type "cnv".
- *             This may be <code>NULL</code> or empty.
- * @param name A string that specifies the name of the data.
- * @param isAcceptable This function is called to verify that loaded data
- *                     is useful for the client code. If it returns FALSE
- *                     for all data items, then <code>udata_openChoice()</code>
- *                     will return with an error.
- * @param context Arbitrary parameter to be passed into isAcceptable.
- * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
- * @return A pointer (handle) to a data memory object, or <code>NULL</code>
- *         if an error occurs. Call <code>udata_getMemory()</code>
- *         to get a pointer to the actual data.
- * @stable ICU 2.0
- */
-U_STABLE UDataMemory * U_EXPORT2
-udata_openChoice(const char *path, const char *type, const char *name,
-                 UDataMemoryIsAcceptable *isAcceptable, void *context,
-                 UErrorCode *pErrorCode);
-
-/**
- * Close the data memory.
- * This function must be called to allow the system to
- * release resources associated with this data memory.
- * @param pData The pointer to data memory object
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-udata_close(UDataMemory *pData);
-
-/**
- * Get the pointer to the actual data inside the data memory.
- * The data is read-only.
- * @param pData The pointer to data memory object
- * @stable ICU 2.0
- */
-U_STABLE const void * U_EXPORT2
-udata_getMemory(UDataMemory *pData);
-
-/**
- * Get the information from the data memory header.
- * This allows to get access to the header containing
- * platform data properties etc. which is not part of
- * the data itself and can therefore not be accessed
- * via the pointer that <code>udata_getMemory()</code> returns.
- *
- * @param pData pointer to the data memory object
- * @param pInfo pointer to a UDataInfo object;
- *              its <code>size</code> field must be set correctly,
- *              typically to <code>sizeof(UDataInfo)</code>.
- *
- * <code>*pInfo</code> will be filled with the UDataInfo structure
- * in the data memory object. If this structure is smaller than
- * <code>pInfo->size</code>, then the <code>size</code> will be
- * adjusted and only part of the structure will be filled.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
-
-/**
- * This function bypasses the normal ICU data loading process and
- * allows you to force ICU's system data to come out of a user-specified
- * area in memory.
- *
- * The format of this data is that of the icu common data file, as is
- * generated by the pkgdata tool with mode=common or mode=dll.
- * You can read in a whole common mode file and pass the address to the start of the
- * data, or (with the appropriate link options) pass in the pointer to
- * the data that has been loaded from a dll by the operating system,
- * as shown in this code:
- *
- *       extern const  char U_IMPORT U_ICUDATA_ENTRY_POINT []; 
- *        // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool
- *       UErrorCode  status = U_ZERO_ERROR;
- *
- *       udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status);
- *
- * Warning: ICU must NOT have even attempted to access its data yet
- * when this call is made, or U_USING_DEFAULT_WARNING code will
- * be returned. Be careful of UnicodeStrings in static initialization which
- * may attempt to load a converter (use the UNICODE_STRING(x) macro instead).
- *
- * Also note that it is important that the declaration be as above. The entry point
- * must not be declared as an extern void*.
- *
- * This function has no effect on application (non ICU) data.  See udata_setAppData()
- * for similar functionality for application data.
- *
- * @param data pointer to ICU common data
- * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
- * @stable ICU 2.0
- */
-
-U_STABLE void U_EXPORT2
-udata_setCommonData(const void *data, UErrorCode *err);
-
-
-/**
- * This function bypasses the normal ICU data loading process for application-specific
- * data and allows you to force the it to come out of a user-specified
- * pointer.
- *
- * The format of this data is that of the icu common data file, like 'icudt26l.dat'
- * or the corresponding shared library (DLL) file.
- * The application must read in or otherwise construct an image of the data and then
- * pass the address of it to this function.
- *
- *
- * Warning:  setAppData will set a U_USING_DEFAULT_WARNING code if
- *           data with the specifed path that has already been opened, or
- *           if setAppData with the same path has already been called.
- *           Any such calls to setAppData will have no effect.
- *
- *
- * @param packageName the package name by which the application will refer
- *             to (open) this data
- * @param data pointer to the data
- * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
- * @see udata_setCommonData
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-udata_setAppData(const char *packageName, const void *data, UErrorCode *err);
-
-/**
- * Possible settings for udata_setFileAccess()
- * @see udata_setFileAccess
- * @stable ICU 3.4
- */
-typedef enum UDataFileAccess {
-    /** ICU looks for data in single files first, then in packages. (default) */
-    UDATA_FILES_FIRST,
-    /** ICU only loads data from packages, not from single files. */
-    UDATA_ONLY_PACKAGES,
-    /** ICU loads data from packages first, and only from single files
-        if the data cannot be found in a package. */
-    UDATA_PACKAGES_FIRST,
-    /** ICU does not access the file system for data loading. */
-    UDATA_NO_FILES,
-    /** An alias for the default access mode. */
-    UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST,
-    UDATA_FILE_ACCESS_COUNT
-} UDataFileAccess;
-
-/**
- * This function may be called to control how ICU loads data. It must be called
- * before any ICU data is loaded, including application data loaded with ures/ResourceBundle or
- * udata APIs. It should be called before u_init.  This function is not multithread safe.  
- * The results of calling it while other threads are loading data are undefined.
- * @param access The type of file access to be used
- * @param status Error code.
- * @see UDataFileAccess
- * @stable ICU 3.4 
- */
-U_STABLE void U_EXPORT2
-udata_setFileAccess(UDataFileAccess access, UErrorCode *status);
-
-U_CDECL_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/udata.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/udata.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/udata.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/udata.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,389 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1999-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  udata.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999oct25
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UDATA_H__
+#define __UDATA_H__
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ * \brief C API: Data loading interface
+ *
+ * <h2>Information about data loading interface</h2>
+ *
+ * This API is used to find and efficiently load data for ICU and applications
+ * using ICU. It provides an abstract interface that specifies a data type and
+ * name to find and load the data. Normally this API is used by other ICU APIs
+ * to load required data out of the ICU data library, but it can be used to
+ * load data out of other places.
+ *
+ * See the User Guide Data Management chapter.
+ */
+ 
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Character used to separate package names from tree names 
+ * @internal ICU 3.0
+ */
+#define U_TREE_SEPARATOR '-'
+
+/**
+ * String used to separate package names from tree names 
+ * @internal ICU 3.0
+ */
+#define U_TREE_SEPARATOR_STRING "-"
+
+/**
+ * Character used to separate parts of entry names
+ * @internal ICU 3.0
+ */
+#define U_TREE_ENTRY_SEP_CHAR '/'
+
+/**
+ * String used to separate parts of entry names
+ * @internal ICU 3.0
+ */
+#define U_TREE_ENTRY_SEP_STRING "/"
+
+/**
+ * Alias for standard ICU data 
+ * @internal ICU 3.0
+ */
+#define U_ICUDATA_ALIAS "ICUDATA"
+
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * UDataInfo contains the properties about the requested data.
+ * This is meta data.
+ *
+ * <p>This structure may grow in the future, indicated by the
+ * <code>size</code> field.</p>
+ *
+ * <p>The platform data property fields help determine if a data
+ * file can be efficiently used on a given machine.
+ * The particular fields are of importance only if the data
+ * is affected by the properties - if there is integer data
+ * with word sizes > 1 byte, char* text, or UChar* text.</p>
+ *
+ * <p>The implementation for the <code>udata_open[Choice]()</code>
+ * functions may reject data based on the value in <code>isBigEndian</code>.
+ * No other field is used by the <code>udata</code> API implementation.</p>
+ *
+ * <p>The <code>dataFormat</code> may be used to identify
+ * the kind of data, e.g. a converter table.</p>
+ *
+ * <p>The <code>formatVersion</code> field should be used to
+ * make sure that the format can be interpreted.
+ * I may be a good idea to check only for the one or two highest
+ * of the version elements to allow the data memory to
+ * get more or somewhat rearranged contents, for as long
+ * as the using code can still interpret the older contents.</p>
+ *
+ * <p>The <code>dataVersion</code> field is intended to be a
+ * common place to store the source version of the data;
+ * for data from the Unicode character database, this could
+ * reflect the Unicode version.</p>
+ * @stable ICU 2.0
+ */
+typedef struct {
+    /** sizeof(UDataInfo)
+     *  @stable ICU 2.0 */
+    uint16_t size;
+
+    /** unused, set to 0 
+     *  @stable ICU 2.0*/
+    uint16_t reservedWord;
+
+    /* platform data properties */
+    /** 0 for little-endian machine, 1 for big-endian
+     *  @stable ICU 2.0 */
+    uint8_t isBigEndian;
+
+    /** see U_CHARSET_FAMILY values in utypes.h 
+     *  @stable ICU 2.0*/
+    uint8_t charsetFamily;
+
+    /** sizeof(UChar), one of { 1, 2, 4 } 
+     *  @stable ICU 2.0*/
+    uint8_t sizeofUChar;
+
+    /** unused, set to 0 
+     *  @stable ICU 2.0*/
+    uint8_t reservedByte;
+
+    /** data format identifier 
+     *  @stable ICU 2.0*/
+    uint8_t dataFormat[4];
+
+    /** versions: [0] major [1] minor [2] milli [3] micro 
+     *  @stable ICU 2.0*/
+    uint8_t formatVersion[4];
+
+    /** versions: [0] major [1] minor [2] milli [3] micro 
+     *  @stable ICU 2.0*/
+    uint8_t dataVersion[4];
+} UDataInfo;
+
+/* API for reading data -----------------------------------------------------*/
+
+/**
+ * Forward declaration of the data memory type.
+ * @stable ICU 2.0
+ */
+typedef struct UDataMemory UDataMemory;
+
+/**
+ * Callback function for udata_openChoice().
+ * @param context parameter passed into <code>udata_openChoice()</code>.
+ * @param type The type of the data as passed into <code>udata_openChoice()</code>.
+ *             It may be <code>NULL</code>.
+ * @param name The name of the data as passed into <code>udata_openChoice()</code>.
+ * @param pInfo A pointer to the <code>UDataInfo</code> structure
+ *              of data that has been loaded and will be returned
+ *              by <code>udata_openChoice()</code> if this function
+ *              returns <code>TRUE</code>.
+ * @return TRUE if the current data memory is acceptable
+ * @stable ICU 2.0
+ */
+typedef UBool U_CALLCONV
+UDataMemoryIsAcceptable(void *context,
+                        const char *type, const char *name,
+                        const UDataInfo *pInfo);
+
+
+/**
+ * Convenience function.
+ * This function works the same as <code>udata_openChoice</code>
+ * except that any data that matches the type and name
+ * is assumed to be acceptable.
+ * @param path Specifies an absolute path and/or a basename for the
+ *             finding of the data in the file system.
+ *             <code>NULL</code> for ICU data.
+ * @param type A string that specifies the type of data to be loaded.
+ *             For example, resource bundles are loaded with type "res",
+ *             conversion tables with type "cnv".
+ *             This may be <code>NULL</code> or empty.
+ * @param name A string that specifies the name of the data.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * @return A pointer (handle) to a data memory object, or <code>NULL</code>
+ *         if an error occurs. Call <code>udata_getMemory()</code>
+ *         to get a pointer to the actual data.
+ *
+ * @see udata_openChoice
+ * @stable ICU 2.0
+ */
+U_STABLE UDataMemory * U_EXPORT2
+udata_open(const char *path, const char *type, const char *name,
+           UErrorCode *pErrorCode);
+
+/**
+ * Data loading function.
+ * This function is used to find and load efficiently data for
+ * ICU and applications using ICU.
+ * It provides an abstract interface that allows to specify a data
+ * type and name to find and load the data.
+ *
+ * <p>The implementation depends on platform properties and user preferences
+ * and may involve loading shared libraries (DLLs), mapping
+ * files into memory, or fopen()/fread() files.
+ * It may also involve using static memory or database queries etc.
+ * Several or all data items may be combined into one entity
+ * (DLL, memory-mappable file).</p>
+ *
+ * <p>The data is always preceded by a header that includes
+ * a <code>UDataInfo</code> structure.
+ * The caller's <code>isAcceptable()</code> function is called to make
+ * sure that the data is useful. It may be called several times if it
+ * rejects the data and there is more than one location with data
+ * matching the type and name.</p>
+ *
+ * <p>If <code>path==NULL</code>, then ICU data is loaded.
+ * Otherwise, it is separated into a basename and a basename-less directory string.
+ * The basename is used as the data package name, and the directory is
+ * logically prepended to the ICU data directory string.</p>
+ *
+ * <p>For details about ICU data loading see the User Guide
+ * Data Management chapter. (http://icu-project.org/userguide/icudata.html)</p>
+ *
+ * @param path Specifies an absolute path and/or a basename for the
+ *             finding of the data in the file system.
+ *             <code>NULL</code> for ICU data.
+ * @param type A string that specifies the type of data to be loaded.
+ *             For example, resource bundles are loaded with type "res",
+ *             conversion tables with type "cnv".
+ *             This may be <code>NULL</code> or empty.
+ * @param name A string that specifies the name of the data.
+ * @param isAcceptable This function is called to verify that loaded data
+ *                     is useful for the client code. If it returns FALSE
+ *                     for all data items, then <code>udata_openChoice()</code>
+ *                     will return with an error.
+ * @param context Arbitrary parameter to be passed into isAcceptable.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * @return A pointer (handle) to a data memory object, or <code>NULL</code>
+ *         if an error occurs. Call <code>udata_getMemory()</code>
+ *         to get a pointer to the actual data.
+ * @stable ICU 2.0
+ */
+U_STABLE UDataMemory * U_EXPORT2
+udata_openChoice(const char *path, const char *type, const char *name,
+                 UDataMemoryIsAcceptable *isAcceptable, void *context,
+                 UErrorCode *pErrorCode);
+
+/**
+ * Close the data memory.
+ * This function must be called to allow the system to
+ * release resources associated with this data memory.
+ * @param pData The pointer to data memory object
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_close(UDataMemory *pData);
+
+/**
+ * Get the pointer to the actual data inside the data memory.
+ * The data is read-only.
+ * @param pData The pointer to data memory object
+ * @stable ICU 2.0
+ */
+U_STABLE const void * U_EXPORT2
+udata_getMemory(UDataMemory *pData);
+
+/**
+ * Get the information from the data memory header.
+ * This allows to get access to the header containing
+ * platform data properties etc. which is not part of
+ * the data itself and can therefore not be accessed
+ * via the pointer that <code>udata_getMemory()</code> returns.
+ *
+ * @param pData pointer to the data memory object
+ * @param pInfo pointer to a UDataInfo object;
+ *              its <code>size</code> field must be set correctly,
+ *              typically to <code>sizeof(UDataInfo)</code>.
+ *
+ * <code>*pInfo</code> will be filled with the UDataInfo structure
+ * in the data memory object. If this structure is smaller than
+ * <code>pInfo->size</code>, then the <code>size</code> will be
+ * adjusted and only part of the structure will be filled.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
+
+/**
+ * This function bypasses the normal ICU data loading process and
+ * allows you to force ICU's system data to come out of a user-specified
+ * area in memory.
+ *
+ * The format of this data is that of the icu common data file, as is
+ * generated by the pkgdata tool with mode=common or mode=dll.
+ * You can read in a whole common mode file and pass the address to the start of the
+ * data, or (with the appropriate link options) pass in the pointer to
+ * the data that has been loaded from a dll by the operating system,
+ * as shown in this code:
+ *
+ *       extern const  char U_IMPORT U_ICUDATA_ENTRY_POINT []; 
+ *        // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool
+ *       UErrorCode  status = U_ZERO_ERROR;
+ *
+ *       udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status);
+ *
+ * Warning: ICU must NOT have even attempted to access its data yet
+ * when this call is made, or U_USING_DEFAULT_WARNING code will
+ * be returned. Be careful of UnicodeStrings in static initialization which
+ * may attempt to load a converter (use the UNICODE_STRING(x) macro instead).
+ *
+ * Also note that it is important that the declaration be as above. The entry point
+ * must not be declared as an extern void*.
+ *
+ * This function has no effect on application (non ICU) data.  See udata_setAppData()
+ * for similar functionality for application data.
+ *
+ * @param data pointer to ICU common data
+ * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
+ * @stable ICU 2.0
+ */
+
+U_STABLE void U_EXPORT2
+udata_setCommonData(const void *data, UErrorCode *err);
+
+
+/**
+ * This function bypasses the normal ICU data loading process for application-specific
+ * data and allows you to force the it to come out of a user-specified
+ * pointer.
+ *
+ * The format of this data is that of the icu common data file, like 'icudt26l.dat'
+ * or the corresponding shared library (DLL) file.
+ * The application must read in or otherwise construct an image of the data and then
+ * pass the address of it to this function.
+ *
+ *
+ * Warning:  setAppData will set a U_USING_DEFAULT_WARNING code if
+ *           data with the specifed path that has already been opened, or
+ *           if setAppData with the same path has already been called.
+ *           Any such calls to setAppData will have no effect.
+ *
+ *
+ * @param packageName the package name by which the application will refer
+ *             to (open) this data
+ * @param data pointer to the data
+ * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
+ * @see udata_setCommonData
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+udata_setAppData(const char *packageName, const void *data, UErrorCode *err);
+
+/**
+ * Possible settings for udata_setFileAccess()
+ * @see udata_setFileAccess
+ * @stable ICU 3.4
+ */
+typedef enum UDataFileAccess {
+    /** ICU looks for data in single files first, then in packages. (default) */
+    UDATA_FILES_FIRST,
+    /** ICU only loads data from packages, not from single files. */
+    UDATA_ONLY_PACKAGES,
+    /** ICU loads data from packages first, and only from single files
+        if the data cannot be found in a package. */
+    UDATA_PACKAGES_FIRST,
+    /** ICU does not access the file system for data loading. */
+    UDATA_NO_FILES,
+    /** An alias for the default access mode. */
+    UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST,
+    UDATA_FILE_ACCESS_COUNT
+} UDataFileAccess;
+
+/**
+ * This function may be called to control how ICU loads data. It must be called
+ * before any ICU data is loaded, including application data loaded with ures/ResourceBundle or
+ * udata APIs. It should be called before u_init.  This function is not multithread safe.  
+ * The results of calling it while other threads are loading data are undefined.
+ * @param access The type of file access to be used
+ * @param status Error code.
+ * @see UDataFileAccess
+ * @stable ICU 3.4 
+ */
+U_STABLE void U_EXPORT2
+udata_setFileAccess(UDataFileAccess access, UErrorCode *status);
+
+U_CDECL_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/udatpg.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/udatpg.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/udatpg.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,471 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 2007-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  udatpg.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2007jul30
-*   created by: Markus W. Scherer
-*/
-
-#ifndef __UDATPG_H__
-#define __UDATPG_H__
-
-#include "unicode/utypes.h"
-#include "unicode/uenum.h"
-
-/**
- * \file
- * \brief C API: Wrapper for DateTimePatternGenerator (unicode/dtptngen.h).
- *
- * UDateTimePatternGenerator provides flexible generation of date format patterns, 
- * like "yy-MM-dd". The user can build up the generator by adding successive 
- * patterns. Once that is done, a query can be made using a "skeleton", which is 
- * a pattern which just includes the desired fields and lengths. The generator 
- * will return the "best fit" pattern corresponding to that skeleton.
- * <p>The main method people will use is udatpg_getBestPattern, since normally
- * UDateTimePatternGenerator is pre-built with data from a particular locale. 
- * However, generators can be built directly from other data as well.
- * <p><i>Issue: may be useful to also have a function that returns the list of 
- * fields in a pattern, in order, since we have that internally.
- * That would be useful for getting the UI order of field elements.</i>
- */
-
-/**
- * Opaque type for a date/time pattern generator object.
- * @stable ICU 4.0
- */
-typedef void *UDateTimePatternGenerator;
-
-/**
- * Field number constants for udatpg_getAppendItemFormats() and similar functions.
- * These constants are separate from UDateFormatField despite semantic overlap
- * because some fields are merged for the date/time pattern generator.
- * @stable ICU 4.0
- */
-typedef enum UDateTimePatternField {
-    /** @stable ICU 4.0 */
-    UDATPG_ERA_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_YEAR_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_QUARTER_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_MONTH_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_WEEK_OF_YEAR_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_WEEK_OF_MONTH_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_WEEKDAY_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_DAY_OF_YEAR_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_DAY_OF_WEEK_IN_MONTH_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_DAY_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_DAYPERIOD_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_HOUR_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_MINUTE_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_SECOND_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_FRACTIONAL_SECOND_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_ZONE_FIELD,
-    /** @stable ICU 4.0 */
-    UDATPG_FIELD_COUNT
-} UDateTimePatternField;
-
-/**
- * Status return values from udatpg_addPattern().
- * @stable ICU 4.0
- */
-typedef enum UDateTimePatternConflict {
-    /** @stable ICU 4.0 */
-    UDATPG_NO_CONFLICT,
-    /** @stable ICU 4.0 */
-    UDATPG_BASE_CONFLICT,
-    /** @stable ICU 4.0 */
-    UDATPG_CONFLICT,
-    /** @stable ICU 4.0 */
-    UDATPG_CONFLICT_COUNT
-} UDateTimePatternConflict;
-
-/**
-  * Open a generator according to a given locale.
-  * @param locale
-  * @param pErrorCode a pointer to the UErrorCode which must not indicate a
-  *                   failure before the function call.
-  * @return a pointer to UDateTimePatternGenerator.
-  * @stable ICU 4.0
-  */
-U_DRAFT UDateTimePatternGenerator * U_EXPORT2
-udatpg_open(const char *locale, UErrorCode *pErrorCode);
-
-/**
-  * Open an empty generator, to be constructed with udatpg_addPattern(...) etc.
-  * @param pErrorCode a pointer to the UErrorCode which must not indicate a
-  *                   failure before the function call.
-  * @return a pointer to UDateTimePatternGenerator.
-  * @stable ICU 4.0
-  */
-U_DRAFT UDateTimePatternGenerator * U_EXPORT2
-udatpg_openEmpty(UErrorCode *pErrorCode);
-
-/**
-  * Close a generator.
-  * @param dtpg a pointer to UDateTimePatternGenerator.
-  * @stable ICU 4.0
-  */
-U_DRAFT void U_EXPORT2
-udatpg_close(UDateTimePatternGenerator *dtpg);
-
-/**
-  * Create a copy pf a generator.
-  * @param dtpg a pointer to UDateTimePatternGenerator to be copied.
-  * @param pErrorCode a pointer to the UErrorCode which must not indicate a
-  *                   failure before the function call.
-  * @return a pointer to a new UDateTimePatternGenerator.
-  * @stable ICU 4.0
- */
-U_DRAFT UDateTimePatternGenerator * U_EXPORT2
-udatpg_clone(const UDateTimePatternGenerator *dtpg, UErrorCode *pErrorCode);
-
-/**
- * Get the best pattern matching the input skeleton. It is guaranteed to
- * have all of the fields in the skeleton.
- * 
- * Note that this function uses a non-const UDateTimePatternGenerator:
- * It uses a stateful pattern parser which is set up for each generator object,
- * rather than creating one for each function call.
- * Consecutive calls to this function do not affect each other,
- * but this function cannot be used concurrently on a single generator object.
- * 
- * @param dtpg a pointer to UDateTimePatternGenerator.
- * @param skeleton
- *            The skeleton is a pattern containing only the variable fields.
- *            For example, "MMMdd" and "mmhh" are skeletons.
- * @param length the length of skeleton
- * @param bestPattern
- *            The best pattern found from the given skeleton.
- * @param capacity the capacity of bestPattern.
- * @param pErrorCode a pointer to the UErrorCode which must not indicate a
- *                   failure before the function call.
- * @return the length of bestPattern.
- * @stable ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-udatpg_getBestPattern(UDateTimePatternGenerator *dtpg,
-                      const UChar *skeleton, int32_t length,
-                      UChar *bestPattern, int32_t capacity,
-                      UErrorCode *pErrorCode);
-
-/**
-  * Get a unique skeleton from a given pattern. For example,
-  * both "MMM-dd" and "dd/MMM" produce the skeleton "MMMdd".
-  * 
-  * Note that this function uses a non-const UDateTimePatternGenerator:
-  * It uses a stateful pattern parser which is set up for each generator object,
-  * rather than creating one for each function call.
-  * Consecutive calls to this function do not affect each other,
-  * but this function cannot be used concurrently on a single generator object.
-  *
-  * @param dtpg     a pointer to UDateTimePatternGenerator.
-  * @param pattern  input pattern, such as "dd/MMM".
-  * @param length   the length of pattern.
-  * @param skeleton such as "MMMdd"
-  * @param capacity the capacity of skeleton.
-  * @param pErrorCode a pointer to the UErrorCode which must not indicate a
-  *                  failure before the function call.
-  * @return the length of skeleton.
-  * @stable ICU 4.0
-  */
-U_DRAFT int32_t U_EXPORT2
-udatpg_getSkeleton(UDateTimePatternGenerator *dtpg,
-                   const UChar *pattern, int32_t length,
-                   UChar *skeleton, int32_t capacity,
-                   UErrorCode *pErrorCode);
-
-/**
- * Get a unique base skeleton from a given pattern. This is the same
- * as the skeleton, except that differences in length are minimized so
- * as to only preserve the difference between string and numeric form. So
- * for example, both "MMM-dd" and "d/MMM" produce the skeleton "MMMd"
- * (notice the single d).
- *
- * Note that this function uses a non-const UDateTimePatternGenerator:
- * It uses a stateful pattern parser which is set up for each generator object,
- * rather than creating one for each function call.
- * Consecutive calls to this function do not affect each other,
- * but this function cannot be used concurrently on a single generator object.
- *
- * @param dtpg     a pointer to UDateTimePatternGenerator.
- * @param pattern  input pattern, such as "dd/MMM".
- * @param length   the length of pattern.
- * @param baseSkeleton such as "Md"
- * @param capacity the capacity of base skeleton.
- * @param pErrorCode a pointer to the UErrorCode which must not indicate a
- *                  failure before the function call.
- * @return the length of baseSkeleton.
- * @stable ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-udatpg_getBaseSkeleton(UDateTimePatternGenerator *dtpg,
-                       const UChar *pattern, int32_t length,
-                       UChar *baseSkeleton, int32_t capacity,
-                       UErrorCode *pErrorCode);
-
-/**
- * Adds a pattern to the generator. If the pattern has the same skeleton as
- * an existing pattern, and the override parameter is set, then the previous
- * value is overriden. Otherwise, the previous value is retained. In either
- * case, the conflicting status is set and previous vale is stored in 
- * conflicting pattern.
- * <p>
- * Note that single-field patterns (like "MMM") are automatically added, and
- * don't need to be added explicitly!
- *
- * @param dtpg     a pointer to UDateTimePatternGenerator.
- * @param pattern  input pattern, such as "dd/MMM"
- * @param patternLength the length of pattern.
- * @param override  When existing values are to be overridden use true, 
- *                  otherwise use false.
- * @param conflictingPattern  Previous pattern with the same skeleton.
- * @param capacity the capacity of conflictingPattern.
- * @param pLength a pointer to the length of conflictingPattern.
- * @param pErrorCode a pointer to the UErrorCode which must not indicate a
- *                  failure before the function call.
- * @return conflicting status. The value could be UDATPG_NO_CONFLICT, 
- *                  UDATPG_BASE_CONFLICT or UDATPG_CONFLICT.
- * @stable ICU 4.0
- */
-U_DRAFT UDateTimePatternConflict U_EXPORT2
-udatpg_addPattern(UDateTimePatternGenerator *dtpg,
-                  const UChar *pattern, int32_t patternLength,
-                  UBool override,
-                  UChar *conflictingPattern, int32_t capacity, int32_t *pLength,
-                  UErrorCode *pErrorCode);
-
-/**
-  * An AppendItem format is a pattern used to append a field if there is no
-  * good match. For example, suppose that the input skeleton is "GyyyyMMMd",
-  * and there is no matching pattern internally, but there is a pattern
-  * matching "yyyyMMMd", say "d-MM-yyyy". Then that pattern is used, plus the
-  * G. The way these two are conjoined is by using the AppendItemFormat for G
-  * (era). So if that value is, say "{0}, {1}" then the final resulting
-  * pattern is "d-MM-yyyy, G".
-  * <p>
-  * There are actually three available variables: {0} is the pattern so far,
-  * {1} is the element we are adding, and {2} is the name of the element.
-  * <p>
-  * This reflects the way that the CLDR data is organized.
-  *
-  * @param dtpg   a pointer to UDateTimePatternGenerator.
-  * @param field  UDateTimePatternField, such as UDATPG_ERA_FIELD
-  * @param value  pattern, such as "{0}, {1}"
-  * @param length the length of value.
-  * @stable ICU 4.0
-  */
-U_DRAFT void U_EXPORT2
-udatpg_setAppendItemFormat(UDateTimePatternGenerator *dtpg,
-                           UDateTimePatternField field,
-                           const UChar *value, int32_t length);
-
-/**
- * Getter corresponding to setAppendItemFormat. Values below 0 or at or
- * above UDATPG_FIELD_COUNT are illegal arguments.
- *
- * @param dtpg   A pointer to UDateTimePatternGenerator.
- * @param field  UDateTimePatternField, such as UDATPG_ERA_FIELD
- * @param pLength A pointer that will receive the length of appendItemFormat.
- * @return appendItemFormat for field.
- * @stable ICU 4.0
- */
-U_DRAFT const UChar * U_EXPORT2
-udatpg_getAppendItemFormat(const UDateTimePatternGenerator *dtpg,
-                           UDateTimePatternField field,
-                           int32_t *pLength);
-
-/**
-   * Set the name of field, eg "era" in English for ERA. These are only
-   * used if the corresponding AppendItemFormat is used, and if it contains a
-   * {2} variable.
-   * <p>
-   * This reflects the way that the CLDR data is organized.
-   *
-   * @param dtpg   a pointer to UDateTimePatternGenerator.
-   * @param field  UDateTimePatternField
-   * @param value  name for the field.
-   * @param length the length of value.
-   * @stable ICU 4.0
-   */
-U_DRAFT void U_EXPORT2
-udatpg_setAppendItemName(UDateTimePatternGenerator *dtpg,
-                         UDateTimePatternField field,
-                         const UChar *value, int32_t length);
-
-/**
- * Getter corresponding to setAppendItemNames. Values below 0 or at or above
- * UDATPG_FIELD_COUNT are illegal arguments.
- *
- * @param dtpg   a pointer to UDateTimePatternGenerator.
- * @param field  UDateTimePatternField, such as UDATPG_ERA_FIELD
- * @param pLength A pointer that will receive the length of the name for field.
- * @return name for field
- * @stable ICU 4.0
- */
-U_DRAFT const UChar * U_EXPORT2
-udatpg_getAppendItemName(const UDateTimePatternGenerator *dtpg,
-                         UDateTimePatternField field,
-                         int32_t *pLength);
-
-/**
- * The date time format is a message format pattern used to compose date and
- * time patterns. The default value is "{0} {1}", where {0} will be replaced
- * by the date pattern and {1} will be replaced by the time pattern.
- * <p>
- * This is used when the input skeleton contains both date and time fields,
- * but there is not a close match among the added patterns. For example,
- * suppose that this object was created by adding "dd-MMM" and "hh:mm", and
- * its datetimeFormat is the default "{0} {1}". Then if the input skeleton
- * is "MMMdhmm", there is not an exact match, so the input skeleton is
- * broken up into two components "MMMd" and "hmm". There are close matches
- * for those two skeletons, so the result is put together with this pattern,
- * resulting in "d-MMM h:mm".
- *
- * @param dtpg a pointer to UDateTimePatternGenerator.
- * @param dtFormat
- *            message format pattern, here {0} will be replaced by the date
- *            pattern and {1} will be replaced by the time pattern.
- * @param length the length of dtFormat.
- * @stable ICU 4.0
- */
-U_DRAFT void U_EXPORT2
-udatpg_setDateTimeFormat(const UDateTimePatternGenerator *dtpg,
-                         const UChar *dtFormat, int32_t length);
-
-/**
- * Getter corresponding to setDateTimeFormat.
- * @param dtpg   a pointer to UDateTimePatternGenerator.
- * @param pLength A pointer that will receive the length of the format
- * @return dateTimeFormat.
- * @stable ICU 4.0
- */
-U_DRAFT const UChar * U_EXPORT2
-udatpg_getDateTimeFormat(const UDateTimePatternGenerator *dtpg,
-                         int32_t *pLength);
-
-/**
- * The decimal value is used in formatting fractions of seconds. If the
- * skeleton contains fractional seconds, then this is used with the
- * fractional seconds. For example, suppose that the input pattern is
- * "hhmmssSSSS", and the best matching pattern internally is "H:mm:ss", and
- * the decimal string is ",". Then the resulting pattern is modified to be
- * "H:mm:ss,SSSS"
- *
- * @param dtpg a pointer to UDateTimePatternGenerator.
- * @param decimal
- * @param length the length of decimal.
- * @stable ICU 4.0
- */
-U_DRAFT void U_EXPORT2
-udatpg_setDecimal(UDateTimePatternGenerator *dtpg,
-                  const UChar *decimal, int32_t length);
-
-/**
- * Getter corresponding to setDecimal.
- * 
- * @param dtpg a pointer to UDateTimePatternGenerator.
- * @param pLength A pointer that will receive the length of the decimal string.
- * @return corresponding to the decimal point.
- * @stable ICU 4.0
- */
-U_DRAFT const UChar * U_EXPORT2
-udatpg_getDecimal(const UDateTimePatternGenerator *dtpg,
-                  int32_t *pLength);
-
-/**
- * Adjusts the field types (width and subtype) of a pattern to match what is
- * in a skeleton. That is, if you supply a pattern like "d-M H:m", and a
- * skeleton of "MMMMddhhmm", then the input pattern is adjusted to be
- * "dd-MMMM hh:mm". This is used internally to get the best match for the
- * input skeleton, but can also be used externally.
- *
- * Note that this function uses a non-const UDateTimePatternGenerator:
- * It uses a stateful pattern parser which is set up for each generator object,
- * rather than creating one for each function call.
- * Consecutive calls to this function do not affect each other,
- * but this function cannot be used concurrently on a single generator object.
- *
- * @param dtpg a pointer to UDateTimePatternGenerator.
- * @param pattern Input pattern
- * @param patternLength the length of input pattern.
- * @param skeleton
- * @param skeletonLength the length of input skeleton.
- * @param dest  pattern adjusted to match the skeleton fields widths and subtypes.
- * @param destCapacity the capacity of dest.
- * @param pErrorCode a pointer to the UErrorCode which must not indicate a
- *                  failure before the function call.
- * @return the length of dest.
- * @stable ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-udatpg_replaceFieldTypes(UDateTimePatternGenerator *dtpg,
-                         const UChar *pattern, int32_t patternLength,
-                         const UChar *skeleton, int32_t skeletonLength,
-                         UChar *dest, int32_t destCapacity,
-                         UErrorCode *pErrorCode);
-
-/**
- * Return a UEnumeration list of all the skeletons in canonical form.
- * Call udatpg_getPatternForSkeleton() to get the corresponding pattern.
- * 
- * @param dtpg a pointer to UDateTimePatternGenerator.
- * @param pErrorCode a pointer to the UErrorCode which must not indicate a
- *                  failure before the function call
- * @return a UEnumeration list of all the skeletons
- *         The caller must close the object.
- * @stable ICU 4.0
- */
-U_DRAFT UEnumeration * U_EXPORT2
-udatpg_openSkeletons(const UDateTimePatternGenerator *dtpg, UErrorCode *pErrorCode);
-
-/**
- * Return a UEnumeration list of all the base skeletons in canonical form.
- *
- * @param dtpg a pointer to UDateTimePatternGenerator.
- * @param pErrorCode a pointer to the UErrorCode which must not indicate a
- *             failure before the function call.
- * @return a UEnumeration list of all the base skeletons
- *             The caller must close the object.
- * @stable ICU 4.0
- */
-U_DRAFT UEnumeration * U_EXPORT2
-udatpg_openBaseSkeletons(const UDateTimePatternGenerator *dtpg, UErrorCode *pErrorCode);
-
-/**
- * Get the pattern corresponding to a given skeleton.
- * 
- * @param dtpg a pointer to UDateTimePatternGenerator.
- * @param skeleton 
- * @param skeletonLength pointer to the length of skeleton.
- * @param pLength pointer to the length of return pattern.
- * @return pattern corresponding to a given skeleton.
- * @stable ICU 4.0
- */
-U_DRAFT const UChar * U_EXPORT2
-udatpg_getPatternForSkeleton(const UDateTimePatternGenerator *dtpg,
-                             const UChar *skeleton, int32_t skeletonLength,
-                             int32_t *pLength);
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/udatpg.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/udatpg.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/udatpg.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/udatpg.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,471 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2007-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  udatpg.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2007jul30
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UDATPG_H__
+#define __UDATPG_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file
+ * \brief C API: Wrapper for DateTimePatternGenerator (unicode/dtptngen.h).
+ *
+ * UDateTimePatternGenerator provides flexible generation of date format patterns, 
+ * like "yy-MM-dd". The user can build up the generator by adding successive 
+ * patterns. Once that is done, a query can be made using a "skeleton", which is 
+ * a pattern which just includes the desired fields and lengths. The generator 
+ * will return the "best fit" pattern corresponding to that skeleton.
+ * <p>The main method people will use is udatpg_getBestPattern, since normally
+ * UDateTimePatternGenerator is pre-built with data from a particular locale. 
+ * However, generators can be built directly from other data as well.
+ * <p><i>Issue: may be useful to also have a function that returns the list of 
+ * fields in a pattern, in order, since we have that internally.
+ * That would be useful for getting the UI order of field elements.</i>
+ */
+
+/**
+ * Opaque type for a date/time pattern generator object.
+ * @stable ICU 4.0
+ */
+typedef void *UDateTimePatternGenerator;
+
+/**
+ * Field number constants for udatpg_getAppendItemFormats() and similar functions.
+ * These constants are separate from UDateFormatField despite semantic overlap
+ * because some fields are merged for the date/time pattern generator.
+ * @stable ICU 4.0
+ */
+typedef enum UDateTimePatternField {
+    /** @stable ICU 4.0 */
+    UDATPG_ERA_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_YEAR_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_QUARTER_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_MONTH_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_WEEK_OF_YEAR_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_WEEK_OF_MONTH_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_WEEKDAY_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_DAY_OF_YEAR_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_DAY_OF_WEEK_IN_MONTH_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_DAY_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_DAYPERIOD_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_HOUR_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_MINUTE_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_SECOND_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_FRACTIONAL_SECOND_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_ZONE_FIELD,
+    /** @stable ICU 4.0 */
+    UDATPG_FIELD_COUNT
+} UDateTimePatternField;
+
+/**
+ * Status return values from udatpg_addPattern().
+ * @stable ICU 4.0
+ */
+typedef enum UDateTimePatternConflict {
+    /** @stable ICU 4.0 */
+    UDATPG_NO_CONFLICT,
+    /** @stable ICU 4.0 */
+    UDATPG_BASE_CONFLICT,
+    /** @stable ICU 4.0 */
+    UDATPG_CONFLICT,
+    /** @stable ICU 4.0 */
+    UDATPG_CONFLICT_COUNT
+} UDateTimePatternConflict;
+
+/**
+  * Open a generator according to a given locale.
+  * @param locale
+  * @param pErrorCode a pointer to the UErrorCode which must not indicate a
+  *                   failure before the function call.
+  * @return a pointer to UDateTimePatternGenerator.
+  * @stable ICU 4.0
+  */
+U_DRAFT UDateTimePatternGenerator * U_EXPORT2
+udatpg_open(const char *locale, UErrorCode *pErrorCode);
+
+/**
+  * Open an empty generator, to be constructed with udatpg_addPattern(...) etc.
+  * @param pErrorCode a pointer to the UErrorCode which must not indicate a
+  *                   failure before the function call.
+  * @return a pointer to UDateTimePatternGenerator.
+  * @stable ICU 4.0
+  */
+U_DRAFT UDateTimePatternGenerator * U_EXPORT2
+udatpg_openEmpty(UErrorCode *pErrorCode);
+
+/**
+  * Close a generator.
+  * @param dtpg a pointer to UDateTimePatternGenerator.
+  * @stable ICU 4.0
+  */
+U_DRAFT void U_EXPORT2
+udatpg_close(UDateTimePatternGenerator *dtpg);
+
+/**
+  * Create a copy pf a generator.
+  * @param dtpg a pointer to UDateTimePatternGenerator to be copied.
+  * @param pErrorCode a pointer to the UErrorCode which must not indicate a
+  *                   failure before the function call.
+  * @return a pointer to a new UDateTimePatternGenerator.
+  * @stable ICU 4.0
+ */
+U_DRAFT UDateTimePatternGenerator * U_EXPORT2
+udatpg_clone(const UDateTimePatternGenerator *dtpg, UErrorCode *pErrorCode);
+
+/**
+ * Get the best pattern matching the input skeleton. It is guaranteed to
+ * have all of the fields in the skeleton.
+ * 
+ * Note that this function uses a non-const UDateTimePatternGenerator:
+ * It uses a stateful pattern parser which is set up for each generator object,
+ * rather than creating one for each function call.
+ * Consecutive calls to this function do not affect each other,
+ * but this function cannot be used concurrently on a single generator object.
+ * 
+ * @param dtpg a pointer to UDateTimePatternGenerator.
+ * @param skeleton
+ *            The skeleton is a pattern containing only the variable fields.
+ *            For example, "MMMdd" and "mmhh" are skeletons.
+ * @param length the length of skeleton
+ * @param bestPattern
+ *            The best pattern found from the given skeleton.
+ * @param capacity the capacity of bestPattern.
+ * @param pErrorCode a pointer to the UErrorCode which must not indicate a
+ *                   failure before the function call.
+ * @return the length of bestPattern.
+ * @stable ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+udatpg_getBestPattern(UDateTimePatternGenerator *dtpg,
+                      const UChar *skeleton, int32_t length,
+                      UChar *bestPattern, int32_t capacity,
+                      UErrorCode *pErrorCode);
+
+/**
+  * Get a unique skeleton from a given pattern. For example,
+  * both "MMM-dd" and "dd/MMM" produce the skeleton "MMMdd".
+  * 
+  * Note that this function uses a non-const UDateTimePatternGenerator:
+  * It uses a stateful pattern parser which is set up for each generator object,
+  * rather than creating one for each function call.
+  * Consecutive calls to this function do not affect each other,
+  * but this function cannot be used concurrently on a single generator object.
+  *
+  * @param dtpg     a pointer to UDateTimePatternGenerator.
+  * @param pattern  input pattern, such as "dd/MMM".
+  * @param length   the length of pattern.
+  * @param skeleton such as "MMMdd"
+  * @param capacity the capacity of skeleton.
+  * @param pErrorCode a pointer to the UErrorCode which must not indicate a
+  *                  failure before the function call.
+  * @return the length of skeleton.
+  * @stable ICU 4.0
+  */
+U_DRAFT int32_t U_EXPORT2
+udatpg_getSkeleton(UDateTimePatternGenerator *dtpg,
+                   const UChar *pattern, int32_t length,
+                   UChar *skeleton, int32_t capacity,
+                   UErrorCode *pErrorCode);
+
+/**
+ * Get a unique base skeleton from a given pattern. This is the same
+ * as the skeleton, except that differences in length are minimized so
+ * as to only preserve the difference between string and numeric form. So
+ * for example, both "MMM-dd" and "d/MMM" produce the skeleton "MMMd"
+ * (notice the single d).
+ *
+ * Note that this function uses a non-const UDateTimePatternGenerator:
+ * It uses a stateful pattern parser which is set up for each generator object,
+ * rather than creating one for each function call.
+ * Consecutive calls to this function do not affect each other,
+ * but this function cannot be used concurrently on a single generator object.
+ *
+ * @param dtpg     a pointer to UDateTimePatternGenerator.
+ * @param pattern  input pattern, such as "dd/MMM".
+ * @param length   the length of pattern.
+ * @param baseSkeleton such as "Md"
+ * @param capacity the capacity of base skeleton.
+ * @param pErrorCode a pointer to the UErrorCode which must not indicate a
+ *                  failure before the function call.
+ * @return the length of baseSkeleton.
+ * @stable ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+udatpg_getBaseSkeleton(UDateTimePatternGenerator *dtpg,
+                       const UChar *pattern, int32_t length,
+                       UChar *baseSkeleton, int32_t capacity,
+                       UErrorCode *pErrorCode);
+
+/**
+ * Adds a pattern to the generator. If the pattern has the same skeleton as
+ * an existing pattern, and the override parameter is set, then the previous
+ * value is overriden. Otherwise, the previous value is retained. In either
+ * case, the conflicting status is set and previous vale is stored in 
+ * conflicting pattern.
+ * <p>
+ * Note that single-field patterns (like "MMM") are automatically added, and
+ * don't need to be added explicitly!
+ *
+ * @param dtpg     a pointer to UDateTimePatternGenerator.
+ * @param pattern  input pattern, such as "dd/MMM"
+ * @param patternLength the length of pattern.
+ * @param override  When existing values are to be overridden use true, 
+ *                  otherwise use false.
+ * @param conflictingPattern  Previous pattern with the same skeleton.
+ * @param capacity the capacity of conflictingPattern.
+ * @param pLength a pointer to the length of conflictingPattern.
+ * @param pErrorCode a pointer to the UErrorCode which must not indicate a
+ *                  failure before the function call.
+ * @return conflicting status. The value could be UDATPG_NO_CONFLICT, 
+ *                  UDATPG_BASE_CONFLICT or UDATPG_CONFLICT.
+ * @stable ICU 4.0
+ */
+U_DRAFT UDateTimePatternConflict U_EXPORT2
+udatpg_addPattern(UDateTimePatternGenerator *dtpg,
+                  const UChar *pattern, int32_t patternLength,
+                  UBool override,
+                  UChar *conflictingPattern, int32_t capacity, int32_t *pLength,
+                  UErrorCode *pErrorCode);
+
+/**
+  * An AppendItem format is a pattern used to append a field if there is no
+  * good match. For example, suppose that the input skeleton is "GyyyyMMMd",
+  * and there is no matching pattern internally, but there is a pattern
+  * matching "yyyyMMMd", say "d-MM-yyyy". Then that pattern is used, plus the
+  * G. The way these two are conjoined is by using the AppendItemFormat for G
+  * (era). So if that value is, say "{0}, {1}" then the final resulting
+  * pattern is "d-MM-yyyy, G".
+  * <p>
+  * There are actually three available variables: {0} is the pattern so far,
+  * {1} is the element we are adding, and {2} is the name of the element.
+  * <p>
+  * This reflects the way that the CLDR data is organized.
+  *
+  * @param dtpg   a pointer to UDateTimePatternGenerator.
+  * @param field  UDateTimePatternField, such as UDATPG_ERA_FIELD
+  * @param value  pattern, such as "{0}, {1}"
+  * @param length the length of value.
+  * @stable ICU 4.0
+  */
+U_DRAFT void U_EXPORT2
+udatpg_setAppendItemFormat(UDateTimePatternGenerator *dtpg,
+                           UDateTimePatternField field,
+                           const UChar *value, int32_t length);
+
+/**
+ * Getter corresponding to setAppendItemFormat. Values below 0 or at or
+ * above UDATPG_FIELD_COUNT are illegal arguments.
+ *
+ * @param dtpg   A pointer to UDateTimePatternGenerator.
+ * @param field  UDateTimePatternField, such as UDATPG_ERA_FIELD
+ * @param pLength A pointer that will receive the length of appendItemFormat.
+ * @return appendItemFormat for field.
+ * @stable ICU 4.0
+ */
+U_DRAFT const UChar * U_EXPORT2
+udatpg_getAppendItemFormat(const UDateTimePatternGenerator *dtpg,
+                           UDateTimePatternField field,
+                           int32_t *pLength);
+
+/**
+   * Set the name of field, eg "era" in English for ERA. These are only
+   * used if the corresponding AppendItemFormat is used, and if it contains a
+   * {2} variable.
+   * <p>
+   * This reflects the way that the CLDR data is organized.
+   *
+   * @param dtpg   a pointer to UDateTimePatternGenerator.
+   * @param field  UDateTimePatternField
+   * @param value  name for the field.
+   * @param length the length of value.
+   * @stable ICU 4.0
+   */
+U_DRAFT void U_EXPORT2
+udatpg_setAppendItemName(UDateTimePatternGenerator *dtpg,
+                         UDateTimePatternField field,
+                         const UChar *value, int32_t length);
+
+/**
+ * Getter corresponding to setAppendItemNames. Values below 0 or at or above
+ * UDATPG_FIELD_COUNT are illegal arguments.
+ *
+ * @param dtpg   a pointer to UDateTimePatternGenerator.
+ * @param field  UDateTimePatternField, such as UDATPG_ERA_FIELD
+ * @param pLength A pointer that will receive the length of the name for field.
+ * @return name for field
+ * @stable ICU 4.0
+ */
+U_DRAFT const UChar * U_EXPORT2
+udatpg_getAppendItemName(const UDateTimePatternGenerator *dtpg,
+                         UDateTimePatternField field,
+                         int32_t *pLength);
+
+/**
+ * The date time format is a message format pattern used to compose date and
+ * time patterns. The default value is "{0} {1}", where {0} will be replaced
+ * by the date pattern and {1} will be replaced by the time pattern.
+ * <p>
+ * This is used when the input skeleton contains both date and time fields,
+ * but there is not a close match among the added patterns. For example,
+ * suppose that this object was created by adding "dd-MMM" and "hh:mm", and
+ * its datetimeFormat is the default "{0} {1}". Then if the input skeleton
+ * is "MMMdhmm", there is not an exact match, so the input skeleton is
+ * broken up into two components "MMMd" and "hmm". There are close matches
+ * for those two skeletons, so the result is put together with this pattern,
+ * resulting in "d-MMM h:mm".
+ *
+ * @param dtpg a pointer to UDateTimePatternGenerator.
+ * @param dtFormat
+ *            message format pattern, here {0} will be replaced by the date
+ *            pattern and {1} will be replaced by the time pattern.
+ * @param length the length of dtFormat.
+ * @stable ICU 4.0
+ */
+U_DRAFT void U_EXPORT2
+udatpg_setDateTimeFormat(const UDateTimePatternGenerator *dtpg,
+                         const UChar *dtFormat, int32_t length);
+
+/**
+ * Getter corresponding to setDateTimeFormat.
+ * @param dtpg   a pointer to UDateTimePatternGenerator.
+ * @param pLength A pointer that will receive the length of the format
+ * @return dateTimeFormat.
+ * @stable ICU 4.0
+ */
+U_DRAFT const UChar * U_EXPORT2
+udatpg_getDateTimeFormat(const UDateTimePatternGenerator *dtpg,
+                         int32_t *pLength);
+
+/**
+ * The decimal value is used in formatting fractions of seconds. If the
+ * skeleton contains fractional seconds, then this is used with the
+ * fractional seconds. For example, suppose that the input pattern is
+ * "hhmmssSSSS", and the best matching pattern internally is "H:mm:ss", and
+ * the decimal string is ",". Then the resulting pattern is modified to be
+ * "H:mm:ss,SSSS"
+ *
+ * @param dtpg a pointer to UDateTimePatternGenerator.
+ * @param decimal
+ * @param length the length of decimal.
+ * @stable ICU 4.0
+ */
+U_DRAFT void U_EXPORT2
+udatpg_setDecimal(UDateTimePatternGenerator *dtpg,
+                  const UChar *decimal, int32_t length);
+
+/**
+ * Getter corresponding to setDecimal.
+ * 
+ * @param dtpg a pointer to UDateTimePatternGenerator.
+ * @param pLength A pointer that will receive the length of the decimal string.
+ * @return corresponding to the decimal point.
+ * @stable ICU 4.0
+ */
+U_DRAFT const UChar * U_EXPORT2
+udatpg_getDecimal(const UDateTimePatternGenerator *dtpg,
+                  int32_t *pLength);
+
+/**
+ * Adjusts the field types (width and subtype) of a pattern to match what is
+ * in a skeleton. That is, if you supply a pattern like "d-M H:m", and a
+ * skeleton of "MMMMddhhmm", then the input pattern is adjusted to be
+ * "dd-MMMM hh:mm". This is used internally to get the best match for the
+ * input skeleton, but can also be used externally.
+ *
+ * Note that this function uses a non-const UDateTimePatternGenerator:
+ * It uses a stateful pattern parser which is set up for each generator object,
+ * rather than creating one for each function call.
+ * Consecutive calls to this function do not affect each other,
+ * but this function cannot be used concurrently on a single generator object.
+ *
+ * @param dtpg a pointer to UDateTimePatternGenerator.
+ * @param pattern Input pattern
+ * @param patternLength the length of input pattern.
+ * @param skeleton
+ * @param skeletonLength the length of input skeleton.
+ * @param dest  pattern adjusted to match the skeleton fields widths and subtypes.
+ * @param destCapacity the capacity of dest.
+ * @param pErrorCode a pointer to the UErrorCode which must not indicate a
+ *                  failure before the function call.
+ * @return the length of dest.
+ * @stable ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+udatpg_replaceFieldTypes(UDateTimePatternGenerator *dtpg,
+                         const UChar *pattern, int32_t patternLength,
+                         const UChar *skeleton, int32_t skeletonLength,
+                         UChar *dest, int32_t destCapacity,
+                         UErrorCode *pErrorCode);
+
+/**
+ * Return a UEnumeration list of all the skeletons in canonical form.
+ * Call udatpg_getPatternForSkeleton() to get the corresponding pattern.
+ * 
+ * @param dtpg a pointer to UDateTimePatternGenerator.
+ * @param pErrorCode a pointer to the UErrorCode which must not indicate a
+ *                  failure before the function call
+ * @return a UEnumeration list of all the skeletons
+ *         The caller must close the object.
+ * @stable ICU 4.0
+ */
+U_DRAFT UEnumeration * U_EXPORT2
+udatpg_openSkeletons(const UDateTimePatternGenerator *dtpg, UErrorCode *pErrorCode);
+
+/**
+ * Return a UEnumeration list of all the base skeletons in canonical form.
+ *
+ * @param dtpg a pointer to UDateTimePatternGenerator.
+ * @param pErrorCode a pointer to the UErrorCode which must not indicate a
+ *             failure before the function call.
+ * @return a UEnumeration list of all the base skeletons
+ *             The caller must close the object.
+ * @stable ICU 4.0
+ */
+U_DRAFT UEnumeration * U_EXPORT2
+udatpg_openBaseSkeletons(const UDateTimePatternGenerator *dtpg, UErrorCode *pErrorCode);
+
+/**
+ * Get the pattern corresponding to a given skeleton.
+ * 
+ * @param dtpg a pointer to UDateTimePatternGenerator.
+ * @param skeleton 
+ * @param skeletonLength pointer to the length of skeleton.
+ * @param pLength pointer to the length of return pattern.
+ * @return pattern corresponding to a given skeleton.
+ * @stable ICU 4.0
+ */
+U_DRAFT const UChar * U_EXPORT2
+udatpg_getPatternForSkeleton(const UDateTimePatternGenerator *dtpg,
+                             const UChar *skeleton, int32_t skeletonLength,
+                             int32_t *pLength);
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/udeprctd.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/udeprctd.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/udeprctd.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,50 +0,0 @@
-/*
-*******************************************************************************
-*   Copyright (C) 2004-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*******************************************************************************
-*
-*   file name:  
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   Created by: genheaders.pl, a perl script written by Ram Viswanadha
-*
-*  Contains data for commenting out APIs.
-*  Gets included by umachine.h
-*
-*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
-*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
-*/
-
-#ifndef UDEPRCTD_H
-#define UDEPRCTD_H
-
-#ifdef U_HIDE_DEPRECATED_API
-
-#    if U_DISABLE_RENAMING
-#        define ucol_getContractions ucol_getContractions_DEPRECATED_API_DO_NOT_USE
-#        define ucol_getLocale ucol_getLocale_DEPRECATED_API_DO_NOT_USE
-#        define ures_countArrayItems ures_countArrayItems_DEPRECATED_API_DO_NOT_USE
-#        define ures_getLocale ures_getLocale_DEPRECATED_API_DO_NOT_USE
-#        define ures_getVersionNumber ures_getVersionNumber_DEPRECATED_API_DO_NOT_USE
-#        define utrans_getAvailableID utrans_getAvailableID_DEPRECATED_API_DO_NOT_USE
-#        define utrans_getID utrans_getID_DEPRECATED_API_DO_NOT_USE
-#        define utrans_open utrans_open_DEPRECATED_API_DO_NOT_USE
-#        define utrans_unregister utrans_unregister_DEPRECATED_API_DO_NOT_USE
-#    else
-#        define ucol_getContractions_4_0 ucol_getContractions_DEPRECATED_API_DO_NOT_USE
-#        define ucol_getLocale_4_0 ucol_getLocale_DEPRECATED_API_DO_NOT_USE
-#        define ures_countArrayItems_4_0 ures_countArrayItems_DEPRECATED_API_DO_NOT_USE
-#        define ures_getLocale_4_0 ures_getLocale_DEPRECATED_API_DO_NOT_USE
-#        define ures_getVersionNumber_4_0 ures_getVersionNumber_DEPRECATED_API_DO_NOT_USE
-#        define utrans_getAvailableID_4_0 utrans_getAvailableID_DEPRECATED_API_DO_NOT_USE
-#        define utrans_getID_4_0 utrans_getID_DEPRECATED_API_DO_NOT_USE
-#        define utrans_open_4_0 utrans_open_DEPRECATED_API_DO_NOT_USE
-#        define utrans_unregister_4_0 utrans_unregister_DEPRECATED_API_DO_NOT_USE
-#    endif /* U_DISABLE_RENAMING */
-
-#endif /* U_HIDE_DEPRECATED_API */
-#endif /* UDEPRCTD_H */
-

Copied: MacRuby/trunk/icu-1060/unicode/udeprctd.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/udeprctd.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/udeprctd.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/udeprctd.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,50 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2004-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*
+*   file name:  
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   Created by: genheaders.pl, a perl script written by Ram Viswanadha
+*
+*  Contains data for commenting out APIs.
+*  Gets included by umachine.h
+*
+*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef UDEPRCTD_H
+#define UDEPRCTD_H
+
+#ifdef U_HIDE_DEPRECATED_API
+
+#    if U_DISABLE_RENAMING
+#        define ucol_getContractions ucol_getContractions_DEPRECATED_API_DO_NOT_USE
+#        define ucol_getLocale ucol_getLocale_DEPRECATED_API_DO_NOT_USE
+#        define ures_countArrayItems ures_countArrayItems_DEPRECATED_API_DO_NOT_USE
+#        define ures_getLocale ures_getLocale_DEPRECATED_API_DO_NOT_USE
+#        define ures_getVersionNumber ures_getVersionNumber_DEPRECATED_API_DO_NOT_USE
+#        define utrans_getAvailableID utrans_getAvailableID_DEPRECATED_API_DO_NOT_USE
+#        define utrans_getID utrans_getID_DEPRECATED_API_DO_NOT_USE
+#        define utrans_open utrans_open_DEPRECATED_API_DO_NOT_USE
+#        define utrans_unregister utrans_unregister_DEPRECATED_API_DO_NOT_USE
+#    else
+#        define ucol_getContractions_4_0 ucol_getContractions_DEPRECATED_API_DO_NOT_USE
+#        define ucol_getLocale_4_0 ucol_getLocale_DEPRECATED_API_DO_NOT_USE
+#        define ures_countArrayItems_4_0 ures_countArrayItems_DEPRECATED_API_DO_NOT_USE
+#        define ures_getLocale_4_0 ures_getLocale_DEPRECATED_API_DO_NOT_USE
+#        define ures_getVersionNumber_4_0 ures_getVersionNumber_DEPRECATED_API_DO_NOT_USE
+#        define utrans_getAvailableID_4_0 utrans_getAvailableID_DEPRECATED_API_DO_NOT_USE
+#        define utrans_getID_4_0 utrans_getID_DEPRECATED_API_DO_NOT_USE
+#        define utrans_open_4_0 utrans_open_DEPRECATED_API_DO_NOT_USE
+#        define utrans_unregister_4_0 utrans_unregister_DEPRECATED_API_DO_NOT_USE
+#    endif /* U_DISABLE_RENAMING */
+
+#endif /* U_HIDE_DEPRECATED_API */
+#endif /* UDEPRCTD_H */
+

Deleted: MacRuby/trunk/icu-1060/unicode/udraft.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/udraft.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/udraft.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,166 +0,0 @@
-/*
-*******************************************************************************
-*   Copyright (C) 2004-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*******************************************************************************
-*
-*   file name:  
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   Created by: genheaders.pl, a perl script written by Ram Viswanadha
-*
-*  Contains data for commenting out APIs.
-*  Gets included by umachine.h
-*
-*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
-*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
-*/
-
-#ifndef UDRAFT_H
-#define UDRAFT_H
-
-#ifdef U_HIDE_DRAFT_API
-
-#    if U_DISABLE_RENAMING
-#        define afkLanguageCode afkLanguageCode_DRAFT_API_DO_NOT_USE
-#        define armiScriptCode armiScriptCode_DRAFT_API_DO_NOT_USE
-#        define u_fclose u_fclose_DRAFT_API_DO_NOT_USE
-#        define u_feof u_feof_DRAFT_API_DO_NOT_USE
-#        define u_fflush u_fflush_DRAFT_API_DO_NOT_USE
-#        define u_fgetConverter u_fgetConverter_DRAFT_API_DO_NOT_USE
-#        define u_fgetc u_fgetc_DRAFT_API_DO_NOT_USE
-#        define u_fgetcodepage u_fgetcodepage_DRAFT_API_DO_NOT_USE
-#        define u_fgetcx u_fgetcx_DRAFT_API_DO_NOT_USE
-#        define u_fgetfile u_fgetfile_DRAFT_API_DO_NOT_USE
-#        define u_fgetlocale u_fgetlocale_DRAFT_API_DO_NOT_USE
-#        define u_fgets u_fgets_DRAFT_API_DO_NOT_USE
-#        define u_file_read u_file_read_DRAFT_API_DO_NOT_USE
-#        define u_file_write u_file_write_DRAFT_API_DO_NOT_USE
-#        define u_finit u_finit_DRAFT_API_DO_NOT_USE
-#        define u_fopen u_fopen_DRAFT_API_DO_NOT_USE
-#        define u_fprintf u_fprintf_DRAFT_API_DO_NOT_USE
-#        define u_fprintf_u u_fprintf_u_DRAFT_API_DO_NOT_USE
-#        define u_fputc u_fputc_DRAFT_API_DO_NOT_USE
-#        define u_fputs u_fputs_DRAFT_API_DO_NOT_USE
-#        define u_frewind u_frewind_DRAFT_API_DO_NOT_USE
-#        define u_fscanf u_fscanf_DRAFT_API_DO_NOT_USE
-#        define u_fscanf_u u_fscanf_u_DRAFT_API_DO_NOT_USE
-#        define u_fsetcodepage u_fsetcodepage_DRAFT_API_DO_NOT_USE
-#        define u_fsetlocale u_fsetlocale_DRAFT_API_DO_NOT_USE
-#        define u_fsettransliterator u_fsettransliterator_DRAFT_API_DO_NOT_USE
-#        define u_fstropen u_fstropen_DRAFT_API_DO_NOT_USE
-#        define u_fungetc u_fungetc_DRAFT_API_DO_NOT_USE
-#        define u_snprintf u_snprintf_DRAFT_API_DO_NOT_USE
-#        define u_snprintf_u u_snprintf_u_DRAFT_API_DO_NOT_USE
-#        define u_sprintf u_sprintf_DRAFT_API_DO_NOT_USE
-#        define u_sprintf_u u_sprintf_u_DRAFT_API_DO_NOT_USE
-#        define u_sscanf u_sscanf_DRAFT_API_DO_NOT_USE
-#        define u_sscanf_u u_sscanf_u_DRAFT_API_DO_NOT_USE
-#        define u_vfprintf u_vfprintf_DRAFT_API_DO_NOT_USE
-#        define u_vfprintf_u u_vfprintf_u_DRAFT_API_DO_NOT_USE
-#        define u_vfscanf u_vfscanf_DRAFT_API_DO_NOT_USE
-#        define u_vfscanf_u u_vfscanf_u_DRAFT_API_DO_NOT_USE
-#        define u_vsnprintf u_vsnprintf_DRAFT_API_DO_NOT_USE
-#        define u_vsnprintf_u u_vsnprintf_u_DRAFT_API_DO_NOT_USE
-#        define u_vsprintf u_vsprintf_DRAFT_API_DO_NOT_USE
-#        define u_vsprintf_u u_vsprintf_u_DRAFT_API_DO_NOT_USE
-#        define u_vsscanf u_vsscanf_DRAFT_API_DO_NOT_USE
-#        define u_vsscanf_u u_vsscanf_u_DRAFT_API_DO_NOT_USE
-#        define ucal_clone ucal_clone_DRAFT_API_DO_NOT_USE
-#        define ucal_getCanonicalTimeZoneID ucal_getCanonicalTimeZoneID_DRAFT_API_DO_NOT_USE
-#        define ucurr_countCurrencies ucurr_countCurrencies_DRAFT_API_DO_NOT_USE
-#        define ucurr_forLocaleAndDate ucurr_forLocaleAndDate_DRAFT_API_DO_NOT_USE
-#        define uloc_addLikelySubtags uloc_addLikelySubtags_DRAFT_API_DO_NOT_USE
-#        define uloc_getCharacterOrientation uloc_getCharacterOrientation_DRAFT_API_DO_NOT_USE
-#        define uloc_getLineOrientation uloc_getLineOrientation_DRAFT_API_DO_NOT_USE
-#        define uloc_minimizeSubtags uloc_minimizeSubtags_DRAFT_API_DO_NOT_USE
-#        define uregex_getMatchCallback uregex_getMatchCallback_DRAFT_API_DO_NOT_USE
-#        define uregex_getStackLimit uregex_getStackLimit_DRAFT_API_DO_NOT_USE
-#        define uregex_getTimeLimit uregex_getTimeLimit_DRAFT_API_DO_NOT_USE
-#        define uregex_hasAnchoringBounds uregex_hasAnchoringBounds_DRAFT_API_DO_NOT_USE
-#        define uregex_hasTransparentBounds uregex_hasTransparentBounds_DRAFT_API_DO_NOT_USE
-#        define uregex_hitEnd uregex_hitEnd_DRAFT_API_DO_NOT_USE
-#        define uregex_regionEnd uregex_regionEnd_DRAFT_API_DO_NOT_USE
-#        define uregex_regionStart uregex_regionStart_DRAFT_API_DO_NOT_USE
-#        define uregex_requireEnd uregex_requireEnd_DRAFT_API_DO_NOT_USE
-#        define uregex_setMatchCallback uregex_setMatchCallback_DRAFT_API_DO_NOT_USE
-#        define uregex_setRegion uregex_setRegion_DRAFT_API_DO_NOT_USE
-#        define uregex_setStackLimit uregex_setStackLimit_DRAFT_API_DO_NOT_USE
-#        define uregex_setTimeLimit uregex_setTimeLimit_DRAFT_API_DO_NOT_USE
-#        define uregex_useAnchoringBounds uregex_useAnchoringBounds_DRAFT_API_DO_NOT_USE
-#        define uregex_useTransparentBounds uregex_useTransparentBounds_DRAFT_API_DO_NOT_USE
-#    else
-#        define afkLanguageCode_4_0 afkLanguageCode_DRAFT_API_DO_NOT_USE
-#        define armiScriptCode_4_0 armiScriptCode_DRAFT_API_DO_NOT_USE
-#        define u_fclose_4_0 u_fclose_DRAFT_API_DO_NOT_USE
-#        define u_feof_4_0 u_feof_DRAFT_API_DO_NOT_USE
-#        define u_fflush_4_0 u_fflush_DRAFT_API_DO_NOT_USE
-#        define u_fgetConverter_4_0 u_fgetConverter_DRAFT_API_DO_NOT_USE
-#        define u_fgetc_4_0 u_fgetc_DRAFT_API_DO_NOT_USE
-#        define u_fgetcodepage_4_0 u_fgetcodepage_DRAFT_API_DO_NOT_USE
-#        define u_fgetcx_4_0 u_fgetcx_DRAFT_API_DO_NOT_USE
-#        define u_fgetfile_4_0 u_fgetfile_DRAFT_API_DO_NOT_USE
-#        define u_fgetlocale_4_0 u_fgetlocale_DRAFT_API_DO_NOT_USE
-#        define u_fgets_4_0 u_fgets_DRAFT_API_DO_NOT_USE
-#        define u_file_read_4_0 u_file_read_DRAFT_API_DO_NOT_USE
-#        define u_file_write_4_0 u_file_write_DRAFT_API_DO_NOT_USE
-#        define u_finit_4_0 u_finit_DRAFT_API_DO_NOT_USE
-#        define u_fopen_4_0 u_fopen_DRAFT_API_DO_NOT_USE
-#        define u_fprintf_4_0 u_fprintf_DRAFT_API_DO_NOT_USE
-#        define u_fprintf_u_4_0 u_fprintf_u_DRAFT_API_DO_NOT_USE
-#        define u_fputc_4_0 u_fputc_DRAFT_API_DO_NOT_USE
-#        define u_fputs_4_0 u_fputs_DRAFT_API_DO_NOT_USE
-#        define u_frewind_4_0 u_frewind_DRAFT_API_DO_NOT_USE
-#        define u_fscanf_4_0 u_fscanf_DRAFT_API_DO_NOT_USE
-#        define u_fscanf_u_4_0 u_fscanf_u_DRAFT_API_DO_NOT_USE
-#        define u_fsetcodepage_4_0 u_fsetcodepage_DRAFT_API_DO_NOT_USE
-#        define u_fsetlocale_4_0 u_fsetlocale_DRAFT_API_DO_NOT_USE
-#        define u_fsettransliterator_4_0 u_fsettransliterator_DRAFT_API_DO_NOT_USE
-#        define u_fstropen_4_0 u_fstropen_DRAFT_API_DO_NOT_USE
-#        define u_fungetc_4_0 u_fungetc_DRAFT_API_DO_NOT_USE
-#        define u_snprintf_4_0 u_snprintf_DRAFT_API_DO_NOT_USE
-#        define u_snprintf_u_4_0 u_snprintf_u_DRAFT_API_DO_NOT_USE
-#        define u_sprintf_4_0 u_sprintf_DRAFT_API_DO_NOT_USE
-#        define u_sprintf_u_4_0 u_sprintf_u_DRAFT_API_DO_NOT_USE
-#        define u_sscanf_4_0 u_sscanf_DRAFT_API_DO_NOT_USE
-#        define u_sscanf_u_4_0 u_sscanf_u_DRAFT_API_DO_NOT_USE
-#        define u_vfprintf_4_0 u_vfprintf_DRAFT_API_DO_NOT_USE
-#        define u_vfprintf_u_4_0 u_vfprintf_u_DRAFT_API_DO_NOT_USE
-#        define u_vfscanf_4_0 u_vfscanf_DRAFT_API_DO_NOT_USE
-#        define u_vfscanf_u_4_0 u_vfscanf_u_DRAFT_API_DO_NOT_USE
-#        define u_vsnprintf_4_0 u_vsnprintf_DRAFT_API_DO_NOT_USE
-#        define u_vsnprintf_u_4_0 u_vsnprintf_u_DRAFT_API_DO_NOT_USE
-#        define u_vsprintf_4_0 u_vsprintf_DRAFT_API_DO_NOT_USE
-#        define u_vsprintf_u_4_0 u_vsprintf_u_DRAFT_API_DO_NOT_USE
-#        define u_vsscanf_4_0 u_vsscanf_DRAFT_API_DO_NOT_USE
-#        define u_vsscanf_u_4_0 u_vsscanf_u_DRAFT_API_DO_NOT_USE
-#        define ucal_clone_4_0 ucal_clone_DRAFT_API_DO_NOT_USE
-#        define ucal_getCanonicalTimeZoneID_4_0 ucal_getCanonicalTimeZoneID_DRAFT_API_DO_NOT_USE
-#        define ucurr_countCurrencies_4_0 ucurr_countCurrencies_DRAFT_API_DO_NOT_USE
-#        define ucurr_forLocaleAndDate_4_0 ucurr_forLocaleAndDate_DRAFT_API_DO_NOT_USE
-#        define uloc_addLikelySubtags_4_0 uloc_addLikelySubtags_DRAFT_API_DO_NOT_USE
-#        define uloc_getCharacterOrientation_4_0 uloc_getCharacterOrientation_DRAFT_API_DO_NOT_USE
-#        define uloc_getLineOrientation_4_0 uloc_getLineOrientation_DRAFT_API_DO_NOT_USE
-#        define uloc_minimizeSubtags_4_0 uloc_minimizeSubtags_DRAFT_API_DO_NOT_USE
-#        define uregex_getMatchCallback_4_0 uregex_getMatchCallback_DRAFT_API_DO_NOT_USE
-#        define uregex_getStackLimit_4_0 uregex_getStackLimit_DRAFT_API_DO_NOT_USE
-#        define uregex_getTimeLimit_4_0 uregex_getTimeLimit_DRAFT_API_DO_NOT_USE
-#        define uregex_hasAnchoringBounds_4_0 uregex_hasAnchoringBounds_DRAFT_API_DO_NOT_USE
-#        define uregex_hasTransparentBounds_4_0 uregex_hasTransparentBounds_DRAFT_API_DO_NOT_USE
-#        define uregex_hitEnd_4_0 uregex_hitEnd_DRAFT_API_DO_NOT_USE
-#        define uregex_regionEnd_4_0 uregex_regionEnd_DRAFT_API_DO_NOT_USE
-#        define uregex_regionStart_4_0 uregex_regionStart_DRAFT_API_DO_NOT_USE
-#        define uregex_requireEnd_4_0 uregex_requireEnd_DRAFT_API_DO_NOT_USE
-#        define uregex_setMatchCallback_4_0 uregex_setMatchCallback_DRAFT_API_DO_NOT_USE
-#        define uregex_setRegion_4_0 uregex_setRegion_DRAFT_API_DO_NOT_USE
-#        define uregex_setStackLimit_4_0 uregex_setStackLimit_DRAFT_API_DO_NOT_USE
-#        define uregex_setTimeLimit_4_0 uregex_setTimeLimit_DRAFT_API_DO_NOT_USE
-#        define uregex_useAnchoringBounds_4_0 uregex_useAnchoringBounds_DRAFT_API_DO_NOT_USE
-#        define uregex_useTransparentBounds_4_0 uregex_useTransparentBounds_DRAFT_API_DO_NOT_USE
-#    endif /* U_DISABLE_RENAMING */
-
-#endif /* U_HIDE_DRAFT_API */
-#endif /* UDRAFT_H */
-

Copied: MacRuby/trunk/icu-1060/unicode/udraft.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/udraft.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/udraft.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/udraft.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,166 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2004-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*
+*   file name:  
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   Created by: genheaders.pl, a perl script written by Ram Viswanadha
+*
+*  Contains data for commenting out APIs.
+*  Gets included by umachine.h
+*
+*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef UDRAFT_H
+#define UDRAFT_H
+
+#ifdef U_HIDE_DRAFT_API
+
+#    if U_DISABLE_RENAMING
+#        define afkLanguageCode afkLanguageCode_DRAFT_API_DO_NOT_USE
+#        define armiScriptCode armiScriptCode_DRAFT_API_DO_NOT_USE
+#        define u_fclose u_fclose_DRAFT_API_DO_NOT_USE
+#        define u_feof u_feof_DRAFT_API_DO_NOT_USE
+#        define u_fflush u_fflush_DRAFT_API_DO_NOT_USE
+#        define u_fgetConverter u_fgetConverter_DRAFT_API_DO_NOT_USE
+#        define u_fgetc u_fgetc_DRAFT_API_DO_NOT_USE
+#        define u_fgetcodepage u_fgetcodepage_DRAFT_API_DO_NOT_USE
+#        define u_fgetcx u_fgetcx_DRAFT_API_DO_NOT_USE
+#        define u_fgetfile u_fgetfile_DRAFT_API_DO_NOT_USE
+#        define u_fgetlocale u_fgetlocale_DRAFT_API_DO_NOT_USE
+#        define u_fgets u_fgets_DRAFT_API_DO_NOT_USE
+#        define u_file_read u_file_read_DRAFT_API_DO_NOT_USE
+#        define u_file_write u_file_write_DRAFT_API_DO_NOT_USE
+#        define u_finit u_finit_DRAFT_API_DO_NOT_USE
+#        define u_fopen u_fopen_DRAFT_API_DO_NOT_USE
+#        define u_fprintf u_fprintf_DRAFT_API_DO_NOT_USE
+#        define u_fprintf_u u_fprintf_u_DRAFT_API_DO_NOT_USE
+#        define u_fputc u_fputc_DRAFT_API_DO_NOT_USE
+#        define u_fputs u_fputs_DRAFT_API_DO_NOT_USE
+#        define u_frewind u_frewind_DRAFT_API_DO_NOT_USE
+#        define u_fscanf u_fscanf_DRAFT_API_DO_NOT_USE
+#        define u_fscanf_u u_fscanf_u_DRAFT_API_DO_NOT_USE
+#        define u_fsetcodepage u_fsetcodepage_DRAFT_API_DO_NOT_USE
+#        define u_fsetlocale u_fsetlocale_DRAFT_API_DO_NOT_USE
+#        define u_fsettransliterator u_fsettransliterator_DRAFT_API_DO_NOT_USE
+#        define u_fstropen u_fstropen_DRAFT_API_DO_NOT_USE
+#        define u_fungetc u_fungetc_DRAFT_API_DO_NOT_USE
+#        define u_snprintf u_snprintf_DRAFT_API_DO_NOT_USE
+#        define u_snprintf_u u_snprintf_u_DRAFT_API_DO_NOT_USE
+#        define u_sprintf u_sprintf_DRAFT_API_DO_NOT_USE
+#        define u_sprintf_u u_sprintf_u_DRAFT_API_DO_NOT_USE
+#        define u_sscanf u_sscanf_DRAFT_API_DO_NOT_USE
+#        define u_sscanf_u u_sscanf_u_DRAFT_API_DO_NOT_USE
+#        define u_vfprintf u_vfprintf_DRAFT_API_DO_NOT_USE
+#        define u_vfprintf_u u_vfprintf_u_DRAFT_API_DO_NOT_USE
+#        define u_vfscanf u_vfscanf_DRAFT_API_DO_NOT_USE
+#        define u_vfscanf_u u_vfscanf_u_DRAFT_API_DO_NOT_USE
+#        define u_vsnprintf u_vsnprintf_DRAFT_API_DO_NOT_USE
+#        define u_vsnprintf_u u_vsnprintf_u_DRAFT_API_DO_NOT_USE
+#        define u_vsprintf u_vsprintf_DRAFT_API_DO_NOT_USE
+#        define u_vsprintf_u u_vsprintf_u_DRAFT_API_DO_NOT_USE
+#        define u_vsscanf u_vsscanf_DRAFT_API_DO_NOT_USE
+#        define u_vsscanf_u u_vsscanf_u_DRAFT_API_DO_NOT_USE
+#        define ucal_clone ucal_clone_DRAFT_API_DO_NOT_USE
+#        define ucal_getCanonicalTimeZoneID ucal_getCanonicalTimeZoneID_DRAFT_API_DO_NOT_USE
+#        define ucurr_countCurrencies ucurr_countCurrencies_DRAFT_API_DO_NOT_USE
+#        define ucurr_forLocaleAndDate ucurr_forLocaleAndDate_DRAFT_API_DO_NOT_USE
+#        define uloc_addLikelySubtags uloc_addLikelySubtags_DRAFT_API_DO_NOT_USE
+#        define uloc_getCharacterOrientation uloc_getCharacterOrientation_DRAFT_API_DO_NOT_USE
+#        define uloc_getLineOrientation uloc_getLineOrientation_DRAFT_API_DO_NOT_USE
+#        define uloc_minimizeSubtags uloc_minimizeSubtags_DRAFT_API_DO_NOT_USE
+#        define uregex_getMatchCallback uregex_getMatchCallback_DRAFT_API_DO_NOT_USE
+#        define uregex_getStackLimit uregex_getStackLimit_DRAFT_API_DO_NOT_USE
+#        define uregex_getTimeLimit uregex_getTimeLimit_DRAFT_API_DO_NOT_USE
+#        define uregex_hasAnchoringBounds uregex_hasAnchoringBounds_DRAFT_API_DO_NOT_USE
+#        define uregex_hasTransparentBounds uregex_hasTransparentBounds_DRAFT_API_DO_NOT_USE
+#        define uregex_hitEnd uregex_hitEnd_DRAFT_API_DO_NOT_USE
+#        define uregex_regionEnd uregex_regionEnd_DRAFT_API_DO_NOT_USE
+#        define uregex_regionStart uregex_regionStart_DRAFT_API_DO_NOT_USE
+#        define uregex_requireEnd uregex_requireEnd_DRAFT_API_DO_NOT_USE
+#        define uregex_setMatchCallback uregex_setMatchCallback_DRAFT_API_DO_NOT_USE
+#        define uregex_setRegion uregex_setRegion_DRAFT_API_DO_NOT_USE
+#        define uregex_setStackLimit uregex_setStackLimit_DRAFT_API_DO_NOT_USE
+#        define uregex_setTimeLimit uregex_setTimeLimit_DRAFT_API_DO_NOT_USE
+#        define uregex_useAnchoringBounds uregex_useAnchoringBounds_DRAFT_API_DO_NOT_USE
+#        define uregex_useTransparentBounds uregex_useTransparentBounds_DRAFT_API_DO_NOT_USE
+#    else
+#        define afkLanguageCode_4_0 afkLanguageCode_DRAFT_API_DO_NOT_USE
+#        define armiScriptCode_4_0 armiScriptCode_DRAFT_API_DO_NOT_USE
+#        define u_fclose_4_0 u_fclose_DRAFT_API_DO_NOT_USE
+#        define u_feof_4_0 u_feof_DRAFT_API_DO_NOT_USE
+#        define u_fflush_4_0 u_fflush_DRAFT_API_DO_NOT_USE
+#        define u_fgetConverter_4_0 u_fgetConverter_DRAFT_API_DO_NOT_USE
+#        define u_fgetc_4_0 u_fgetc_DRAFT_API_DO_NOT_USE
+#        define u_fgetcodepage_4_0 u_fgetcodepage_DRAFT_API_DO_NOT_USE
+#        define u_fgetcx_4_0 u_fgetcx_DRAFT_API_DO_NOT_USE
+#        define u_fgetfile_4_0 u_fgetfile_DRAFT_API_DO_NOT_USE
+#        define u_fgetlocale_4_0 u_fgetlocale_DRAFT_API_DO_NOT_USE
+#        define u_fgets_4_0 u_fgets_DRAFT_API_DO_NOT_USE
+#        define u_file_read_4_0 u_file_read_DRAFT_API_DO_NOT_USE
+#        define u_file_write_4_0 u_file_write_DRAFT_API_DO_NOT_USE
+#        define u_finit_4_0 u_finit_DRAFT_API_DO_NOT_USE
+#        define u_fopen_4_0 u_fopen_DRAFT_API_DO_NOT_USE
+#        define u_fprintf_4_0 u_fprintf_DRAFT_API_DO_NOT_USE
+#        define u_fprintf_u_4_0 u_fprintf_u_DRAFT_API_DO_NOT_USE
+#        define u_fputc_4_0 u_fputc_DRAFT_API_DO_NOT_USE
+#        define u_fputs_4_0 u_fputs_DRAFT_API_DO_NOT_USE
+#        define u_frewind_4_0 u_frewind_DRAFT_API_DO_NOT_USE
+#        define u_fscanf_4_0 u_fscanf_DRAFT_API_DO_NOT_USE
+#        define u_fscanf_u_4_0 u_fscanf_u_DRAFT_API_DO_NOT_USE
+#        define u_fsetcodepage_4_0 u_fsetcodepage_DRAFT_API_DO_NOT_USE
+#        define u_fsetlocale_4_0 u_fsetlocale_DRAFT_API_DO_NOT_USE
+#        define u_fsettransliterator_4_0 u_fsettransliterator_DRAFT_API_DO_NOT_USE
+#        define u_fstropen_4_0 u_fstropen_DRAFT_API_DO_NOT_USE
+#        define u_fungetc_4_0 u_fungetc_DRAFT_API_DO_NOT_USE
+#        define u_snprintf_4_0 u_snprintf_DRAFT_API_DO_NOT_USE
+#        define u_snprintf_u_4_0 u_snprintf_u_DRAFT_API_DO_NOT_USE
+#        define u_sprintf_4_0 u_sprintf_DRAFT_API_DO_NOT_USE
+#        define u_sprintf_u_4_0 u_sprintf_u_DRAFT_API_DO_NOT_USE
+#        define u_sscanf_4_0 u_sscanf_DRAFT_API_DO_NOT_USE
+#        define u_sscanf_u_4_0 u_sscanf_u_DRAFT_API_DO_NOT_USE
+#        define u_vfprintf_4_0 u_vfprintf_DRAFT_API_DO_NOT_USE
+#        define u_vfprintf_u_4_0 u_vfprintf_u_DRAFT_API_DO_NOT_USE
+#        define u_vfscanf_4_0 u_vfscanf_DRAFT_API_DO_NOT_USE
+#        define u_vfscanf_u_4_0 u_vfscanf_u_DRAFT_API_DO_NOT_USE
+#        define u_vsnprintf_4_0 u_vsnprintf_DRAFT_API_DO_NOT_USE
+#        define u_vsnprintf_u_4_0 u_vsnprintf_u_DRAFT_API_DO_NOT_USE
+#        define u_vsprintf_4_0 u_vsprintf_DRAFT_API_DO_NOT_USE
+#        define u_vsprintf_u_4_0 u_vsprintf_u_DRAFT_API_DO_NOT_USE
+#        define u_vsscanf_4_0 u_vsscanf_DRAFT_API_DO_NOT_USE
+#        define u_vsscanf_u_4_0 u_vsscanf_u_DRAFT_API_DO_NOT_USE
+#        define ucal_clone_4_0 ucal_clone_DRAFT_API_DO_NOT_USE
+#        define ucal_getCanonicalTimeZoneID_4_0 ucal_getCanonicalTimeZoneID_DRAFT_API_DO_NOT_USE
+#        define ucurr_countCurrencies_4_0 ucurr_countCurrencies_DRAFT_API_DO_NOT_USE
+#        define ucurr_forLocaleAndDate_4_0 ucurr_forLocaleAndDate_DRAFT_API_DO_NOT_USE
+#        define uloc_addLikelySubtags_4_0 uloc_addLikelySubtags_DRAFT_API_DO_NOT_USE
+#        define uloc_getCharacterOrientation_4_0 uloc_getCharacterOrientation_DRAFT_API_DO_NOT_USE
+#        define uloc_getLineOrientation_4_0 uloc_getLineOrientation_DRAFT_API_DO_NOT_USE
+#        define uloc_minimizeSubtags_4_0 uloc_minimizeSubtags_DRAFT_API_DO_NOT_USE
+#        define uregex_getMatchCallback_4_0 uregex_getMatchCallback_DRAFT_API_DO_NOT_USE
+#        define uregex_getStackLimit_4_0 uregex_getStackLimit_DRAFT_API_DO_NOT_USE
+#        define uregex_getTimeLimit_4_0 uregex_getTimeLimit_DRAFT_API_DO_NOT_USE
+#        define uregex_hasAnchoringBounds_4_0 uregex_hasAnchoringBounds_DRAFT_API_DO_NOT_USE
+#        define uregex_hasTransparentBounds_4_0 uregex_hasTransparentBounds_DRAFT_API_DO_NOT_USE
+#        define uregex_hitEnd_4_0 uregex_hitEnd_DRAFT_API_DO_NOT_USE
+#        define uregex_regionEnd_4_0 uregex_regionEnd_DRAFT_API_DO_NOT_USE
+#        define uregex_regionStart_4_0 uregex_regionStart_DRAFT_API_DO_NOT_USE
+#        define uregex_requireEnd_4_0 uregex_requireEnd_DRAFT_API_DO_NOT_USE
+#        define uregex_setMatchCallback_4_0 uregex_setMatchCallback_DRAFT_API_DO_NOT_USE
+#        define uregex_setRegion_4_0 uregex_setRegion_DRAFT_API_DO_NOT_USE
+#        define uregex_setStackLimit_4_0 uregex_setStackLimit_DRAFT_API_DO_NOT_USE
+#        define uregex_setTimeLimit_4_0 uregex_setTimeLimit_DRAFT_API_DO_NOT_USE
+#        define uregex_useAnchoringBounds_4_0 uregex_useAnchoringBounds_DRAFT_API_DO_NOT_USE
+#        define uregex_useTransparentBounds_4_0 uregex_useTransparentBounds_DRAFT_API_DO_NOT_USE
+#    endif /* U_DISABLE_RENAMING */
+
+#endif /* U_HIDE_DRAFT_API */
+#endif /* UDRAFT_H */
+

Deleted: MacRuby/trunk/icu-1060/unicode/uenum.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uenum.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uenum.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,134 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 2002-2005, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  uenum.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:2
-*
-*   created on: 2002jul08
-*   created by: Vladimir Weinstein
-*/
-
-#ifndef __UENUM_H
-#define __UENUM_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief C API: String Enumeration 
- */
- 
-/**
- * An enumeration object.
- * For usage in C programs.
- * @stable ICU 2.2
- */
-struct UEnumeration;
-/** structure representing an enumeration object instance @stable ICU 2.2 */
-typedef struct UEnumeration UEnumeration;
-
-/**
- * Disposes of resources in use by the iterator.  If en is NULL,
- * does nothing.  After this call, any char* or UChar* pointer
- * returned by uenum_unext() or uenum_next() is invalid.
- * @param en UEnumeration structure pointer
- * @stable ICU 2.2
- */
-U_STABLE void U_EXPORT2
-uenum_close(UEnumeration* en);
-
-/**
- * Returns the number of elements that the iterator traverses.  If
- * the iterator is out-of-sync with its service, status is set to
- * U_ENUM_OUT_OF_SYNC_ERROR.
- * This is a convenience function. It can end up being very
- * expensive as all the items might have to be pre-fetched (depending
- * on the type of data being traversed). Use with caution and only 
- * when necessary.
- * @param en UEnumeration structure pointer
- * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
- *               iterator is out of sync.
- * @return number of elements in the iterator
- * @stable ICU 2.2
- */
-U_STABLE int32_t U_EXPORT2
-uenum_count(UEnumeration* en, UErrorCode* status);
-
-/**
- * Returns the next element in the iterator's list.  If there are
- * no more elements, returns NULL.  If the iterator is out-of-sync
- * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
- * NULL is returned.  If the native service string is a char* string,
- * it is converted to UChar* with the invariant converter.
- * The result is terminated by (UChar)0.
- * @param en the iterator object
- * @param resultLength pointer to receive the length of the result
- *                     (not including the terminating \\0).
- *                     If the pointer is NULL it is ignored.
- * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
- *               the iterator is out of sync with its service.
- * @return a pointer to the string.  The string will be
- *         zero-terminated.  The return pointer is owned by this iterator
- *         and must not be deleted by the caller.  The pointer is valid
- *         until the next call to any uenum_... method, including
- *         uenum_next() or uenum_unext().  When all strings have been
- *         traversed, returns NULL.
- * @stable ICU 2.2
- */
-U_STABLE const UChar* U_EXPORT2
-uenum_unext(UEnumeration* en,
-            int32_t* resultLength,
-            UErrorCode* status);
-
-/**
- * Returns the next element in the iterator's list.  If there are
- * no more elements, returns NULL.  If the iterator is out-of-sync
- * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
- * NULL is returned.  If the native service string is a UChar*
- * string, it is converted to char* with the invariant converter.
- * The result is terminated by (char)0.  If the conversion fails
- * (because a character cannot be converted) then status is set to
- * U_INVARIANT_CONVERSION_ERROR and the return value is undefined
- * (but non-NULL).
- * @param en the iterator object
- * @param resultLength pointer to receive the length of the result
- *                     (not including the terminating \\0).
- *                     If the pointer is NULL it is ignored.
- * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
- *               the iterator is out of sync with its service.  Set to
- *               U_INVARIANT_CONVERSION_ERROR if the underlying native string is
- *               UChar* and conversion to char* with the invariant converter
- *               fails. This error pertains only to current string, so iteration
- *               might be able to continue successfully.
- * @return a pointer to the string.  The string will be
- *         zero-terminated.  The return pointer is owned by this iterator
- *         and must not be deleted by the caller.  The pointer is valid
- *         until the next call to any uenum_... method, including
- *         uenum_next() or uenum_unext().  When all strings have been
- *         traversed, returns NULL.
- * @stable ICU 2.2
- */
-U_STABLE const char* U_EXPORT2
-uenum_next(UEnumeration* en,
-           int32_t* resultLength,
-           UErrorCode* status);
-
-/**
- * Resets the iterator to the current list of service IDs.  This
- * re-establishes sync with the service and rewinds the iterator
- * to start at the first element.
- * @param en the iterator object
- * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
- *               the iterator is out of sync with its service.  
- * @stable ICU 2.2
- */
-U_STABLE void U_EXPORT2
-uenum_reset(UEnumeration* en, UErrorCode* status);
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/uenum.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uenum.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uenum.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uenum.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,134 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2002-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uenum.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:2
+*
+*   created on: 2002jul08
+*   created by: Vladimir Weinstein
+*/
+
+#ifndef __UENUM_H
+#define __UENUM_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: String Enumeration 
+ */
+ 
+/**
+ * An enumeration object.
+ * For usage in C programs.
+ * @stable ICU 2.2
+ */
+struct UEnumeration;
+/** structure representing an enumeration object instance @stable ICU 2.2 */
+typedef struct UEnumeration UEnumeration;
+
+/**
+ * Disposes of resources in use by the iterator.  If en is NULL,
+ * does nothing.  After this call, any char* or UChar* pointer
+ * returned by uenum_unext() or uenum_next() is invalid.
+ * @param en UEnumeration structure pointer
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uenum_close(UEnumeration* en);
+
+/**
+ * Returns the number of elements that the iterator traverses.  If
+ * the iterator is out-of-sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR.
+ * This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched (depending
+ * on the type of data being traversed). Use with caution and only 
+ * when necessary.
+ * @param en UEnumeration structure pointer
+ * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
+ *               iterator is out of sync.
+ * @return number of elements in the iterator
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+uenum_count(UEnumeration* en, UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list.  If there are
+ * no more elements, returns NULL.  If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned.  If the native service string is a char* string,
+ * it is converted to UChar* with the invariant converter.
+ * The result is terminated by (UChar)0.
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ *                     (not including the terminating \\0).
+ *                     If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ *               the iterator is out of sync with its service.
+ * @return a pointer to the string.  The string will be
+ *         zero-terminated.  The return pointer is owned by this iterator
+ *         and must not be deleted by the caller.  The pointer is valid
+ *         until the next call to any uenum_... method, including
+ *         uenum_next() or uenum_unext().  When all strings have been
+ *         traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_STABLE const UChar* U_EXPORT2
+uenum_unext(UEnumeration* en,
+            int32_t* resultLength,
+            UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list.  If there are
+ * no more elements, returns NULL.  If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned.  If the native service string is a UChar*
+ * string, it is converted to char* with the invariant converter.
+ * The result is terminated by (char)0.  If the conversion fails
+ * (because a character cannot be converted) then status is set to
+ * U_INVARIANT_CONVERSION_ERROR and the return value is undefined
+ * (but non-NULL).
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ *                     (not including the terminating \\0).
+ *                     If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ *               the iterator is out of sync with its service.  Set to
+ *               U_INVARIANT_CONVERSION_ERROR if the underlying native string is
+ *               UChar* and conversion to char* with the invariant converter
+ *               fails. This error pertains only to current string, so iteration
+ *               might be able to continue successfully.
+ * @return a pointer to the string.  The string will be
+ *         zero-terminated.  The return pointer is owned by this iterator
+ *         and must not be deleted by the caller.  The pointer is valid
+ *         until the next call to any uenum_... method, including
+ *         uenum_next() or uenum_unext().  When all strings have been
+ *         traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_STABLE const char* U_EXPORT2
+uenum_next(UEnumeration* en,
+           int32_t* resultLength,
+           UErrorCode* status);
+
+/**
+ * Resets the iterator to the current list of service IDs.  This
+ * re-establishes sync with the service and rewinds the iterator
+ * to start at the first element.
+ * @param en the iterator object
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ *               the iterator is out of sync with its service.  
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uenum_reset(UEnumeration* en, UErrorCode* status);
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/uidna.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uidna.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uidna.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,312 +0,0 @@
-/*
- *******************************************************************************
- *
- *   Copyright (C) 2003-2007, International Business Machines
- *   Corporation and others.  All Rights Reserved.
- *
- *******************************************************************************
- *   file name:  uidna.h
- *   encoding:   US-ASCII
- *   tab size:   8 (not used)
- *   indentation:4
- *
- *   created on: 2003feb1
- *   created by: Ram Viswanadha
- */
-
-#ifndef __UIDNA_H__
-#define __UIDNA_H__
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_IDNA
-
-#include "unicode/parseerr.h"
-  
-/**
- * \file
- * \brief C API: Internationalized Domain Names in Applications Tranformation
- *
- * UIDNA API implements the IDNA protocol as defined in the IDNA RFC 
- * (http://www.ietf.org/rfc/rfc3490.txt).
- * The RFC defines 2 operations: ToASCII and ToUnicode. Domain labels 
- * containing non-ASCII code points are required to be processed by
- * ToASCII operation before passing it to resolver libraries. Domain names
- * that are obtained from resolver libraries are required to be processed by
- * ToUnicode operation before displaying the domain name to the user.
- * IDNA requires that implementations process input strings with Nameprep
- * (http://www.ietf.org/rfc/rfc3491.txt), 
- * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt), 
- * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt). 
- * Implementations of IDNA MUST fully implement Nameprep and Punycode; 
- * neither Nameprep nor Punycode are optional.
- * The input and output of ToASCII and ToUnicode operations are Unicode 
- * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
- * multiple times to an input string will yield the same result as applying the operation
- * once.
- * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) 
- * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
- *
- */
-
-/** 
- * Option to prohibit processing of unassigned codepoints in the input and
- * do not check if the input conforms to STD-3 ASCII rules.
- * 
- * @see  uidna_toASCII uidna_toUnicode
- * @stable ICU 2.6
- */
-#define UIDNA_DEFAULT          0x0000
-/** 
- * Option to allow processing of unassigned codepoints in the input
- * 
- * @see  uidna_toASCII uidna_toUnicode
- * @stable ICU 2.6
- */
-#define UIDNA_ALLOW_UNASSIGNED 0x0001
-/** 
- * Option to check if input conforms to STD-3 ASCII rules
- * 
- * @see  uidna_toASCII uidna_toUnicode
- * @stable ICU 2.6
- */
-#define UIDNA_USE_STD3_RULES   0x0002
-
-/**
- * This function implements the ToASCII operation as defined in the IDNA RFC.
- * This operation is done on <b>single labels</b> before sending it to something that expects
- * ASCII names. A label is an individual part of a domain name. Labels are usually
- * separated by dots; e.g." "www.example.com" is composed of 3 labels 
- * "www","example", and "com".
- *
- *
- * @param src               Input UChar array containing label in Unicode.
- * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
- * @param dest              Output UChar array with ASCII (ACE encoded) label.
- * @param destCapacity      Size of dest.
- * @param options           A bit set of options:
- *
- *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
- *                              and do not use STD3 ASCII rules
- *                              If unassigned code points are found the operation fails with 
- *                              U_UNASSIGNED_ERROR error code.
- *
- *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
- *                              If this option is set, the unassigned code points are in the input 
- *                              are treated as normal Unicode code points.
- *                          
- *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
- *                              If this option is set and the input does not satisfy STD3 rules,  
- *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
- *
- * @param parseError        Pointer to UParseError struct to receive information on position 
- *                          of error if an error is encountered. Can be NULL.
- * @param status            ICU in/out error code parameter.
- *                          U_INVALID_CHAR_FOUND if src contains
- *                          unmatched single surrogates.
- *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
- *                          too many code points.
- *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
- * @return The length of the result string, if successful - or in case of a buffer overflow,
- *         in which case it will be greater than destCapacity.
- * @stable ICU 2.6
- */
-U_STABLE int32_t U_EXPORT2
-uidna_toASCII(const UChar* src, int32_t srcLength, 
-              UChar* dest, int32_t destCapacity,
-              int32_t options,
-              UParseError* parseError,
-              UErrorCode* status);
-
-
-/**
- * This function implements the ToUnicode operation as defined in the IDNA RFC.
- * This operation is done on <b>single labels</b> before sending it to something that expects
- * Unicode names. A label is an individual part of a domain name. Labels are usually
- * separated by dots; for e.g." "www.example.com" is composed of 3 labels 
- * "www","example", and "com".
- *
- * @param src               Input UChar array containing ASCII (ACE encoded) label.
- * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
- * @param dest Output       Converted UChar array containing Unicode equivalent of label.
- * @param destCapacity      Size of dest.
- * @param options           A bit set of options:
- *  
- *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
- *                              and do not use STD3 ASCII rules
- *                              If unassigned code points are found the operation fails with 
- *                              U_UNASSIGNED_ERROR error code.
- *
- *  - UIDNA_ALLOW_UNASSIGNED      Unassigned values can be converted to ASCII for query operations
- *                              If this option is set, the unassigned code points are in the input 
- *                              are treated as normal Unicode code points. <b> Note: </b> This option is 
- *                              required on toUnicode operation because the RFC mandates 
- *                              verification of decoded ACE input by applying toASCII and comparing
- *                              its output with source
- *
- *                          
- *                          
- *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
- *                              If this option is set and the input does not satisfy STD3 rules,  
- *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
- *
- * @param parseError        Pointer to UParseError struct to receive information on position 
- *                          of error if an error is encountered. Can be NULL.
- * @param status            ICU in/out error code parameter.
- *                          U_INVALID_CHAR_FOUND if src contains
- *                          unmatched single surrogates.
- *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
- *                          too many code points.
- *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
- * @return The length of the result string, if successful - or in case of a buffer overflow,
- *         in which case it will be greater than destCapacity.
- * @stable ICU 2.6
- */
-U_STABLE int32_t U_EXPORT2
-uidna_toUnicode(const UChar* src, int32_t srcLength,
-                UChar* dest, int32_t destCapacity,
-                int32_t options,
-                UParseError* parseError,
-                UErrorCode* status);
-
-
-/**
- * Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
- * This operation is done on complete domain names, e.g: "www.example.com". 
- * It is important to note that this operation can fail. If it fails, then the input 
- * domain name cannot be used as an Internationalized Domain Name and the application
- * should have methods defined to deal with the failure.
- * 
- * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
- * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 
- * and then convert. This function does not offer that level of granularity. The options once  
- * set will apply to all labels in the domain name
- *
- * @param src               Input UChar array containing IDN in Unicode.
- * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
- * @param dest              Output UChar array with ASCII (ACE encoded) IDN.
- * @param destCapacity      Size of dest.
- * @param options           A bit set of options:
- *  
- *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
- *                              and do not use STD3 ASCII rules
- *                              If unassigned code points are found the operation fails with 
- *                              U_UNASSIGNED_CODE_POINT_FOUND error code.
- *
- *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
- *                              If this option is set, the unassigned code points are in the input 
- *                              are treated as normal Unicode code points.
- *                          
- *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
- *                              If this option is set and the input does not satisfy STD3 rules,  
- *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
- * 
- * @param parseError        Pointer to UParseError struct to receive information on position 
- *                          of error if an error is encountered. Can be NULL.
- * @param status            ICU in/out error code parameter.
- *                          U_INVALID_CHAR_FOUND if src contains
- *                          unmatched single surrogates.
- *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
- *                          too many code points.
- *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
- * @return The length of the result string, if successful - or in case of a buffer overflow,
- *         in which case it will be greater than destCapacity.
- * @stable ICU 2.6
- */
-U_STABLE int32_t U_EXPORT2
-uidna_IDNToASCII(  const UChar* src, int32_t srcLength,
-                   UChar* dest, int32_t destCapacity,
-                   int32_t options,
-                   UParseError* parseError,
-                   UErrorCode* status);
-
-/**
- * Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
- * This operation is done on complete domain names, e.g: "www.example.com". 
- *
- * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
- * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 
- * and then convert. This function does not offer that level of granularity. The options once  
- * set will apply to all labels in the domain name
- *
- * @param src               Input UChar array containing IDN in ASCII (ACE encoded) form.
- * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
- * @param dest Output       UChar array containing Unicode equivalent of source IDN.
- * @param destCapacity      Size of dest.
- * @param options           A bit set of options:
- *  
- *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
- *                              and do not use STD3 ASCII rules
- *                              If unassigned code points are found the operation fails with 
- *                              U_UNASSIGNED_CODE_POINT_FOUND error code.
- *
- *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
- *                              If this option is set, the unassigned code points are in the input 
- *                              are treated as normal Unicode code points.
- *                          
- *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
- *                              If this option is set and the input does not satisfy STD3 rules,  
- *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
- *
- * @param parseError        Pointer to UParseError struct to receive information on position 
- *                          of error if an error is encountered. Can be NULL.
- * @param status            ICU in/out error code parameter.
- *                          U_INVALID_CHAR_FOUND if src contains
- *                          unmatched single surrogates.
- *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
- *                          too many code points.
- *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
- * @return The length of the result string, if successful - or in case of a buffer overflow,
- *         in which case it will be greater than destCapacity.
- * @stable ICU 2.6
- */
-U_STABLE int32_t U_EXPORT2
-uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
-                     UChar* dest, int32_t destCapacity,
-                     int32_t options,
-                     UParseError* parseError,
-                     UErrorCode* status);
-
-/**
- * Compare two IDN strings for equivalence.
- * This function splits the domain names into labels and compares them.
- * According to IDN RFC, whenever two labels are compared, they are 
- * considered equal if and only if their ASCII forms (obtained by 
- * applying toASCII) match using an case-insensitive ASCII comparison.
- * Two domain names are considered a match if and only if all labels 
- * match regardless of whether label separators match.
- *
- * @param s1                First source string.
- * @param length1           Length of first source string, or -1 if NUL-terminated.
- *
- * @param s2                Second source string.
- * @param length2           Length of second source string, or -1 if NUL-terminated.
- * @param options           A bit set of options:
- *  
- *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
- *                              and do not use STD3 ASCII rules
- *                              If unassigned code points are found the operation fails with 
- *                              U_UNASSIGNED_CODE_POINT_FOUND error code.
- *
- *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
- *                              If this option is set, the unassigned code points are in the input 
- *                              are treated as normal Unicode code points.
- *                          
- *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
- *                              If this option is set and the input does not satisfy STD3 rules,  
- *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
- *
- * @param status            ICU error code in/out parameter.
- *                          Must fulfill U_SUCCESS before the function call.
- * @return <0 or 0 or >0 as usual for string comparisons
- * @stable ICU 2.6
- */
-U_STABLE int32_t U_EXPORT2
-uidna_compare(  const UChar *s1, int32_t length1,
-                const UChar *s2, int32_t length2,
-                int32_t options,
-                UErrorCode* status);
-
-#endif /* #if !UCONFIG_NO_IDNA */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/uidna.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uidna.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uidna.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uidna.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,312 @@
+/*
+ *******************************************************************************
+ *
+ *   Copyright (C) 2003-2007, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *
+ *******************************************************************************
+ *   file name:  uidna.h
+ *   encoding:   US-ASCII
+ *   tab size:   8 (not used)
+ *   indentation:4
+ *
+ *   created on: 2003feb1
+ *   created by: Ram Viswanadha
+ */
+
+#ifndef __UIDNA_H__
+#define __UIDNA_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/parseerr.h"
+  
+/**
+ * \file
+ * \brief C API: Internationalized Domain Names in Applications Tranformation
+ *
+ * UIDNA API implements the IDNA protocol as defined in the IDNA RFC 
+ * (http://www.ietf.org/rfc/rfc3490.txt).
+ * The RFC defines 2 operations: ToASCII and ToUnicode. Domain labels 
+ * containing non-ASCII code points are required to be processed by
+ * ToASCII operation before passing it to resolver libraries. Domain names
+ * that are obtained from resolver libraries are required to be processed by
+ * ToUnicode operation before displaying the domain name to the user.
+ * IDNA requires that implementations process input strings with Nameprep
+ * (http://www.ietf.org/rfc/rfc3491.txt), 
+ * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt), 
+ * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt). 
+ * Implementations of IDNA MUST fully implement Nameprep and Punycode; 
+ * neither Nameprep nor Punycode are optional.
+ * The input and output of ToASCII and ToUnicode operations are Unicode 
+ * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
+ * multiple times to an input string will yield the same result as applying the operation
+ * once.
+ * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) 
+ * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
+ *
+ */
+
+/** 
+ * Option to prohibit processing of unassigned codepoints in the input and
+ * do not check if the input conforms to STD-3 ASCII rules.
+ * 
+ * @see  uidna_toASCII uidna_toUnicode
+ * @stable ICU 2.6
+ */
+#define UIDNA_DEFAULT          0x0000
+/** 
+ * Option to allow processing of unassigned codepoints in the input
+ * 
+ * @see  uidna_toASCII uidna_toUnicode
+ * @stable ICU 2.6
+ */
+#define UIDNA_ALLOW_UNASSIGNED 0x0001
+/** 
+ * Option to check if input conforms to STD-3 ASCII rules
+ * 
+ * @see  uidna_toASCII uidna_toUnicode
+ * @stable ICU 2.6
+ */
+#define UIDNA_USE_STD3_RULES   0x0002
+
+/**
+ * This function implements the ToASCII operation as defined in the IDNA RFC.
+ * This operation is done on <b>single labels</b> before sending it to something that expects
+ * ASCII names. A label is an individual part of a domain name. Labels are usually
+ * separated by dots; e.g." "www.example.com" is composed of 3 labels 
+ * "www","example", and "com".
+ *
+ *
+ * @param src               Input UChar array containing label in Unicode.
+ * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest              Output UChar array with ASCII (ACE encoded) label.
+ * @param destCapacity      Size of dest.
+ * @param options           A bit set of options:
+ *
+ *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
+ *                              and do not use STD3 ASCII rules
+ *                              If unassigned code points are found the operation fails with 
+ *                              U_UNASSIGNED_ERROR error code.
+ *
+ *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
+ *                              If this option is set, the unassigned code points are in the input 
+ *                              are treated as normal Unicode code points.
+ *                          
+ *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
+ *                              If this option is set and the input does not satisfy STD3 rules,  
+ *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError        Pointer to UParseError struct to receive information on position 
+ *                          of error if an error is encountered. Can be NULL.
+ * @param status            ICU in/out error code parameter.
+ *                          U_INVALID_CHAR_FOUND if src contains
+ *                          unmatched single surrogates.
+ *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ *                          too many code points.
+ *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_toASCII(const UChar* src, int32_t srcLength, 
+              UChar* dest, int32_t destCapacity,
+              int32_t options,
+              UParseError* parseError,
+              UErrorCode* status);
+
+
+/**
+ * This function implements the ToUnicode operation as defined in the IDNA RFC.
+ * This operation is done on <b>single labels</b> before sending it to something that expects
+ * Unicode names. A label is an individual part of a domain name. Labels are usually
+ * separated by dots; for e.g." "www.example.com" is composed of 3 labels 
+ * "www","example", and "com".
+ *
+ * @param src               Input UChar array containing ASCII (ACE encoded) label.
+ * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output       Converted UChar array containing Unicode equivalent of label.
+ * @param destCapacity      Size of dest.
+ * @param options           A bit set of options:
+ *  
+ *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
+ *                              and do not use STD3 ASCII rules
+ *                              If unassigned code points are found the operation fails with 
+ *                              U_UNASSIGNED_ERROR error code.
+ *
+ *  - UIDNA_ALLOW_UNASSIGNED      Unassigned values can be converted to ASCII for query operations
+ *                              If this option is set, the unassigned code points are in the input 
+ *                              are treated as normal Unicode code points. <b> Note: </b> This option is 
+ *                              required on toUnicode operation because the RFC mandates 
+ *                              verification of decoded ACE input by applying toASCII and comparing
+ *                              its output with source
+ *
+ *                          
+ *                          
+ *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
+ *                              If this option is set and the input does not satisfy STD3 rules,  
+ *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError        Pointer to UParseError struct to receive information on position 
+ *                          of error if an error is encountered. Can be NULL.
+ * @param status            ICU in/out error code parameter.
+ *                          U_INVALID_CHAR_FOUND if src contains
+ *                          unmatched single surrogates.
+ *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ *                          too many code points.
+ *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_toUnicode(const UChar* src, int32_t srcLength,
+                UChar* dest, int32_t destCapacity,
+                int32_t options,
+                UParseError* parseError,
+                UErrorCode* status);
+
+
+/**
+ * Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
+ * This operation is done on complete domain names, e.g: "www.example.com". 
+ * It is important to note that this operation can fail. If it fails, then the input 
+ * domain name cannot be used as an Internationalized Domain Name and the application
+ * should have methods defined to deal with the failure.
+ * 
+ * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 
+ * and then convert. This function does not offer that level of granularity. The options once  
+ * set will apply to all labels in the domain name
+ *
+ * @param src               Input UChar array containing IDN in Unicode.
+ * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest              Output UChar array with ASCII (ACE encoded) IDN.
+ * @param destCapacity      Size of dest.
+ * @param options           A bit set of options:
+ *  
+ *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
+ *                              and do not use STD3 ASCII rules
+ *                              If unassigned code points are found the operation fails with 
+ *                              U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *
+ *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
+ *                              If this option is set, the unassigned code points are in the input 
+ *                              are treated as normal Unicode code points.
+ *                          
+ *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
+ *                              If this option is set and the input does not satisfy STD3 rules,  
+ *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ * 
+ * @param parseError        Pointer to UParseError struct to receive information on position 
+ *                          of error if an error is encountered. Can be NULL.
+ * @param status            ICU in/out error code parameter.
+ *                          U_INVALID_CHAR_FOUND if src contains
+ *                          unmatched single surrogates.
+ *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ *                          too many code points.
+ *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_IDNToASCII(  const UChar* src, int32_t srcLength,
+                   UChar* dest, int32_t destCapacity,
+                   int32_t options,
+                   UParseError* parseError,
+                   UErrorCode* status);
+
+/**
+ * Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
+ * This operation is done on complete domain names, e.g: "www.example.com". 
+ *
+ * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 
+ * and then convert. This function does not offer that level of granularity. The options once  
+ * set will apply to all labels in the domain name
+ *
+ * @param src               Input UChar array containing IDN in ASCII (ACE encoded) form.
+ * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output       UChar array containing Unicode equivalent of source IDN.
+ * @param destCapacity      Size of dest.
+ * @param options           A bit set of options:
+ *  
+ *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
+ *                              and do not use STD3 ASCII rules
+ *                              If unassigned code points are found the operation fails with 
+ *                              U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *
+ *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
+ *                              If this option is set, the unassigned code points are in the input 
+ *                              are treated as normal Unicode code points.
+ *                          
+ *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
+ *                              If this option is set and the input does not satisfy STD3 rules,  
+ *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError        Pointer to UParseError struct to receive information on position 
+ *                          of error if an error is encountered. Can be NULL.
+ * @param status            ICU in/out error code parameter.
+ *                          U_INVALID_CHAR_FOUND if src contains
+ *                          unmatched single surrogates.
+ *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ *                          too many code points.
+ *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
+                     UChar* dest, int32_t destCapacity,
+                     int32_t options,
+                     UParseError* parseError,
+                     UErrorCode* status);
+
+/**
+ * Compare two IDN strings for equivalence.
+ * This function splits the domain names into labels and compares them.
+ * According to IDN RFC, whenever two labels are compared, they are 
+ * considered equal if and only if their ASCII forms (obtained by 
+ * applying toASCII) match using an case-insensitive ASCII comparison.
+ * Two domain names are considered a match if and only if all labels 
+ * match regardless of whether label separators match.
+ *
+ * @param s1                First source string.
+ * @param length1           Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2                Second source string.
+ * @param length2           Length of second source string, or -1 if NUL-terminated.
+ * @param options           A bit set of options:
+ *  
+ *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
+ *                              and do not use STD3 ASCII rules
+ *                              If unassigned code points are found the operation fails with 
+ *                              U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *
+ *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
+ *                              If this option is set, the unassigned code points are in the input 
+ *                              are treated as normal Unicode code points.
+ *                          
+ *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
+ *                              If this option is set and the input does not satisfy STD3 rules,  
+ *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param status            ICU error code in/out parameter.
+ *                          Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+uidna_compare(  const UChar *s1, int32_t length1,
+                const UChar *s2, int32_t length2,
+                int32_t options,
+                UErrorCode* status);
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/uintrnal.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uintrnal.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uintrnal.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,180 +0,0 @@
-/*
-*******************************************************************************
-*   Copyright (C) 2004-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*******************************************************************************
-*
-*   file name:  
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   Created by: genheaders.pl, a perl script written by Ram Viswanadha
-*
-*  Contains data for commenting out APIs.
-*  Gets included by umachine.h
-*
-*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
-*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
-*/
-
-#ifndef UINTRNAL_H
-#define UINTRNAL_H
-
-#ifdef U_HIDE_INTERNAL_API
-
-#    if U_DISABLE_RENAMING
-#        define RegexPatternDump RegexPatternDump_INTERNAL_API_DO_NOT_USE
-#        define pl_addFontRun pl_addFontRun_INTERNAL_API_DO_NOT_USE
-#        define pl_addLocaleRun pl_addLocaleRun_INTERNAL_API_DO_NOT_USE
-#        define pl_addValueRun pl_addValueRun_INTERNAL_API_DO_NOT_USE
-#        define pl_close pl_close_INTERNAL_API_DO_NOT_USE
-#        define pl_closeFontRuns pl_closeFontRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_closeLine pl_closeLine_INTERNAL_API_DO_NOT_USE
-#        define pl_closeLocaleRuns pl_closeLocaleRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_closeValueRuns pl_closeValueRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_countLineRuns pl_countLineRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_create pl_create_INTERNAL_API_DO_NOT_USE
-#        define pl_getAscent pl_getAscent_INTERNAL_API_DO_NOT_USE
-#        define pl_getDescent pl_getDescent_INTERNAL_API_DO_NOT_USE
-#        define pl_getFontRunCount pl_getFontRunCount_INTERNAL_API_DO_NOT_USE
-#        define pl_getFontRunFont pl_getFontRunFont_INTERNAL_API_DO_NOT_USE
-#        define pl_getFontRunLastLimit pl_getFontRunLastLimit_INTERNAL_API_DO_NOT_USE
-#        define pl_getFontRunLimit pl_getFontRunLimit_INTERNAL_API_DO_NOT_USE
-#        define pl_getLeading pl_getLeading_INTERNAL_API_DO_NOT_USE
-#        define pl_getLineAscent pl_getLineAscent_INTERNAL_API_DO_NOT_USE
-#        define pl_getLineDescent pl_getLineDescent_INTERNAL_API_DO_NOT_USE
-#        define pl_getLineLeading pl_getLineLeading_INTERNAL_API_DO_NOT_USE
-#        define pl_getLineVisualRun pl_getLineVisualRun_INTERNAL_API_DO_NOT_USE
-#        define pl_getLineWidth pl_getLineWidth_INTERNAL_API_DO_NOT_USE
-#        define pl_getLocaleRunCount pl_getLocaleRunCount_INTERNAL_API_DO_NOT_USE
-#        define pl_getLocaleRunLastLimit pl_getLocaleRunLastLimit_INTERNAL_API_DO_NOT_USE
-#        define pl_getLocaleRunLimit pl_getLocaleRunLimit_INTERNAL_API_DO_NOT_USE
-#        define pl_getLocaleRunLocale pl_getLocaleRunLocale_INTERNAL_API_DO_NOT_USE
-#        define pl_getParagraphLevel pl_getParagraphLevel_INTERNAL_API_DO_NOT_USE
-#        define pl_getTextDirection pl_getTextDirection_INTERNAL_API_DO_NOT_USE
-#        define pl_getValueRunCount pl_getValueRunCount_INTERNAL_API_DO_NOT_USE
-#        define pl_getValueRunLastLimit pl_getValueRunLastLimit_INTERNAL_API_DO_NOT_USE
-#        define pl_getValueRunLimit pl_getValueRunLimit_INTERNAL_API_DO_NOT_USE
-#        define pl_getValueRunValue pl_getValueRunValue_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunAscent pl_getVisualRunAscent_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunDescent pl_getVisualRunDescent_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunDirection pl_getVisualRunDirection_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunFont pl_getVisualRunFont_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunGlyphCount pl_getVisualRunGlyphCount_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunGlyphToCharMap pl_getVisualRunGlyphToCharMap_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunGlyphs pl_getVisualRunGlyphs_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunLeading pl_getVisualRunLeading_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunPositions pl_getVisualRunPositions_INTERNAL_API_DO_NOT_USE
-#        define pl_isComplex pl_isComplex_INTERNAL_API_DO_NOT_USE
-#        define pl_line pl_line_INTERNAL_API_DO_NOT_USE
-#        define pl_nextLine pl_nextLine_INTERNAL_API_DO_NOT_USE
-#        define pl_openEmptyFontRuns pl_openEmptyFontRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_openEmptyLocaleRuns pl_openEmptyLocaleRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_openEmptyValueRuns pl_openEmptyValueRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_openFontRuns pl_openFontRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_openLocaleRuns pl_openLocaleRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_openValueRuns pl_openValueRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_paragraph pl_paragraph_INTERNAL_API_DO_NOT_USE
-#        define pl_reflow pl_reflow_INTERNAL_API_DO_NOT_USE
-#        define pl_resetFontRuns pl_resetFontRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_resetLocaleRuns pl_resetLocaleRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_resetValueRuns pl_resetValueRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_visualRun pl_visualRun_INTERNAL_API_DO_NOT_USE
-#        define ucol_equals ucol_equals_INTERNAL_API_DO_NOT_USE
-#        define ucol_forgetUCA ucol_forgetUCA_INTERNAL_API_DO_NOT_USE
-#        define ucol_getAttributeOrDefault ucol_getAttributeOrDefault_INTERNAL_API_DO_NOT_USE
-#        define ucol_getUnsafeSet ucol_getUnsafeSet_INTERNAL_API_DO_NOT_USE
-#        define ucol_nextProcessed ucol_nextProcessed_INTERNAL_API_DO_NOT_USE
-#        define ucol_prepareShortStringOpen ucol_prepareShortStringOpen_INTERNAL_API_DO_NOT_USE
-#        define ucol_previousProcessed ucol_previousProcessed_INTERNAL_API_DO_NOT_USE
-#        define uprv_getDefaultCodepage uprv_getDefaultCodepage_INTERNAL_API_DO_NOT_USE
-#        define uprv_getDefaultLocaleID uprv_getDefaultLocaleID_INTERNAL_API_DO_NOT_USE
-#        define ures_openFillIn ures_openFillIn_INTERNAL_API_DO_NOT_USE
-#        define usearch_search usearch_search_INTERNAL_API_DO_NOT_USE
-#        define usearch_searchBackwards usearch_searchBackwards_INTERNAL_API_DO_NOT_USE
-#        define utf8_appendCharSafeBody utf8_appendCharSafeBody_INTERNAL_API_DO_NOT_USE
-#        define utf8_back1SafeBody utf8_back1SafeBody_INTERNAL_API_DO_NOT_USE
-#        define utf8_countTrailBytes utf8_countTrailBytes_INTERNAL_API_DO_NOT_USE
-#        define utf8_nextCharSafeBody utf8_nextCharSafeBody_INTERNAL_API_DO_NOT_USE
-#        define utf8_prevCharSafeBody utf8_prevCharSafeBody_INTERNAL_API_DO_NOT_USE
-#    else
-#        define RegexPatternDump_4_0 RegexPatternDump_INTERNAL_API_DO_NOT_USE
-#        define pl_addFontRun_4_0 pl_addFontRun_INTERNAL_API_DO_NOT_USE
-#        define pl_addLocaleRun_4_0 pl_addLocaleRun_INTERNAL_API_DO_NOT_USE
-#        define pl_addValueRun_4_0 pl_addValueRun_INTERNAL_API_DO_NOT_USE
-#        define pl_closeFontRuns_4_0 pl_closeFontRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_closeLine_4_0 pl_closeLine_INTERNAL_API_DO_NOT_USE
-#        define pl_closeLocaleRuns_4_0 pl_closeLocaleRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_closeValueRuns_4_0 pl_closeValueRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_close_4_0 pl_close_INTERNAL_API_DO_NOT_USE
-#        define pl_countLineRuns_4_0 pl_countLineRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_create_4_0 pl_create_INTERNAL_API_DO_NOT_USE
-#        define pl_getAscent_4_0 pl_getAscent_INTERNAL_API_DO_NOT_USE
-#        define pl_getDescent_4_0 pl_getDescent_INTERNAL_API_DO_NOT_USE
-#        define pl_getFontRunCount_4_0 pl_getFontRunCount_INTERNAL_API_DO_NOT_USE
-#        define pl_getFontRunFont_4_0 pl_getFontRunFont_INTERNAL_API_DO_NOT_USE
-#        define pl_getFontRunLastLimit_4_0 pl_getFontRunLastLimit_INTERNAL_API_DO_NOT_USE
-#        define pl_getFontRunLimit_4_0 pl_getFontRunLimit_INTERNAL_API_DO_NOT_USE
-#        define pl_getLeading_4_0 pl_getLeading_INTERNAL_API_DO_NOT_USE
-#        define pl_getLineAscent_4_0 pl_getLineAscent_INTERNAL_API_DO_NOT_USE
-#        define pl_getLineDescent_4_0 pl_getLineDescent_INTERNAL_API_DO_NOT_USE
-#        define pl_getLineLeading_4_0 pl_getLineLeading_INTERNAL_API_DO_NOT_USE
-#        define pl_getLineVisualRun_4_0 pl_getLineVisualRun_INTERNAL_API_DO_NOT_USE
-#        define pl_getLineWidth_4_0 pl_getLineWidth_INTERNAL_API_DO_NOT_USE
-#        define pl_getLocaleRunCount_4_0 pl_getLocaleRunCount_INTERNAL_API_DO_NOT_USE
-#        define pl_getLocaleRunLastLimit_4_0 pl_getLocaleRunLastLimit_INTERNAL_API_DO_NOT_USE
-#        define pl_getLocaleRunLimit_4_0 pl_getLocaleRunLimit_INTERNAL_API_DO_NOT_USE
-#        define pl_getLocaleRunLocale_4_0 pl_getLocaleRunLocale_INTERNAL_API_DO_NOT_USE
-#        define pl_getParagraphLevel_4_0 pl_getParagraphLevel_INTERNAL_API_DO_NOT_USE
-#        define pl_getTextDirection_4_0 pl_getTextDirection_INTERNAL_API_DO_NOT_USE
-#        define pl_getValueRunCount_4_0 pl_getValueRunCount_INTERNAL_API_DO_NOT_USE
-#        define pl_getValueRunLastLimit_4_0 pl_getValueRunLastLimit_INTERNAL_API_DO_NOT_USE
-#        define pl_getValueRunLimit_4_0 pl_getValueRunLimit_INTERNAL_API_DO_NOT_USE
-#        define pl_getValueRunValue_4_0 pl_getValueRunValue_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunAscent_4_0 pl_getVisualRunAscent_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunDescent_4_0 pl_getVisualRunDescent_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunDirection_4_0 pl_getVisualRunDirection_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunFont_4_0 pl_getVisualRunFont_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunGlyphCount_4_0 pl_getVisualRunGlyphCount_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunGlyphToCharMap_4_0 pl_getVisualRunGlyphToCharMap_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunGlyphs_4_0 pl_getVisualRunGlyphs_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunLeading_4_0 pl_getVisualRunLeading_INTERNAL_API_DO_NOT_USE
-#        define pl_getVisualRunPositions_4_0 pl_getVisualRunPositions_INTERNAL_API_DO_NOT_USE
-#        define pl_isComplex_4_0 pl_isComplex_INTERNAL_API_DO_NOT_USE
-#        define pl_line_4_0 pl_line_INTERNAL_API_DO_NOT_USE
-#        define pl_nextLine_4_0 pl_nextLine_INTERNAL_API_DO_NOT_USE
-#        define pl_openEmptyFontRuns_4_0 pl_openEmptyFontRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_openEmptyLocaleRuns_4_0 pl_openEmptyLocaleRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_openEmptyValueRuns_4_0 pl_openEmptyValueRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_openFontRuns_4_0 pl_openFontRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_openLocaleRuns_4_0 pl_openLocaleRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_openValueRuns_4_0 pl_openValueRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_paragraph_4_0 pl_paragraph_INTERNAL_API_DO_NOT_USE
-#        define pl_reflow_4_0 pl_reflow_INTERNAL_API_DO_NOT_USE
-#        define pl_resetFontRuns_4_0 pl_resetFontRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_resetLocaleRuns_4_0 pl_resetLocaleRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_resetValueRuns_4_0 pl_resetValueRuns_INTERNAL_API_DO_NOT_USE
-#        define pl_visualRun_4_0 pl_visualRun_INTERNAL_API_DO_NOT_USE
-#        define ucol_equals_4_0 ucol_equals_INTERNAL_API_DO_NOT_USE
-#        define ucol_forgetUCA_4_0 ucol_forgetUCA_INTERNAL_API_DO_NOT_USE
-#        define ucol_getAttributeOrDefault_4_0 ucol_getAttributeOrDefault_INTERNAL_API_DO_NOT_USE
-#        define ucol_getUnsafeSet_4_0 ucol_getUnsafeSet_INTERNAL_API_DO_NOT_USE
-#        define ucol_nextProcessed_4_0 ucol_nextProcessed_INTERNAL_API_DO_NOT_USE
-#        define ucol_prepareShortStringOpen_4_0 ucol_prepareShortStringOpen_INTERNAL_API_DO_NOT_USE
-#        define ucol_previousProcessed_4_0 ucol_previousProcessed_INTERNAL_API_DO_NOT_USE
-#        define uprv_getDefaultCodepage_4_0 uprv_getDefaultCodepage_INTERNAL_API_DO_NOT_USE
-#        define uprv_getDefaultLocaleID_4_0 uprv_getDefaultLocaleID_INTERNAL_API_DO_NOT_USE
-#        define ures_openFillIn_4_0 ures_openFillIn_INTERNAL_API_DO_NOT_USE
-#        define usearch_searchBackwards_4_0 usearch_searchBackwards_INTERNAL_API_DO_NOT_USE
-#        define usearch_search_4_0 usearch_search_INTERNAL_API_DO_NOT_USE
-#        define utf8_appendCharSafeBody_4_0 utf8_appendCharSafeBody_INTERNAL_API_DO_NOT_USE
-#        define utf8_back1SafeBody_4_0 utf8_back1SafeBody_INTERNAL_API_DO_NOT_USE
-#        define utf8_countTrailBytes_4_0 utf8_countTrailBytes_INTERNAL_API_DO_NOT_USE
-#        define utf8_nextCharSafeBody_4_0 utf8_nextCharSafeBody_INTERNAL_API_DO_NOT_USE
-#        define utf8_prevCharSafeBody_4_0 utf8_prevCharSafeBody_INTERNAL_API_DO_NOT_USE
-#    endif /* U_DISABLE_RENAMING */
-
-#endif /* U_HIDE_INTERNAL_API */
-#endif /* UINTRNAL_H */
-

Copied: MacRuby/trunk/icu-1060/unicode/uintrnal.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uintrnal.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uintrnal.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uintrnal.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,180 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2004-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*
+*   file name:  
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   Created by: genheaders.pl, a perl script written by Ram Viswanadha
+*
+*  Contains data for commenting out APIs.
+*  Gets included by umachine.h
+*
+*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef UINTRNAL_H
+#define UINTRNAL_H
+
+#ifdef U_HIDE_INTERNAL_API
+
+#    if U_DISABLE_RENAMING
+#        define RegexPatternDump RegexPatternDump_INTERNAL_API_DO_NOT_USE
+#        define pl_addFontRun pl_addFontRun_INTERNAL_API_DO_NOT_USE
+#        define pl_addLocaleRun pl_addLocaleRun_INTERNAL_API_DO_NOT_USE
+#        define pl_addValueRun pl_addValueRun_INTERNAL_API_DO_NOT_USE
+#        define pl_close pl_close_INTERNAL_API_DO_NOT_USE
+#        define pl_closeFontRuns pl_closeFontRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_closeLine pl_closeLine_INTERNAL_API_DO_NOT_USE
+#        define pl_closeLocaleRuns pl_closeLocaleRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_closeValueRuns pl_closeValueRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_countLineRuns pl_countLineRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_create pl_create_INTERNAL_API_DO_NOT_USE
+#        define pl_getAscent pl_getAscent_INTERNAL_API_DO_NOT_USE
+#        define pl_getDescent pl_getDescent_INTERNAL_API_DO_NOT_USE
+#        define pl_getFontRunCount pl_getFontRunCount_INTERNAL_API_DO_NOT_USE
+#        define pl_getFontRunFont pl_getFontRunFont_INTERNAL_API_DO_NOT_USE
+#        define pl_getFontRunLastLimit pl_getFontRunLastLimit_INTERNAL_API_DO_NOT_USE
+#        define pl_getFontRunLimit pl_getFontRunLimit_INTERNAL_API_DO_NOT_USE
+#        define pl_getLeading pl_getLeading_INTERNAL_API_DO_NOT_USE
+#        define pl_getLineAscent pl_getLineAscent_INTERNAL_API_DO_NOT_USE
+#        define pl_getLineDescent pl_getLineDescent_INTERNAL_API_DO_NOT_USE
+#        define pl_getLineLeading pl_getLineLeading_INTERNAL_API_DO_NOT_USE
+#        define pl_getLineVisualRun pl_getLineVisualRun_INTERNAL_API_DO_NOT_USE
+#        define pl_getLineWidth pl_getLineWidth_INTERNAL_API_DO_NOT_USE
+#        define pl_getLocaleRunCount pl_getLocaleRunCount_INTERNAL_API_DO_NOT_USE
+#        define pl_getLocaleRunLastLimit pl_getLocaleRunLastLimit_INTERNAL_API_DO_NOT_USE
+#        define pl_getLocaleRunLimit pl_getLocaleRunLimit_INTERNAL_API_DO_NOT_USE
+#        define pl_getLocaleRunLocale pl_getLocaleRunLocale_INTERNAL_API_DO_NOT_USE
+#        define pl_getParagraphLevel pl_getParagraphLevel_INTERNAL_API_DO_NOT_USE
+#        define pl_getTextDirection pl_getTextDirection_INTERNAL_API_DO_NOT_USE
+#        define pl_getValueRunCount pl_getValueRunCount_INTERNAL_API_DO_NOT_USE
+#        define pl_getValueRunLastLimit pl_getValueRunLastLimit_INTERNAL_API_DO_NOT_USE
+#        define pl_getValueRunLimit pl_getValueRunLimit_INTERNAL_API_DO_NOT_USE
+#        define pl_getValueRunValue pl_getValueRunValue_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunAscent pl_getVisualRunAscent_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunDescent pl_getVisualRunDescent_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunDirection pl_getVisualRunDirection_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunFont pl_getVisualRunFont_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunGlyphCount pl_getVisualRunGlyphCount_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunGlyphToCharMap pl_getVisualRunGlyphToCharMap_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunGlyphs pl_getVisualRunGlyphs_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunLeading pl_getVisualRunLeading_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunPositions pl_getVisualRunPositions_INTERNAL_API_DO_NOT_USE
+#        define pl_isComplex pl_isComplex_INTERNAL_API_DO_NOT_USE
+#        define pl_line pl_line_INTERNAL_API_DO_NOT_USE
+#        define pl_nextLine pl_nextLine_INTERNAL_API_DO_NOT_USE
+#        define pl_openEmptyFontRuns pl_openEmptyFontRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_openEmptyLocaleRuns pl_openEmptyLocaleRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_openEmptyValueRuns pl_openEmptyValueRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_openFontRuns pl_openFontRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_openLocaleRuns pl_openLocaleRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_openValueRuns pl_openValueRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_paragraph pl_paragraph_INTERNAL_API_DO_NOT_USE
+#        define pl_reflow pl_reflow_INTERNAL_API_DO_NOT_USE
+#        define pl_resetFontRuns pl_resetFontRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_resetLocaleRuns pl_resetLocaleRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_resetValueRuns pl_resetValueRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_visualRun pl_visualRun_INTERNAL_API_DO_NOT_USE
+#        define ucol_equals ucol_equals_INTERNAL_API_DO_NOT_USE
+#        define ucol_forgetUCA ucol_forgetUCA_INTERNAL_API_DO_NOT_USE
+#        define ucol_getAttributeOrDefault ucol_getAttributeOrDefault_INTERNAL_API_DO_NOT_USE
+#        define ucol_getUnsafeSet ucol_getUnsafeSet_INTERNAL_API_DO_NOT_USE
+#        define ucol_nextProcessed ucol_nextProcessed_INTERNAL_API_DO_NOT_USE
+#        define ucol_prepareShortStringOpen ucol_prepareShortStringOpen_INTERNAL_API_DO_NOT_USE
+#        define ucol_previousProcessed ucol_previousProcessed_INTERNAL_API_DO_NOT_USE
+#        define uprv_getDefaultCodepage uprv_getDefaultCodepage_INTERNAL_API_DO_NOT_USE
+#        define uprv_getDefaultLocaleID uprv_getDefaultLocaleID_INTERNAL_API_DO_NOT_USE
+#        define ures_openFillIn ures_openFillIn_INTERNAL_API_DO_NOT_USE
+#        define usearch_search usearch_search_INTERNAL_API_DO_NOT_USE
+#        define usearch_searchBackwards usearch_searchBackwards_INTERNAL_API_DO_NOT_USE
+#        define utf8_appendCharSafeBody utf8_appendCharSafeBody_INTERNAL_API_DO_NOT_USE
+#        define utf8_back1SafeBody utf8_back1SafeBody_INTERNAL_API_DO_NOT_USE
+#        define utf8_countTrailBytes utf8_countTrailBytes_INTERNAL_API_DO_NOT_USE
+#        define utf8_nextCharSafeBody utf8_nextCharSafeBody_INTERNAL_API_DO_NOT_USE
+#        define utf8_prevCharSafeBody utf8_prevCharSafeBody_INTERNAL_API_DO_NOT_USE
+#    else
+#        define RegexPatternDump_4_0 RegexPatternDump_INTERNAL_API_DO_NOT_USE
+#        define pl_addFontRun_4_0 pl_addFontRun_INTERNAL_API_DO_NOT_USE
+#        define pl_addLocaleRun_4_0 pl_addLocaleRun_INTERNAL_API_DO_NOT_USE
+#        define pl_addValueRun_4_0 pl_addValueRun_INTERNAL_API_DO_NOT_USE
+#        define pl_closeFontRuns_4_0 pl_closeFontRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_closeLine_4_0 pl_closeLine_INTERNAL_API_DO_NOT_USE
+#        define pl_closeLocaleRuns_4_0 pl_closeLocaleRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_closeValueRuns_4_0 pl_closeValueRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_close_4_0 pl_close_INTERNAL_API_DO_NOT_USE
+#        define pl_countLineRuns_4_0 pl_countLineRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_create_4_0 pl_create_INTERNAL_API_DO_NOT_USE
+#        define pl_getAscent_4_0 pl_getAscent_INTERNAL_API_DO_NOT_USE
+#        define pl_getDescent_4_0 pl_getDescent_INTERNAL_API_DO_NOT_USE
+#        define pl_getFontRunCount_4_0 pl_getFontRunCount_INTERNAL_API_DO_NOT_USE
+#        define pl_getFontRunFont_4_0 pl_getFontRunFont_INTERNAL_API_DO_NOT_USE
+#        define pl_getFontRunLastLimit_4_0 pl_getFontRunLastLimit_INTERNAL_API_DO_NOT_USE
+#        define pl_getFontRunLimit_4_0 pl_getFontRunLimit_INTERNAL_API_DO_NOT_USE
+#        define pl_getLeading_4_0 pl_getLeading_INTERNAL_API_DO_NOT_USE
+#        define pl_getLineAscent_4_0 pl_getLineAscent_INTERNAL_API_DO_NOT_USE
+#        define pl_getLineDescent_4_0 pl_getLineDescent_INTERNAL_API_DO_NOT_USE
+#        define pl_getLineLeading_4_0 pl_getLineLeading_INTERNAL_API_DO_NOT_USE
+#        define pl_getLineVisualRun_4_0 pl_getLineVisualRun_INTERNAL_API_DO_NOT_USE
+#        define pl_getLineWidth_4_0 pl_getLineWidth_INTERNAL_API_DO_NOT_USE
+#        define pl_getLocaleRunCount_4_0 pl_getLocaleRunCount_INTERNAL_API_DO_NOT_USE
+#        define pl_getLocaleRunLastLimit_4_0 pl_getLocaleRunLastLimit_INTERNAL_API_DO_NOT_USE
+#        define pl_getLocaleRunLimit_4_0 pl_getLocaleRunLimit_INTERNAL_API_DO_NOT_USE
+#        define pl_getLocaleRunLocale_4_0 pl_getLocaleRunLocale_INTERNAL_API_DO_NOT_USE
+#        define pl_getParagraphLevel_4_0 pl_getParagraphLevel_INTERNAL_API_DO_NOT_USE
+#        define pl_getTextDirection_4_0 pl_getTextDirection_INTERNAL_API_DO_NOT_USE
+#        define pl_getValueRunCount_4_0 pl_getValueRunCount_INTERNAL_API_DO_NOT_USE
+#        define pl_getValueRunLastLimit_4_0 pl_getValueRunLastLimit_INTERNAL_API_DO_NOT_USE
+#        define pl_getValueRunLimit_4_0 pl_getValueRunLimit_INTERNAL_API_DO_NOT_USE
+#        define pl_getValueRunValue_4_0 pl_getValueRunValue_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunAscent_4_0 pl_getVisualRunAscent_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunDescent_4_0 pl_getVisualRunDescent_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunDirection_4_0 pl_getVisualRunDirection_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunFont_4_0 pl_getVisualRunFont_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunGlyphCount_4_0 pl_getVisualRunGlyphCount_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunGlyphToCharMap_4_0 pl_getVisualRunGlyphToCharMap_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunGlyphs_4_0 pl_getVisualRunGlyphs_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunLeading_4_0 pl_getVisualRunLeading_INTERNAL_API_DO_NOT_USE
+#        define pl_getVisualRunPositions_4_0 pl_getVisualRunPositions_INTERNAL_API_DO_NOT_USE
+#        define pl_isComplex_4_0 pl_isComplex_INTERNAL_API_DO_NOT_USE
+#        define pl_line_4_0 pl_line_INTERNAL_API_DO_NOT_USE
+#        define pl_nextLine_4_0 pl_nextLine_INTERNAL_API_DO_NOT_USE
+#        define pl_openEmptyFontRuns_4_0 pl_openEmptyFontRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_openEmptyLocaleRuns_4_0 pl_openEmptyLocaleRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_openEmptyValueRuns_4_0 pl_openEmptyValueRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_openFontRuns_4_0 pl_openFontRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_openLocaleRuns_4_0 pl_openLocaleRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_openValueRuns_4_0 pl_openValueRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_paragraph_4_0 pl_paragraph_INTERNAL_API_DO_NOT_USE
+#        define pl_reflow_4_0 pl_reflow_INTERNAL_API_DO_NOT_USE
+#        define pl_resetFontRuns_4_0 pl_resetFontRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_resetLocaleRuns_4_0 pl_resetLocaleRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_resetValueRuns_4_0 pl_resetValueRuns_INTERNAL_API_DO_NOT_USE
+#        define pl_visualRun_4_0 pl_visualRun_INTERNAL_API_DO_NOT_USE
+#        define ucol_equals_4_0 ucol_equals_INTERNAL_API_DO_NOT_USE
+#        define ucol_forgetUCA_4_0 ucol_forgetUCA_INTERNAL_API_DO_NOT_USE
+#        define ucol_getAttributeOrDefault_4_0 ucol_getAttributeOrDefault_INTERNAL_API_DO_NOT_USE
+#        define ucol_getUnsafeSet_4_0 ucol_getUnsafeSet_INTERNAL_API_DO_NOT_USE
+#        define ucol_nextProcessed_4_0 ucol_nextProcessed_INTERNAL_API_DO_NOT_USE
+#        define ucol_prepareShortStringOpen_4_0 ucol_prepareShortStringOpen_INTERNAL_API_DO_NOT_USE
+#        define ucol_previousProcessed_4_0 ucol_previousProcessed_INTERNAL_API_DO_NOT_USE
+#        define uprv_getDefaultCodepage_4_0 uprv_getDefaultCodepage_INTERNAL_API_DO_NOT_USE
+#        define uprv_getDefaultLocaleID_4_0 uprv_getDefaultLocaleID_INTERNAL_API_DO_NOT_USE
+#        define ures_openFillIn_4_0 ures_openFillIn_INTERNAL_API_DO_NOT_USE
+#        define usearch_searchBackwards_4_0 usearch_searchBackwards_INTERNAL_API_DO_NOT_USE
+#        define usearch_search_4_0 usearch_search_INTERNAL_API_DO_NOT_USE
+#        define utf8_appendCharSafeBody_4_0 utf8_appendCharSafeBody_INTERNAL_API_DO_NOT_USE
+#        define utf8_back1SafeBody_4_0 utf8_back1SafeBody_INTERNAL_API_DO_NOT_USE
+#        define utf8_countTrailBytes_4_0 utf8_countTrailBytes_INTERNAL_API_DO_NOT_USE
+#        define utf8_nextCharSafeBody_4_0 utf8_nextCharSafeBody_INTERNAL_API_DO_NOT_USE
+#        define utf8_prevCharSafeBody_4_0 utf8_prevCharSafeBody_INTERNAL_API_DO_NOT_USE
+#    endif /* U_DISABLE_RENAMING */
+
+#endif /* U_HIDE_INTERNAL_API */
+#endif /* UINTRNAL_H */
+

Deleted: MacRuby/trunk/icu-1060/unicode/uiter.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uiter.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uiter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,707 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 2002-2006, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  uiter.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2002jan18
-*   created by: Markus W. Scherer
-*/
-
-#ifndef __UITER_H__
-#define __UITER_H__
-
-/**
- * \file
- * \brief C API: Unicode Character Iteration
- *
- * @see UCharIterator
- */
-
-#include "unicode/utypes.h"
-
-#ifdef XP_CPLUSPLUS
-    U_NAMESPACE_BEGIN
-
-    class CharacterIterator;
-    class Replaceable;
-
-    U_NAMESPACE_END
-#endif
-
-U_CDECL_BEGIN
-
-struct UCharIterator;
-typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
-
-/**
- * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
- * @see UCharIteratorMove
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef enum UCharIteratorOrigin {
-    UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
-} UCharIteratorOrigin;
-
-/** Constants for UCharIterator. @stable ICU 2.6 */
-enum {
-    /**
-     * Constant value that may be returned by UCharIteratorMove
-     * indicating that the final UTF-16 index is not known, but that the move succeeded.
-     * This can occur when moving relative to limit or length, or
-     * when moving relative to the current index after a setState()
-     * when the current UTF-16 index is not known.
-     *
-     * It would be very inefficient to have to count from the beginning of the text
-     * just to get the current/limit/length index after moving relative to it.
-     * The actual index can be determined with getIndex(UITER_CURRENT)
-     * which will count the UChars if necessary.
-     *
-     * @stable ICU 2.6
-     */
-    UITER_UNKNOWN_INDEX=-2
-};
-
-
-/**
- * Constant for UCharIterator getState() indicating an error or
- * an unknown state.
- * Returned by uiter_getState()/UCharIteratorGetState
- * when an error occurs.
- * Also, some UCharIterator implementations may not be able to return
- * a valid state for each position. This will be clearly documented
- * for each such iterator (none of the public ones here).
- *
- * @stable ICU 2.6
- */
-#define UITER_NO_STATE ((uint32_t)0xffffffff)
-
-/**
- * Function type declaration for UCharIterator.getIndex().
- *
- * Gets the current position, or the start or limit of the
- * iteration range.
- *
- * This function may perform slowly for UITER_CURRENT after setState() was called,
- * or for UITER_LENGTH, because an iterator implementation may have to count
- * UChars if the underlying storage is not UTF-16.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @param origin get the 0, start, limit, length, or current index
- * @return the requested index, or U_SENTINEL in an error condition
- *
- * @see UCharIteratorOrigin
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef int32_t U_CALLCONV
-UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
-
-/**
- * Function type declaration for UCharIterator.move().
- *
- * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
- *
- * Moves the current position relative to the start or limit of the
- * iteration range, or relative to the current position itself.
- * The movement is expressed in numbers of code units forward
- * or backward by specifying a positive or negative delta.
- * Out of bounds movement will be pinned to the start or limit.
- *
- * This function may perform slowly for moving relative to UITER_LENGTH
- * because an iterator implementation may have to count the rest of the
- * UChars if the native storage is not UTF-16.
- *
- * When moving relative to the limit or length, or
- * relative to the current position after setState() was called,
- * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
- * determination of the actual UTF-16 index.
- * The actual index can be determined with getIndex(UITER_CURRENT)
- * which will count the UChars if necessary.
- * See UITER_UNKNOWN_INDEX for details.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @param delta can be positive, zero, or negative
- * @param origin move relative to the 0, start, limit, length, or current index
- * @return the new index, or U_SENTINEL on an error condition,
- *         or UITER_UNKNOWN_INDEX when the index is not known.
- *
- * @see UCharIteratorOrigin
- * @see UCharIterator
- * @see UITER_UNKNOWN_INDEX
- * @stable ICU 2.1
- */
-typedef int32_t U_CALLCONV
-UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
-
-/**
- * Function type declaration for UCharIterator.hasNext().
- *
- * Check if current() and next() can still
- * return another code unit.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return boolean value for whether current() and next() can still return another code unit
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef UBool U_CALLCONV
-UCharIteratorHasNext(UCharIterator *iter);
-
-/**
- * Function type declaration for UCharIterator.hasPrevious().
- *
- * Check if previous() can still return another code unit.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return boolean value for whether previous() can still return another code unit
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef UBool U_CALLCONV
-UCharIteratorHasPrevious(UCharIterator *iter);
- 
-/**
- * Function type declaration for UCharIterator.current().
- *
- * Return the code unit at the current position,
- * or U_SENTINEL if there is none (index is at the limit).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the current code unit
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef UChar32 U_CALLCONV
-UCharIteratorCurrent(UCharIterator *iter);
-
-/**
- * Function type declaration for UCharIterator.next().
- *
- * Return the code unit at the current index and increment
- * the index (post-increment, like s[i++]),
- * or return U_SENTINEL if there is none (index is at the limit).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the current code unit (and post-increment the current index)
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef UChar32 U_CALLCONV
-UCharIteratorNext(UCharIterator *iter);
-
-/**
- * Function type declaration for UCharIterator.previous().
- *
- * Decrement the index and return the code unit from there
- * (pre-decrement, like s[--i]),
- * or return U_SENTINEL if there is none (index is at the start).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the previous code unit (after pre-decrementing the current index)
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef UChar32 U_CALLCONV
-UCharIteratorPrevious(UCharIterator *iter);
-
-/**
- * Function type declaration for UCharIterator.reservedFn().
- * Reserved for future use.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @param something some integer argument
- * @return some integer
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-typedef int32_t U_CALLCONV
-UCharIteratorReserved(UCharIterator *iter, int32_t something);
-
-/**
- * Function type declaration for UCharIterator.getState().
- *
- * Get the "state" of the iterator in the form of a single 32-bit word.
- * It is recommended that the state value be calculated to be as small as
- * is feasible. For strings with limited lengths, fewer than 32 bits may
- * be sufficient.
- *
- * This is used together with setState()/UCharIteratorSetState
- * to save and restore the iterator position more efficiently than with
- * getIndex()/move().
- *
- * The iterator state is defined as a uint32_t value because it is designed
- * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
- * of the character iterator.
- *
- * With some UCharIterator implementations (e.g., UTF-8),
- * getting and setting the UTF-16 index with existing functions
- * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
- * relatively slow because the iterator has to "walk" from a known index
- * to the requested one.
- * This takes more time the farther it needs to go.
- *
- * An opaque state value allows an iterator implementation to provide
- * an internal index (UTF-8: the source byte array index) for
- * fast, constant-time restoration.
- *
- * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
- * the UTF-16 index may not be restored as well, but the iterator can deliver
- * the correct text contents and move relative to the current position
- * without performance degradation.
- *
- * Some UCharIterator implementations may not be able to return
- * a valid state for each position, in which case they return UITER_NO_STATE instead.
- * This will be clearly documented for each such iterator (none of the public ones here).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the state word
- *
- * @see UCharIterator
- * @see UCharIteratorSetState
- * @see UITER_NO_STATE
- * @stable ICU 2.6
- */
-typedef uint32_t U_CALLCONV
-UCharIteratorGetState(const UCharIterator *iter);
-
-/**
- * Function type declaration for UCharIterator.setState().
- *
- * Restore the "state" of the iterator using a state word from a getState() call.
- * The iterator object need not be the same one as for which getState() was called,
- * but it must be of the same type (set up using the same uiter_setXYZ function)
- * and it must iterate over the same string
- * (binary identical regardless of memory address).
- * For more about the state word see UCharIteratorGetState.
- *
- * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
- * the UTF-16 index may not be restored as well, but the iterator can deliver
- * the correct text contents and move relative to the current position
- * without performance degradation.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @param state the state word from a getState() call
- *              on a same-type, same-string iterator
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                   which must not indicate a failure before the function call.
- *
- * @see UCharIterator
- * @see UCharIteratorGetState
- * @stable ICU 2.6
- */
-typedef void U_CALLCONV
-UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
-
-
-/**
- * C API for code unit iteration.
- * This can be used as a C wrapper around
- * CharacterIterator, Replaceable, or implemented using simple strings, etc.
- *
- * There are two roles for using UCharIterator:
- *
- * A "provider" sets the necessary function pointers and controls the "protected"
- * fields of the UCharIterator structure. A "provider" passes a UCharIterator
- * into C APIs that need a UCharIterator as an abstract, flexible string interface.
- *
- * Implementations of such C APIs are "callers" of UCharIterator functions;
- * they only use the "public" function pointers and never access the "protected"
- * fields directly.
- *
- * The current() and next() functions only check the current index against the
- * limit, and previous() only checks the current index against the start,
- * to see if the iterator already reached the end of the iteration range.
- *
- * The assumption - in all iterators - is that the index is moved via the API,
- * which means it won't go out of bounds, or the index is modified by
- * user code that knows enough about the iterator implementation to set valid
- * index values.
- *
- * UCharIterator functions return code unit values 0..0xffff,
- * or U_SENTINEL if the iteration bounds are reached.
- *
- * @stable ICU 2.1
- */
-struct UCharIterator {
-    /**
-     * (protected) Pointer to string or wrapped object or similar.
-     * Not used by caller.
-     * @stable ICU 2.1
-     */
-    const void *context;
-
-    /**
-     * (protected) Length of string or similar.
-     * Not used by caller.
-     * @stable ICU 2.1
-     */
-    int32_t length;
-
-    /**
-     * (protected) Start index or similar.
-     * Not used by caller.
-     * @stable ICU 2.1
-     */
-    int32_t start;
-
-    /**
-     * (protected) Current index or similar.
-     * Not used by caller.
-     * @stable ICU 2.1
-     */
-    int32_t index;
-
-    /**
-     * (protected) Limit index or similar.
-     * Not used by caller.
-     * @stable ICU 2.1
-     */
-    int32_t limit;
-
-    /**
-     * (protected) Used by UTF-8 iterators and possibly others.
-     * @stable ICU 2.1
-     */
-    int32_t reservedField;
-
-    /**
-     * (public) Returns the current position or the
-     * start or limit index of the iteration range.
-     *
-     * @see UCharIteratorGetIndex
-     * @stable ICU 2.1
-     */
-    UCharIteratorGetIndex *getIndex;
-
-    /**
-     * (public) Moves the current position relative to the start or limit of the
-     * iteration range, or relative to the current position itself.
-     * The movement is expressed in numbers of code units forward
-     * or backward by specifying a positive or negative delta.
-     *
-     * @see UCharIteratorMove
-     * @stable ICU 2.1
-     */
-    UCharIteratorMove *move;
-
-    /**
-     * (public) Check if current() and next() can still
-     * return another code unit.
-     *
-     * @see UCharIteratorHasNext
-     * @stable ICU 2.1
-     */
-    UCharIteratorHasNext *hasNext;
-
-    /**
-     * (public) Check if previous() can still return another code unit.
-     *
-     * @see UCharIteratorHasPrevious
-     * @stable ICU 2.1
-     */
-    UCharIteratorHasPrevious *hasPrevious;
-
-    /**
-     * (public) Return the code unit at the current position,
-     * or U_SENTINEL if there is none (index is at the limit).
-     *
-     * @see UCharIteratorCurrent
-     * @stable ICU 2.1
-     */
-    UCharIteratorCurrent *current;
-
-    /**
-     * (public) Return the code unit at the current index and increment
-     * the index (post-increment, like s[i++]),
-     * or return U_SENTINEL if there is none (index is at the limit).
-     *
-     * @see UCharIteratorNext
-     * @stable ICU 2.1
-     */
-    UCharIteratorNext *next;
-
-    /**
-     * (public) Decrement the index and return the code unit from there
-     * (pre-decrement, like s[--i]),
-     * or return U_SENTINEL if there is none (index is at the start).
-     *
-     * @see UCharIteratorPrevious
-     * @stable ICU 2.1
-     */
-    UCharIteratorPrevious *previous;
-
-    /**
-     * (public) Reserved for future use. Currently NULL.
-     *
-     * @see UCharIteratorReserved
-     * @stable ICU 2.1
-     */
-    UCharIteratorReserved *reservedFn;
-
-    /**
-     * (public) Return the state of the iterator, to be restored later with setState().
-     * This function pointer is NULL if the iterator does not implement it.
-     *
-     * @see UCharIteratorGet
-     * @stable ICU 2.6
-     */
-    UCharIteratorGetState *getState;
-
-    /**
-     * (public) Restore the iterator state from the state word from a call
-     * to getState().
-     * This function pointer is NULL if the iterator does not implement it.
-     *
-     * @see UCharIteratorSet
-     * @stable ICU 2.6
-     */
-    UCharIteratorSetState *setState;
-};
-
-/**
- * Helper function for UCharIterator to get the code point
- * at the current index.
- *
- * Return the code point that includes the code unit at the current position,
- * or U_SENTINEL if there is none (index is at the limit).
- * If the current code unit is a lead or trail surrogate,
- * then the following or preceding surrogate is used to form
- * the code point value.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the current code point
- *
- * @see UCharIterator
- * @see U16_GET
- * @see UnicodeString::char32At()
- * @stable ICU 2.1
- */
-U_STABLE UChar32 U_EXPORT2
-uiter_current32(UCharIterator *iter);
-
-/**
- * Helper function for UCharIterator to get the next code point.
- *
- * Return the code point at the current index and increment
- * the index (post-increment, like s[i++]),
- * or return U_SENTINEL if there is none (index is at the limit).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the current code point (and post-increment the current index)
- *
- * @see UCharIterator
- * @see U16_NEXT
- * @stable ICU 2.1
- */
-U_STABLE UChar32 U_EXPORT2
-uiter_next32(UCharIterator *iter);
-
-/**
- * Helper function for UCharIterator to get the previous code point.
- *
- * Decrement the index and return the code point from there
- * (pre-decrement, like s[--i]),
- * or return U_SENTINEL if there is none (index is at the start).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the previous code point (after pre-decrementing the current index)
- *
- * @see UCharIterator
- * @see U16_PREV
- * @stable ICU 2.1
- */
-U_STABLE UChar32 U_EXPORT2
-uiter_previous32(UCharIterator *iter);
-
-/**
- * Get the "state" of the iterator in the form of a single 32-bit word.
- * This is a convenience function that calls iter->getState(iter)
- * if iter->getState is not NULL;
- * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
- *
- * Some UCharIterator implementations may not be able to return
- * a valid state for each position, in which case they return UITER_NO_STATE instead.
- * This will be clearly documented for each such iterator (none of the public ones here).
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @return the state word
- *
- * @see UCharIterator
- * @see UCharIteratorGetState
- * @see UITER_NO_STATE
- * @stable ICU 2.6
- */
-U_STABLE uint32_t U_EXPORT2
-uiter_getState(const UCharIterator *iter);
-
-/**
- * Restore the "state" of the iterator using a state word from a getState() call.
- * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
- * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
- *
- * @param iter the UCharIterator structure ("this pointer")
- * @param state the state word from a getState() call
- *              on a same-type, same-string iterator
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                   which must not indicate a failure before the function call.
- *
- * @see UCharIterator
- * @see UCharIteratorSetState
- * @stable ICU 2.6
- */
-U_STABLE void U_EXPORT2
-uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
-
-/**
- * Set up a UCharIterator to iterate over a string.
- *
- * Sets the UCharIterator function pointers for iteration over the string s
- * with iteration boundaries start=index=0 and length=limit=string length.
- * The "provider" may set the start, index, and limit values at any time
- * within the range 0..length.
- * The length field will be ignored.
- *
- * The string pointer s is set into UCharIterator.context without copying
- * or reallocating the string contents.
- *
- * getState() simply returns the current index.
- * move() will always return the final index.
- *
- * @param iter UCharIterator structure to be set for iteration
- * @param s String to iterate over
- * @param length Length of s, or -1 if NUL-terminated
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-U_STABLE void U_EXPORT2
-uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
-
-/**
- * Set up a UCharIterator to iterate over a UTF-16BE string
- * (byte vector with a big-endian pair of bytes per UChar).
- *
- * Everything works just like with a normal UChar iterator (uiter_setString),
- * except that UChars are assembled from byte pairs,
- * and that the length argument here indicates an even number of bytes.
- *
- * getState() simply returns the current index.
- * move() will always return the final index.
- *
- * @param iter UCharIterator structure to be set for iteration
- * @param s UTF-16BE string to iterate over
- * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
- *               (NUL means pair of 0 bytes at even index from s)
- *
- * @see UCharIterator
- * @see uiter_setString
- * @stable ICU 2.6
- */
-U_STABLE void U_EXPORT2
-uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
-
-/**
- * Set up a UCharIterator to iterate over a UTF-8 string.
- *
- * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
- * with UTF-8 iteration boundaries 0 and length.
- * The implementation counts the UTF-16 index on the fly and
- * lazily evaluates the UTF-16 length of the text.
- *
- * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
- * When the reservedField is not 0, then it contains a supplementary code point
- * and the UTF-16 index is between the two corresponding surrogates.
- * At that point, the UTF-8 index is behind that code point.
- *
- * The UTF-8 string pointer s is set into UCharIterator.context without copying
- * or reallocating the string contents.
- *
- * getState() returns a state value consisting of
- * - the current UTF-8 source byte index (bits 31..1)
- * - a flag (bit 0) that indicates whether the UChar position is in the middle
- *   of a surrogate pair
- *   (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
- *
- * getState() cannot also encode the UTF-16 index in the state value.
- * move(relative to limit or length), or
- * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
- *
- * @param iter UCharIterator structure to be set for iteration
- * @param s UTF-8 string to iterate over
- * @param length Length of s in bytes, or -1 if NUL-terminated
- *
- * @see UCharIterator
- * @stable ICU 2.6
- */
-U_STABLE void U_EXPORT2
-uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
-
-#ifdef XP_CPLUSPLUS
-
-/**
- * Set up a UCharIterator to wrap around a C++ CharacterIterator.
- *
- * Sets the UCharIterator function pointers for iteration using the
- * CharacterIterator charIter.
- *
- * The CharacterIterator pointer charIter is set into UCharIterator.context
- * without copying or cloning the CharacterIterator object.
- * The other "protected" UCharIterator fields are set to 0 and will be ignored.
- * The iteration index and boundaries are controlled by the CharacterIterator.
- *
- * getState() simply returns the current index.
- * move() will always return the final index.
- *
- * @param iter UCharIterator structure to be set for iteration
- * @param charIter CharacterIterator to wrap
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-U_STABLE void U_EXPORT2
-uiter_setCharacterIterator(UCharIterator *iter, U_NAMESPACE_QUALIFIER CharacterIterator *charIter);
-
-/**
- * Set up a UCharIterator to iterate over a C++ Replaceable.
- *
- * Sets the UCharIterator function pointers for iteration over the
- * Replaceable rep with iteration boundaries start=index=0 and
- * length=limit=rep->length().
- * The "provider" may set the start, index, and limit values at any time
- * within the range 0..length=rep->length().
- * The length field will be ignored.
- *
- * The Replaceable pointer rep is set into UCharIterator.context without copying
- * or cloning/reallocating the Replaceable object.
- *
- * getState() simply returns the current index.
- * move() will always return the final index.
- *
- * @param iter UCharIterator structure to be set for iteration
- * @param rep Replaceable to iterate over
- *
- * @see UCharIterator
- * @stable ICU 2.1
- */
-U_STABLE void U_EXPORT2
-uiter_setReplaceable(UCharIterator *iter, const U_NAMESPACE_QUALIFIER Replaceable *rep);
-
-#endif
-
-U_CDECL_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/uiter.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uiter.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uiter.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uiter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,707 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2002-2006, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uiter.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002jan18
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UITER_H__
+#define __UITER_H__
+
+/**
+ * \file
+ * \brief C API: Unicode Character Iteration
+ *
+ * @see UCharIterator
+ */
+
+#include "unicode/utypes.h"
+
+#ifdef XP_CPLUSPLUS
+    U_NAMESPACE_BEGIN
+
+    class CharacterIterator;
+    class Replaceable;
+
+    U_NAMESPACE_END
+#endif
+
+U_CDECL_BEGIN
+
+struct UCharIterator;
+typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
+
+/**
+ * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
+ * @see UCharIteratorMove
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef enum UCharIteratorOrigin {
+    UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
+} UCharIteratorOrigin;
+
+/** Constants for UCharIterator. @stable ICU 2.6 */
+enum {
+    /**
+     * Constant value that may be returned by UCharIteratorMove
+     * indicating that the final UTF-16 index is not known, but that the move succeeded.
+     * This can occur when moving relative to limit or length, or
+     * when moving relative to the current index after a setState()
+     * when the current UTF-16 index is not known.
+     *
+     * It would be very inefficient to have to count from the beginning of the text
+     * just to get the current/limit/length index after moving relative to it.
+     * The actual index can be determined with getIndex(UITER_CURRENT)
+     * which will count the UChars if necessary.
+     *
+     * @stable ICU 2.6
+     */
+    UITER_UNKNOWN_INDEX=-2
+};
+
+
+/**
+ * Constant for UCharIterator getState() indicating an error or
+ * an unknown state.
+ * Returned by uiter_getState()/UCharIteratorGetState
+ * when an error occurs.
+ * Also, some UCharIterator implementations may not be able to return
+ * a valid state for each position. This will be clearly documented
+ * for each such iterator (none of the public ones here).
+ *
+ * @stable ICU 2.6
+ */
+#define UITER_NO_STATE ((uint32_t)0xffffffff)
+
+/**
+ * Function type declaration for UCharIterator.getIndex().
+ *
+ * Gets the current position, or the start or limit of the
+ * iteration range.
+ *
+ * This function may perform slowly for UITER_CURRENT after setState() was called,
+ * or for UITER_LENGTH, because an iterator implementation may have to count
+ * UChars if the underlying storage is not UTF-16.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param origin get the 0, start, limit, length, or current index
+ * @return the requested index, or U_SENTINEL in an error condition
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.move().
+ *
+ * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
+ *
+ * Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * Out of bounds movement will be pinned to the start or limit.
+ *
+ * This function may perform slowly for moving relative to UITER_LENGTH
+ * because an iterator implementation may have to count the rest of the
+ * UChars if the native storage is not UTF-16.
+ *
+ * When moving relative to the limit or length, or
+ * relative to the current position after setState() was called,
+ * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
+ * determination of the actual UTF-16 index.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ * See UITER_UNKNOWN_INDEX for details.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param delta can be positive, zero, or negative
+ * @param origin move relative to the 0, start, limit, length, or current index
+ * @return the new index, or U_SENTINEL on an error condition,
+ *         or UITER_UNKNOWN_INDEX when the index is not known.
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @see UITER_UNKNOWN_INDEX
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.hasNext().
+ *
+ * Check if current() and next() can still
+ * return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether current() and next() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.hasPrevious().
+ *
+ * Check if previous() can still return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether previous() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasPrevious(UCharIterator *iter);
+ 
+/**
+ * Function type declaration for UCharIterator.current().
+ *
+ * Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorCurrent(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.next().
+ *
+ * Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.previous().
+ *
+ * Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code unit (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.reservedFn().
+ * Reserved for future use.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param something some integer argument
+ * @return some integer
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorReserved(UCharIterator *iter, int32_t something);
+
+/**
+ * Function type declaration for UCharIterator.getState().
+ *
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * It is recommended that the state value be calculated to be as small as
+ * is feasible. For strings with limited lengths, fewer than 32 bits may
+ * be sufficient.
+ *
+ * This is used together with setState()/UCharIteratorSetState
+ * to save and restore the iterator position more efficiently than with
+ * getIndex()/move().
+ *
+ * The iterator state is defined as a uint32_t value because it is designed
+ * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
+ * of the character iterator.
+ *
+ * With some UCharIterator implementations (e.g., UTF-8),
+ * getting and setting the UTF-16 index with existing functions
+ * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
+ * relatively slow because the iterator has to "walk" from a known index
+ * to the requested one.
+ * This takes more time the farther it needs to go.
+ *
+ * An opaque state value allows an iterator implementation to provide
+ * an internal index (UTF-8: the source byte array index) for
+ * fast, constant-time restoration.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+typedef uint32_t U_CALLCONV
+UCharIteratorGetState(const UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.setState().
+ *
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * The iterator object need not be the same one as for which getState() was called,
+ * but it must be of the same type (set up using the same uiter_setXYZ function)
+ * and it must iterate over the same string
+ * (binary identical regardless of memory address).
+ * For more about the state word see UCharIteratorGetState.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ *              on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                   which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @stable ICU 2.6
+ */
+typedef void U_CALLCONV
+UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+
+/**
+ * C API for code unit iteration.
+ * This can be used as a C wrapper around
+ * CharacterIterator, Replaceable, or implemented using simple strings, etc.
+ *
+ * There are two roles for using UCharIterator:
+ *
+ * A "provider" sets the necessary function pointers and controls the "protected"
+ * fields of the UCharIterator structure. A "provider" passes a UCharIterator
+ * into C APIs that need a UCharIterator as an abstract, flexible string interface.
+ *
+ * Implementations of such C APIs are "callers" of UCharIterator functions;
+ * they only use the "public" function pointers and never access the "protected"
+ * fields directly.
+ *
+ * The current() and next() functions only check the current index against the
+ * limit, and previous() only checks the current index against the start,
+ * to see if the iterator already reached the end of the iteration range.
+ *
+ * The assumption - in all iterators - is that the index is moved via the API,
+ * which means it won't go out of bounds, or the index is modified by
+ * user code that knows enough about the iterator implementation to set valid
+ * index values.
+ *
+ * UCharIterator functions return code unit values 0..0xffff,
+ * or U_SENTINEL if the iteration bounds are reached.
+ *
+ * @stable ICU 2.1
+ */
+struct UCharIterator {
+    /**
+     * (protected) Pointer to string or wrapped object or similar.
+     * Not used by caller.
+     * @stable ICU 2.1
+     */
+    const void *context;
+
+    /**
+     * (protected) Length of string or similar.
+     * Not used by caller.
+     * @stable ICU 2.1
+     */
+    int32_t length;
+
+    /**
+     * (protected) Start index or similar.
+     * Not used by caller.
+     * @stable ICU 2.1
+     */
+    int32_t start;
+
+    /**
+     * (protected) Current index or similar.
+     * Not used by caller.
+     * @stable ICU 2.1
+     */
+    int32_t index;
+
+    /**
+     * (protected) Limit index or similar.
+     * Not used by caller.
+     * @stable ICU 2.1
+     */
+    int32_t limit;
+
+    /**
+     * (protected) Used by UTF-8 iterators and possibly others.
+     * @stable ICU 2.1
+     */
+    int32_t reservedField;
+
+    /**
+     * (public) Returns the current position or the
+     * start or limit index of the iteration range.
+     *
+     * @see UCharIteratorGetIndex
+     * @stable ICU 2.1
+     */
+    UCharIteratorGetIndex *getIndex;
+
+    /**
+     * (public) Moves the current position relative to the start or limit of the
+     * iteration range, or relative to the current position itself.
+     * The movement is expressed in numbers of code units forward
+     * or backward by specifying a positive or negative delta.
+     *
+     * @see UCharIteratorMove
+     * @stable ICU 2.1
+     */
+    UCharIteratorMove *move;
+
+    /**
+     * (public) Check if current() and next() can still
+     * return another code unit.
+     *
+     * @see UCharIteratorHasNext
+     * @stable ICU 2.1
+     */
+    UCharIteratorHasNext *hasNext;
+
+    /**
+     * (public) Check if previous() can still return another code unit.
+     *
+     * @see UCharIteratorHasPrevious
+     * @stable ICU 2.1
+     */
+    UCharIteratorHasPrevious *hasPrevious;
+
+    /**
+     * (public) Return the code unit at the current position,
+     * or U_SENTINEL if there is none (index is at the limit).
+     *
+     * @see UCharIteratorCurrent
+     * @stable ICU 2.1
+     */
+    UCharIteratorCurrent *current;
+
+    /**
+     * (public) Return the code unit at the current index and increment
+     * the index (post-increment, like s[i++]),
+     * or return U_SENTINEL if there is none (index is at the limit).
+     *
+     * @see UCharIteratorNext
+     * @stable ICU 2.1
+     */
+    UCharIteratorNext *next;
+
+    /**
+     * (public) Decrement the index and return the code unit from there
+     * (pre-decrement, like s[--i]),
+     * or return U_SENTINEL if there is none (index is at the start).
+     *
+     * @see UCharIteratorPrevious
+     * @stable ICU 2.1
+     */
+    UCharIteratorPrevious *previous;
+
+    /**
+     * (public) Reserved for future use. Currently NULL.
+     *
+     * @see UCharIteratorReserved
+     * @stable ICU 2.1
+     */
+    UCharIteratorReserved *reservedFn;
+
+    /**
+     * (public) Return the state of the iterator, to be restored later with setState().
+     * This function pointer is NULL if the iterator does not implement it.
+     *
+     * @see UCharIteratorGet
+     * @stable ICU 2.6
+     */
+    UCharIteratorGetState *getState;
+
+    /**
+     * (public) Restore the iterator state from the state word from a call
+     * to getState().
+     * This function pointer is NULL if the iterator does not implement it.
+     *
+     * @see UCharIteratorSet
+     * @stable ICU 2.6
+     */
+    UCharIteratorSetState *setState;
+};
+
+/**
+ * Helper function for UCharIterator to get the code point
+ * at the current index.
+ *
+ * Return the code point that includes the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ * If the current code unit is a lead or trail surrogate,
+ * then the following or preceding surrogate is used to form
+ * the code point value.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point
+ *
+ * @see UCharIterator
+ * @see U16_GET
+ * @see UnicodeString::char32At()
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_current32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the next code point.
+ *
+ * Return the code point at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @see U16_NEXT
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_next32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the previous code point.
+ *
+ * Decrement the index and return the code point from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code point (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @see U16_PREV
+ * @stable ICU 2.1
+ */
+U_STABLE UChar32 U_EXPORT2
+uiter_previous32(UCharIterator *iter);
+
+/**
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * This is a convenience function that calls iter->getState(iter)
+ * if iter->getState is not NULL;
+ * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+U_STABLE uint32_t U_EXPORT2
+uiter_getState(const UCharIterator *iter);
+
+/**
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
+ * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ *              on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                   which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+/**
+ * Set up a UCharIterator to iterate over a string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the string s
+ * with iteration boundaries start=index=0 and length=limit=string length.
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length.
+ * The length field will be ignored.
+ *
+ * The string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s String to iterate over
+ * @param length Length of s, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-16BE string
+ * (byte vector with a big-endian pair of bytes per UChar).
+ *
+ * Everything works just like with a normal UChar iterator (uiter_setString),
+ * except that UChars are assembled from byte pairs,
+ * and that the length argument here indicates an even number of bytes.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-16BE string to iterate over
+ * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
+ *               (NUL means pair of 0 bytes at even index from s)
+ *
+ * @see UCharIterator
+ * @see uiter_setString
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-8 string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
+ * with UTF-8 iteration boundaries 0 and length.
+ * The implementation counts the UTF-16 index on the fly and
+ * lazily evaluates the UTF-16 length of the text.
+ *
+ * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
+ * When the reservedField is not 0, then it contains a supplementary code point
+ * and the UTF-16 index is between the two corresponding surrogates.
+ * At that point, the UTF-8 index is behind that code point.
+ *
+ * The UTF-8 string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() returns a state value consisting of
+ * - the current UTF-8 source byte index (bits 31..1)
+ * - a flag (bit 0) that indicates whether the UChar position is in the middle
+ *   of a surrogate pair
+ *   (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
+ *
+ * getState() cannot also encode the UTF-16 index in the state value.
+ * move(relative to limit or length), or
+ * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-8 string to iterate over
+ * @param length Length of s in bytes, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
+
+#ifdef XP_CPLUSPLUS
+
+/**
+ * Set up a UCharIterator to wrap around a C++ CharacterIterator.
+ *
+ * Sets the UCharIterator function pointers for iteration using the
+ * CharacterIterator charIter.
+ *
+ * The CharacterIterator pointer charIter is set into UCharIterator.context
+ * without copying or cloning the CharacterIterator object.
+ * The other "protected" UCharIterator fields are set to 0 and will be ignored.
+ * The iteration index and boundaries are controlled by the CharacterIterator.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param charIter CharacterIterator to wrap
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setCharacterIterator(UCharIterator *iter, U_NAMESPACE_QUALIFIER CharacterIterator *charIter);
+
+/**
+ * Set up a UCharIterator to iterate over a C++ Replaceable.
+ *
+ * Sets the UCharIterator function pointers for iteration over the
+ * Replaceable rep with iteration boundaries start=index=0 and
+ * length=limit=rep->length().
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length=rep->length().
+ * The length field will be ignored.
+ *
+ * The Replaceable pointer rep is set into UCharIterator.context without copying
+ * or cloning/reallocating the Replaceable object.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param rep Replaceable to iterate over
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_STABLE void U_EXPORT2
+uiter_setReplaceable(UCharIterator *iter, const U_NAMESPACE_QUALIFIER Replaceable *rep);
+
+#endif
+
+U_CDECL_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/uloc.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uloc.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uloc.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1046 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1997-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*
-* File ULOC.H
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   04/01/97    aliu        Creation.
-*   08/22/98    stephen     JDK 1.2 sync.
-*   12/08/98    rtg         New C API for Locale
-*   03/30/99    damiba      overhaul
-*   03/31/99    helena      Javadoc for uloc functions.
-*   04/15/99    Madhu       Updated Javadoc
-********************************************************************************
-*/
-
-#ifndef ULOC_H
-#define ULOC_H
-
-#include "unicode/utypes.h"
-#include "unicode/uenum.h"
-
-/**    
- * \file
- * \brief  C API: Locale 
- *
- * <h2> ULoc C API for Locale </h2>
- * A <code>Locale</code> represents a specific geographical, political,
- * or cultural region. An operation that requires a <code>Locale</code> to perform
- * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
- * to tailor information for the user. For example, displaying a number
- * is a locale-sensitive operation--the number should be formatted
- * according to the customs/conventions of the user's native country,
- * region, or culture.  In the C APIs, a locales is simply a const char string.
- *
- * <P>
- * You create a <code>Locale</code> with one of the three options listed below.
- * Each of the component is separated by '_' in the locale string.
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * \code
- *       newLanguage
- * 
- *       newLanguage + newCountry
- * 
- *       newLanguage + newCountry + newVariant
- * \endcode
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- * The first option is a valid <STRONG>ISO
- * Language Code.</STRONG> These codes are the lower-case two-letter
- * codes as defined by ISO-639.
- * You can find a full list of these codes at a number of sites, such as:
- * <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
- * http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</a>
- *
- * <P>
- * The second option includes an additonal <STRONG>ISO Country
- * Code.</STRONG> These codes are the upper-case two-letter codes
- * as defined by ISO-3166.
- * You can find a full list of these codes at a number of sites, such as:
- * <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
- * http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</a>
- *
- * <P>
- * The third option requires another additonal information--the 
- * <STRONG>Variant.</STRONG>
- * The Variant codes are vendor and browser-specific.
- * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
- * Where there are two variants, separate them with an underscore, and
- * put the most important one first. For
- * example, a Traditional Spanish collation might be referenced, with
- * "ES", "ES", "Traditional_WIN".
- *
- * <P>
- * Because a <code>Locale</code> is just an identifier for a region,
- * no validity check is performed when you specify a <code>Locale</code>.
- * If you want to see whether particular resources are available for the
- * <code>Locale</code> you asked for, you must query those resources. For
- * example, ask the <code>UNumberFormat</code> for the locales it supports
- * using its <code>getAvailable</code> method.
- * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
- * locale, you get back the best available match, not necessarily
- * precisely what you asked for. For more information, look at
- * <code>UResourceBundle</code>.
- *
- * <P>
- * The <code>Locale</code> provides a number of convenient constants
- * that you can use to specify the commonly used
- * locales. For example, the following refers to a locale
- * for the United States:
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * \code
- *       ULOC_US
- * \endcode
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- *
- * <P>
- * Once you've specified a locale you can query it for information about
- * itself. Use <code>uloc_getCountry</code> to get the ISO Country Code and
- * <code>uloc_getLanguage</code> to get the ISO Language Code. You can
- * use <code>uloc_getDisplayCountry</code> to get the
- * name of the country suitable for displaying to the user. Similarly,
- * you can use <code>uloc_getDisplayLanguage</code> to get the name of
- * the language suitable for displaying to the user. Interestingly,
- * the <code>uloc_getDisplayXXX</code> methods are themselves locale-sensitive
- * and have two versions: one that uses the default locale and one
- * that takes a locale as an argument and displays the name or country in
- * a language appropriate to that locale.
- *
- * <P>
- * The ICU provides a number of services that perform locale-sensitive
- * operations. For example, the <code>unum_xxx</code> functions format
- * numbers, currency, or percentages in a locale-sensitive manner. 
- * </P>
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * \code
- *     UErrorCode success = U_ZERO_ERROR;
- *     UNumberFormat *nf;
- *     const char* myLocale = "fr_FR";
- * 
- *     nf = unum_open( UNUM_DEFAULT, NULL, success );          
- *     unum_close(nf);
- *     nf = unum_open( UNUM_CURRENCY, NULL, success );
- *     unum_close(nf);
- *     nf = unum_open( UNUM_PERCENT, NULL, success );   
- *     unum_close(nf);
- * \endcode
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- * Each of these methods has two variants; one with an explicit locale
- * and one without; the latter using the default locale.
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * \code 
- * 
- *     nf = unum_open( UNUM_DEFAULT, myLocale, success );          
- *     unum_close(nf);
- *     nf = unum_open( UNUM_CURRENCY, myLocale, success );
- *     unum_close(nf);
- *     nf = unum_open( UNUM_PERCENT, myLocale, success );   
- *     unum_close(nf);
- * \endcode
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- * A <code>Locale</code> is the mechanism for identifying the kind of services
- * (<code>UNumberFormat</code>) that you would like to get. The locale is
- * <STRONG>just</STRONG> a mechanism for identifying these services.
- *
- * <P>
- * Each international serivce that performs locale-sensitive operations 
- * allows you
- * to get all the available objects of that type. You can sift
- * through these objects by language, country, or variant,
- * and use the display names to present a menu to the user.
- * For example, you can create a menu of all the collation objects
- * suitable for a given language. Such classes implement these
- * three class methods:
- * \htmlonly<blockquote>\endhtmlonly
- * <pre>
- * \code
- *       const char* uloc_getAvailable(int32_t index);
- *       int32_t uloc_countAvailable();
- *       int32_t
- *       uloc_getDisplayName(const char* localeID,
- *                 const char* inLocaleID, 
- *                 UChar* result,
- *                 int32_t maxResultSize,
- *                  UErrorCode* err);
- * 
- * \endcode
- * </pre>
- * \htmlonly</blockquote>\endhtmlonly
- * <P>
- * Concerning POSIX/RFC1766 Locale IDs, 
- *  the getLanguage/getCountry/getVariant/getName functions do understand
- * the POSIX type form of  language_COUNTRY.ENCODING\@VARIANT
- * and if there is not an ICU-stype variant, uloc_getVariant() for example
- * will return the one listed after the \@at sign. As well, the hyphen
- * "-" is recognized as a country/variant separator similarly to RFC1766.
- * So for example, "en-us" will be interpreted as en_US.  
- * As a result, uloc_getName() is far from a no-op, and will have the
- * effect of converting POSIX/RFC1766 IDs into ICU form, although it does
- * NOT map any of the actual codes (i.e. russian->ru) in any way.
- * Applications should call uloc_getName() at the point where a locale ID
- * is coming from an external source (user entry, OS, web browser)
- * and pass the resulting string to other ICU functions.  For example,
- * don't use de-de\@EURO as an argument to resourcebundle.
- *
- * @see UResourceBundle
- */
-
-/** Useful constant for this language. @stable ICU 2.0 */
-#define ULOC_CHINESE            "zh"
-/** Useful constant for this language. @stable ICU 2.0 */
-#define ULOC_ENGLISH            "en"
-/** Useful constant for this language. @stable ICU 2.0 */
-#define ULOC_FRENCH             "fr"
-/** Useful constant for this language. @stable ICU 2.0 */
-#define ULOC_GERMAN             "de"
-/** Useful constant for this language. @stable ICU 2.0 */
-#define ULOC_ITALIAN            "it"
-/** Useful constant for this language. @stable ICU 2.0 */
-#define ULOC_JAPANESE           "ja"
-/** Useful constant for this language. @stable ICU 2.0 */
-#define ULOC_KOREAN             "ko"
-/** Useful constant for this language. @stable ICU 2.0 */
-#define ULOC_SIMPLIFIED_CHINESE "zh_CN"
-/** Useful constant for this language. @stable ICU 2.0 */
-#define ULOC_TRADITIONAL_CHINESE "zh_TW"
-
-/** Useful constant for this country/region. @stable ICU 2.0 */
-#define ULOC_CANADA         "en_CA"
-/** Useful constant for this country/region. @stable ICU 2.0 */
-#define ULOC_CANADA_FRENCH  "fr_CA"
-/** Useful constant for this country/region. @stable ICU 2.0 */
-#define ULOC_CHINA          "zh_CN"
-/** Useful constant for this country/region. @stable ICU 2.0 */
-#define ULOC_PRC            "zh_CN"
-/** Useful constant for this country/region. @stable ICU 2.0 */
-#define ULOC_FRANCE         "fr_FR"
-/** Useful constant for this country/region. @stable ICU 2.0 */
-#define ULOC_GERMANY        "de_DE"
-/** Useful constant for this country/region. @stable ICU 2.0 */
-#define ULOC_ITALY          "it_IT"
-/** Useful constant for this country/region. @stable ICU 2.0 */
-#define ULOC_JAPAN          "ja_JP"
-/** Useful constant for this country/region. @stable ICU 2.0 */
-#define ULOC_KOREA          "ko_KR"
-/** Useful constant for this country/region. @stable ICU 2.0 */
-#define ULOC_TAIWAN         "zh_TW"
-/** Useful constant for this country/region. @stable ICU 2.0 */
-#define ULOC_UK             "en_GB"
-/** Useful constant for this country/region. @stable ICU 2.0 */
-#define ULOC_US             "en_US"
-
-/**
- * Useful constant for the maximum size of the language part of a locale ID.
- * (including the terminating NULL).
- * @stable ICU 2.0
- */
-#define ULOC_LANG_CAPACITY 12
-
-/**
- * Useful constant for the maximum size of the country part of a locale ID
- * (including the terminating NULL).
- * @stable ICU 2.0
- */
-#define ULOC_COUNTRY_CAPACITY 4
-/**
- * Useful constant for the maximum size of the whole locale ID
- * (including the terminating NULL).
- * @stable ICU 2.0
- */
-#define ULOC_FULLNAME_CAPACITY 56
-
-/**
- * Useful constant for the maximum size of the script part of a locale ID
- * (including the terminating NULL).
- * @stable ICU 2.8
- */
-#define ULOC_SCRIPT_CAPACITY 6
-
-/**
- * Useful constant for the maximum size of keywords in a locale
- * @stable ICU 2.8
- */
-#define ULOC_KEYWORDS_CAPACITY 50
-
-/**
- * Useful constant for the maximum SIZE of keywords in a locale
- * @stable ICU 2.8
- */
-#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
-
-/**
- * Character separating keywords from the locale string
- * different for EBCDIC - TODO
- * @stable ICU 2.8
- */
-#define ULOC_KEYWORD_SEPARATOR '@'
-/**
- * Character for assigning value to a keyword
- * @stable ICU 2.8
- */
-#define ULOC_KEYWORD_ASSIGN '='
-/**
- * Character separating keywords
- * @stable ICU 2.8
- */
-#define ULOC_KEYWORD_ITEM_SEPARATOR ';'
-
-/**
- * Constants for *_getLocale()
- * Allow user to select whether she wants information on 
- * requested, valid or actual locale.
- * For example, a collator for "en_US_CALIFORNIA" was
- * requested. In the current state of ICU (2.0), 
- * the requested locale is "en_US_CALIFORNIA",
- * the valid locale is "en_US" (most specific locale supported by ICU)
- * and the actual locale is "root" (the collation data comes unmodified 
- * from the UCA)
- * The locale is considered supported by ICU if there is a core ICU bundle 
- * for that locale (although it may be empty).
- * @stable ICU 2.1
- */
-typedef enum {
-  /** This is locale the data actually comes from 
-   * @stable ICU 2.1
-   */
-  ULOC_ACTUAL_LOCALE    = 0,
-  /** This is the most specific locale supported by ICU 
-   * @stable ICU 2.1
-   */
-  ULOC_VALID_LOCALE    = 1,
-
-#ifndef U_HIDE_DEPRECATED_API
-  /** This is the requested locale
-   *  @deprecated ICU 2.8 
-   */
-  ULOC_REQUESTED_LOCALE = 2,
-#endif /* U_HIDE_DEPRECATED_API */
-
-  ULOC_DATA_LOCALE_TYPE_LIMIT = 3
-} ULocDataLocaleType ;
-
-
-/**
- * Gets ICU's default locale.  
- * The returned string is a snapshot in time, and will remain valid
- *   and unchanged even when uloc_setDefault() is called.
- *   The returned storage is owned by ICU, and must not be altered or deleted
- *   by the caller.
- *  
- * @return the ICU default locale
- * @system
- * @stable ICU 2.0
- */
-U_STABLE const char* U_EXPORT2
-uloc_getDefault(void);
-
-/**
- * Sets ICU's default locale.  
- *    By default (without calling this function), ICU's default locale will be based
- *    on information obtained from the underlying system environment.
- *    <p>
- *    Changes to ICU's default locale do not propagate back to the
- *    system environment.
- *    <p>
- *    Changes to ICU's default locale to not affect any ICU services that
- *    may already be open based on the previous default locale value.
- *
- * @param localeID the new ICU default locale. A value of NULL will try to get
- *                 the system's default locale.
- * @param status the error information if the setting of default locale fails
- * @system
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-uloc_setDefault(const char* localeID,
-        UErrorCode*       status);
-
-/**
- * Gets the language code for the specified locale.
- *
- * @param localeID the locale to get the ISO language code with
- * @param language the language code for localeID
- * @param languageCapacity the size of the language buffer to store the  
- * language code with
- * @param err error information if retrieving the language code failed
- * @return the actual buffer size needed for the language code.  If it's greater 
- * than languageCapacity, the returned language code will be truncated.  
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getLanguage(const char*    localeID,
-         char* language,
-         int32_t languageCapacity,
-         UErrorCode* err);
-
-/**
- * Gets the script code for the specified locale.
- *
- * @param localeID the locale to get the ISO language code with
- * @param script the language code for localeID
- * @param scriptCapacity the size of the language buffer to store the  
- * language code with
- * @param err error information if retrieving the language code failed
- * @return the actual buffer size needed for the language code.  If it's greater 
- * than scriptCapacity, the returned language code will be truncated.  
- * @stable ICU 2.8
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getScript(const char*    localeID,
-         char* script,
-         int32_t scriptCapacity,
-         UErrorCode* err);
-
-/**
- * Gets the  country code for the specified locale.
- *
- * @param localeID the locale to get the country code with
- * @param country the country code for localeID
- * @param countryCapacity the size of the country buffer to store the  
- * country code with
- * @param err error information if retrieving the country code failed
- * @return the actual buffer size needed for the country code.  If it's greater 
- * than countryCapacity, the returned country code will be truncated.  
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getCountry(const char*    localeID,
-        char* country,
-        int32_t countryCapacity,
-        UErrorCode* err);
-
-/**
- * Gets the variant code for the specified locale.
- *
- * @param localeID the locale to get the variant code with
- * @param variant the variant code for localeID
- * @param variantCapacity the size of the variant buffer to store the 
- * variant code with
- * @param err error information if retrieving the variant code failed
- * @return the actual buffer size needed for the variant code.  If it's greater 
- * than variantCapacity, the returned variant code will be truncated.  
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getVariant(const char*    localeID,
-        char* variant,
-        int32_t variantCapacity,
-        UErrorCode* err);
-
-
-/**
- * Gets the full name for the specified locale.
- * Note: This has the effect of 'canonicalizing' the ICU locale ID to
- * a certain extent. Upper and lower case are set as needed.
- * It does NOT map aliased names in any way.
- * See the top of this header file.
- * This API supports preflighting.
- *
- * @param localeID the locale to get the full name with
- * @param name fill in buffer for the name without keywords.
- * @param nameCapacity capacity of the fill in buffer.
- * @param err error information if retrieving the full name failed
- * @return the actual buffer size needed for the full name.  If it's greater 
- * than nameCapacity, the returned full name will be truncated.  
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getName(const char*    localeID,
-         char* name,
-         int32_t nameCapacity,
-         UErrorCode* err);
-
-/**
- * Gets the full name for the specified locale.
- * Note: This has the effect of 'canonicalizing' the string to
- * a certain extent. Upper and lower case are set as needed,
- * and if the components were in 'POSIX' format they are changed to
- * ICU format.  It does NOT map aliased names in any way.
- * See the top of this header file.
- *
- * @param localeID the locale to get the full name with
- * @param name the full name for localeID
- * @param nameCapacity the size of the name buffer to store the 
- * full name with
- * @param err error information if retrieving the full name failed
- * @return the actual buffer size needed for the full name.  If it's greater 
- * than nameCapacity, the returned full name will be truncated.  
- * @stable ICU 2.8
- */
-U_STABLE int32_t U_EXPORT2
-uloc_canonicalize(const char*    localeID,
-         char* name,
-         int32_t nameCapacity,
-         UErrorCode* err);
-
-/**
- * Gets the ISO language code for the specified locale.
- *
- * @param localeID the locale to get the ISO language code with
- * @return language the ISO language code for localeID
- * @stable ICU 2.0
- */
-U_STABLE const char* U_EXPORT2
-uloc_getISO3Language(const char* localeID);
-
-
-/**
- * Gets the ISO country code for the specified locale.
- *
- * @param localeID the locale to get the ISO country code with
- * @return country the ISO country code for localeID
- * @stable ICU 2.0
- */
-U_STABLE const char* U_EXPORT2
-uloc_getISO3Country(const char* localeID);
-
-/**
- * Gets the Win32 LCID value for the specified locale.
- * If the ICU locale is not recognized by Windows, 0 will be returned.
- *
- * @param localeID the locale to get the Win32 LCID value with
- * @return country the Win32 LCID for localeID
- * @stable ICU 2.0
- */
-U_STABLE uint32_t U_EXPORT2
-uloc_getLCID(const char* localeID);
-
-/**
- * Gets the language name suitable for display for the specified locale.
- *
- * @param locale the locale to get the ISO language code with
- * @param displayLocale Specifies the locale to be used to display the name.  In other words,
- *                 if the locale's language code is "en", passing Locale::getFrench() for
- *                 inLocale would result in "Anglais", while passing Locale::getGerman()
- *                 for inLocale would result in "Englisch".
- * @param language the displayable language code for localeID
- * @param languageCapacity the size of the language buffer to store the  
- * displayable language code with
- * @param status error information if retrieving the displayable language code failed
- * @return the actual buffer size needed for the displayable language code.  If it's greater 
- * than languageCapacity, the returned language code will be truncated.  
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getDisplayLanguage(const char* locale,
-            const char* displayLocale,
-            UChar* language,
-            int32_t languageCapacity,
-            UErrorCode* status);
-
-/**
- * Gets the script name suitable for display for the specified locale.
- *
- * @param locale the locale to get the displayable script code with. NULL may be used to specify the default.
- * @param displayLocale Specifies the locale to be used to display the name.  In other words,
- *                 if the locale's language code is "en", passing Locale::getFrench() for
- *                 inLocale would result in "", while passing Locale::getGerman()
- *                 for inLocale would result in "". NULL may be used to specify the default.
- * @param script the displayable country code for localeID
- * @param scriptCapacity the size of the script buffer to store the  
- * displayable script code with
- * @param status error information if retrieving the displayable script code failed
- * @return the actual buffer size needed for the displayable script code.  If it's greater 
- * than scriptCapacity, the returned displayable script code will be truncated.  
- * @stable ICU 2.8
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getDisplayScript(const char* locale,
-            const char* displayLocale,
-            UChar* script,
-            int32_t scriptCapacity,
-            UErrorCode* status);
-
-/**
- * Gets the country name suitable for display for the specified locale.
- *
- * @param locale the locale to get the displayable country code with. NULL may be used to specify the default.
- * @param displayLocale Specifies the locale to be used to display the name.  In other words,
- *                 if the locale's language code is "en", passing Locale::getFrench() for
- *                 inLocale would result in "Anglais", while passing Locale::getGerman()
- *                 for inLocale would result in "Englisch". NULL may be used to specify the default.
- * @param country the displayable country code for localeID
- * @param countryCapacity the size of the country buffer to store the  
- * displayable country code with
- * @param status error information if retrieving the displayable country code failed
- * @return the actual buffer size needed for the displayable country code.  If it's greater 
- * than countryCapacity, the returned displayable country code will be truncated.  
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getDisplayCountry(const char* locale,
-                       const char* displayLocale,
-                       UChar* country,
-                       int32_t countryCapacity,
-                       UErrorCode* status);
-
-
-/**
- * Gets the variant name suitable for display for the specified locale.
- *
- * @param locale the locale to get the displayable variant code with. NULL may be used to specify the default.
- * @param displayLocale Specifies the locale to be used to display the name.  In other words,
- *                 if the locale's language code is "en", passing Locale::getFrench() for
- *                 inLocale would result in "Anglais", while passing Locale::getGerman()
- *                 for inLocale would result in "Englisch". NULL may be used to specify the default.
- * @param variant the displayable variant code for localeID
- * @param variantCapacity the size of the variant buffer to store the 
- * displayable variant code with
- * @param status error information if retrieving the displayable variant code failed
- * @return the actual buffer size needed for the displayable variant code.  If it's greater 
- * than variantCapacity, the returned displayable variant code will be truncated.  
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getDisplayVariant(const char* locale,
-                       const char* displayLocale,
-                       UChar* variant,
-                       int32_t variantCapacity,
-                       UErrorCode* status);
-
-/**
- * Gets the keyword name suitable for display for the specified locale.
- * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display 
- * string for the keyword collation. 
- * Usage:
- * <code>
- *    UErrorCode status = U_ZERO_ERROR;
- *    const char* keyword =NULL;
- *    int32_t keywordLen = 0;
- *    int32_t keywordCount = 0;
- *    UChar displayKeyword[256];
- *    int32_t displayKeywordLen = 0;
- *    UEnumeration* keywordEnum = uloc_openKeywords("de_DE at collation=PHONEBOOK;calendar=TRADITIONAL", &status);
- *    for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
- *          if(U_FAILURE(status)){
- *              ...something went wrong so handle the error...
- *              break;
- *          }
- *          // the uenum_next returns NUL terminated string
- *          keyword = uenum_next(keywordEnum, &keywordLen, &status);
- *          displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256);
- *          ... do something interesting .....
- *    }
- *    uenum_close(keywordEnum);
- * </code>
- * @param keyword           The keyword whose display string needs to be returned.
- * @param displayLocale     Specifies the locale to be used to display the name.  In other words,
- *                          if the locale's language code is "en", passing Locale::getFrench() for
- *                          inLocale would result in "Anglais", while passing Locale::getGerman()
- *                          for inLocale would result in "Englisch". NULL may be used to specify the default.
- * @param dest              the buffer to which the displayable keyword should be written.
- * @param destCapacity      The size of the buffer (number of UChars). If it is 0, then
- *                          dest may be NULL and the function will only return the length of the 
- *                          result without writing any of the result string (pre-flighting).
- * @param status            error information if retrieving the displayable string failed. 
- *                          Should not be NULL and should not indicate failure on entry.
- * @return the actual buffer size needed for the displayable variant code.  
- * @see #uloc_openKeywords
- * @stable ICU 2.8
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getDisplayKeyword(const char* keyword,
-                       const char* displayLocale,
-                       UChar* dest,
-                       int32_t destCapacity,
-                       UErrorCode* status);
-/**
- * Gets the value of the keyword suitable for display for the specified locale.
- * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display 
- * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.
- *
- * @param locale            The locale to get the displayable variant code with. NULL may be used to specify the default.
- * @param keyword           The keyword for whose value should be used.
- * @param displayLocale     Specifies the locale to be used to display the name.  In other words,
- *                          if the locale's language code is "en", passing Locale::getFrench() for
- *                          inLocale would result in "Anglais", while passing Locale::getGerman()
- *                          for inLocale would result in "Englisch". NULL may be used to specify the default.
- * @param dest              the buffer to which the displayable keyword should be written.
- * @param destCapacity      The size of the buffer (number of UChars). If it is 0, then
- *                          dest may be NULL and the function will only return the length of the 
- *                          result without writing any of the result string (pre-flighting).
- * @param status            error information if retrieving the displayable string failed. 
- *                          Should not be NULL and must not indicate failure on entry.
- * @return the actual buffer size needed for the displayable variant code.  
- * @stable ICU 2.8
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getDisplayKeywordValue(   const char* locale,
-                               const char* keyword,
-                               const char* displayLocale,
-                               UChar* dest,
-                               int32_t destCapacity,
-                               UErrorCode* status);
-/**
- * Gets the full name suitable for display for the specified locale.
- *
- * @param localeID the locale to get the displayable name with. NULL may be used to specify the default.
- * @param inLocaleID Specifies the locale to be used to display the name.  In other words,
- *                   if the locale's language code is "en", passing Locale::getFrench() for
- *                   inLocale would result in "Anglais", while passing Locale::getGerman()
- *                   for inLocale would result in "Englisch". NULL may be used to specify the default.
- * @param result the displayable name for localeID
- * @param maxResultSize the size of the name buffer to store the 
- * displayable full name with
- * @param err error information if retrieving the displayable name failed
- * @return the actual buffer size needed for the displayable name.  If it's greater 
- * than maxResultSize, the returned displayable name will be truncated.  
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getDisplayName(const char* localeID,
-            const char* inLocaleID,
-            UChar* result,
-            int32_t maxResultSize,
-            UErrorCode* err);
-
-
-/**
- * Gets the specified locale from a list of all available locales.  
- * The return value is a pointer to an item of 
- * a locale name array.  Both this array and the pointers
- * it contains are owned by ICU and should not be deleted or written through
- * by the caller.  The locale name is terminated by a null pointer.
- * @param n the specific locale name index of the available locale list
- * @return a specified locale name of all available locales
- * @stable ICU 2.0
- */
-U_STABLE const char* U_EXPORT2
-uloc_getAvailable(int32_t n);
-
-/**
- * Gets the size of the all available locale list.
- *
- * @return the size of the locale list
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 uloc_countAvailable(void);
-
-/**
- *
- * Gets a list of all available language codes defined in ISO 639.  This is a pointer
- * to an array of pointers to arrays of char.  All of these pointers are owned
- * by ICU-- do not delete them, and do not write through them.  The array is
- * terminated with a null pointer.
- * @return a list of all available language codes
- * @stable ICU 2.0
- */
-U_STABLE const char* const* U_EXPORT2
-uloc_getISOLanguages(void);
-
-/**
- *
- * Gets a list of all available 2-letter country codes defined in ISO 639.  This is a
- * pointer to an array of pointers to arrays of char.  All of these pointers are
- * owned by ICU-- do not delete them, and do not write through them.  The array is
- * terminated with a null pointer.
- * @return a list of all available country codes
- * @stable ICU 2.0
- */
-U_STABLE const char* const* U_EXPORT2
-uloc_getISOCountries(void);
-
-/**
- * Truncate the locale ID string to get the parent locale ID.
- * Copies the part of the string before the last underscore.
- * The parent locale ID will be an empty string if there is no
- * underscore, or if there is only one underscore at localeID[0].
- *
- * @param localeID Input locale ID string.
- * @param parent   Output string buffer for the parent locale ID.
- * @param parentCapacity Size of the output buffer.
- * @param err A UErrorCode value.
- * @return The length of the parent locale ID.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getParent(const char*    localeID,
-                 char* parent,
-                 int32_t parentCapacity,
-                 UErrorCode* err);
-
-
-
-
-/**
- * Gets the full name for the specified locale.
- * Note: This has the effect of 'canonicalizing' the string to
- * a certain extent. Upper and lower case are set as needed,
- * and if the components were in 'POSIX' format they are changed to
- * ICU format.  It does NOT map aliased names in any way.
- * See the top of this header file.
- * This API strips off the keyword part, so "de_DE\@collation=phonebook" 
- * will become "de_DE". 
- * This API supports preflighting.
- *
- * @param localeID the locale to get the full name with
- * @param name fill in buffer for the name without keywords.
- * @param nameCapacity capacity of the fill in buffer.
- * @param err error information if retrieving the full name failed
- * @return the actual buffer size needed for the full name.  If it's greater 
- * than nameCapacity, the returned full name will be truncated.  
- * @stable ICU 2.8
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getBaseName(const char*    localeID,
-         char* name,
-         int32_t nameCapacity,
-         UErrorCode* err);
-
-/**
- * Gets an enumeration of keywords for the specified locale. Enumeration
- * must get disposed of by the client using uenum_close function.
- *
- * @param localeID the locale to get the variant code with
- * @param status error information if retrieving the keywords failed
- * @return enumeration of keywords or NULL if there are no keywords.
- * @stable ICU 2.8
- */
-U_STABLE UEnumeration* U_EXPORT2
-uloc_openKeywords(const char* localeID,
-                        UErrorCode* status);
-
-/**
- * Get the value for a keyword. Locale name does not need to be normalized.
- * 
- * @param localeID locale name containing the keyword ("de_DE at currency=EURO;collation=PHONEBOOK")
- * @param keywordName name of the keyword for which we want the value. Case insensitive.
- * @param buffer receiving buffer
- * @param bufferCapacity capacity of receiving buffer
- * @param status containing error code - buffer not big enough.
- * @return the length of keyword value
- * @stable ICU 2.8
- */
-U_STABLE int32_t U_EXPORT2
-uloc_getKeywordValue(const char* localeID,
-                     const char* keywordName,
-                     char* buffer, int32_t bufferCapacity,
-                     UErrorCode* status);
-
-
-/**
- * Set the value of the specified keyword.
- * NOTE: Unlike almost every other ICU function which takes a
- * buffer, this function will NOT truncate the output text. If a
- * BUFFER_OVERFLOW_ERROR is received, it means that the original
- * buffer is untouched. This is done to prevent incorrect or possibly
- * even malformed locales from being generated and used.
- * 
- * @param keywordName name of the keyword to be set. Case insensitive.
- * @param keywordValue value of the keyword to be set. If 0-length or
- *  NULL, will result in the keyword being removed. No error is given if 
- *  that keyword does not exist.
- * @param buffer input buffer containing locale to be modified.
- * @param bufferCapacity capacity of receiving buffer
- * @param status containing error code - buffer not big enough.
- * @return the length needed for the buffer
- * @see uloc_getKeywordValue
- * @stable ICU 3.2
- */
-U_STABLE int32_t U_EXPORT2
-uloc_setKeywordValue(const char* keywordName,
-                     const char* keywordValue,
-                     char* buffer, int32_t bufferCapacity,
-                     UErrorCode* status);
-
-/**
- * enums for the  return value for the character and line orientation
- * functions.
- * @draft ICU 4.0
- */
-typedef enum {
-  ULOC_LAYOUT_LTR   = 0,  /* left-to-right. */
-  ULOC_LAYOUT_RTL    = 1,  /* right-to-left. */
-  ULOC_LAYOUT_TTB    = 2,  /* top-to-bottom. */
-  ULOC_LAYOUT_BTT    = 3,   /* bottom-to-top. */
-  ULOC_LAYOUT_UNKNOWN
-} ULayoutType;
-
-/**
- * Get the layout character orientation for the specified locale.
- * 
- * @param localeId locale name
- * @param status Error status
- * @return an enum indicating the layout orientation for characters.
- * @draft ICU 4.0
- */
-U_DRAFT ULayoutType U_EXPORT2
-uloc_getCharacterOrientation(const char* localeId,
-                             UErrorCode *status);
-
-/**
- * Get the layout line orientation for the specified locale.
- * 
- * @param localeId locale name
- * @param status Error status
- * @return an enum indicating the layout orientation for lines.
- * @draft ICU 4.0
- */
-U_DRAFT ULayoutType U_EXPORT2
-uloc_getLineOrientation(const char* localeId,
-                        UErrorCode *status);
-
-/**
- * enums for the 'outResult' parameter return value
- * @see uloc_acceptLanguageFromHTTP
- * @see uloc_acceptLanguage
- * @stable ICU 3.2
- */
-typedef enum {
-  ULOC_ACCEPT_FAILED   = 0,  /* No exact match was found. */
-  ULOC_ACCEPT_VALID    = 1,  /* An exact match was found. */
-  ULOC_ACCEPT_FALLBACK = 2   /* A fallback was found, for example, 
-                                Accept list contained 'ja_JP'
-                                which matched available locale 'ja'. */
-} UAcceptResult;
-
-
-/**
- * Based on a HTTP header from a web browser and a list of available locales,
- * determine an acceptable locale for the user.
- * @param result - buffer to accept the result locale
- * @param resultAvailable the size of the result buffer.
- * @param outResult - An out parameter that contains the fallback status
- * @param httpAcceptLanguage - "Accept-Language:" header as per HTTP.
- * @param availableLocales - list of available locales to match
- * @param status Error status, may be BUFFER_OVERFLOW_ERROR
- * @return length needed for the locale.
- * @stable ICU 3.2
- */
-U_STABLE int32_t U_EXPORT2
-uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
-                            UAcceptResult *outResult,
-                            const char *httpAcceptLanguage,
-                            UEnumeration* availableLocales,
-                            UErrorCode *status);
-
-/**
- * Based on a list of available locales,
- * determine an acceptable locale for the user.
- * @param result - buffer to accept the result locale
- * @param resultAvailable the size of the result buffer.
- * @param outResult - An out parameter that contains the fallback status
- * @param acceptList - list of acceptable languages
- * @param acceptListCount - count of acceptList items
- * @param availableLocales - list of available locales to match
- * @param status Error status, may be BUFFER_OVERFLOW_ERROR
- * @return length needed for the locale.
- * @stable ICU 3.2
- */
-U_STABLE int32_t U_EXPORT2
-uloc_acceptLanguage(char *result, int32_t resultAvailable, 
-                    UAcceptResult *outResult, const char **acceptList,
-                    int32_t acceptListCount,
-                    UEnumeration* availableLocales,
-                    UErrorCode *status);
-
-
-/**
- * Gets the ICU locale ID for the specified Win32 LCID value.
- *
- * @param hostID the Win32 LCID to translate
- * @param locale the output buffer for the ICU locale ID, which will be NUL-terminated
- *  if there is room.
- * @param localeCapacity the size of the output buffer
- * @param status an error is returned if the LCID is unrecognized or the output buffer
- *  is too small
- * @return actual the actual size of the locale ID, not including NUL-termination 
- * @stable ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity,
-                    UErrorCode *status);
-
-
-/**
- * Add the likely subtags for a provided locale ID, per the algorithm described
- * in the following CLDR technical report:
- *
- *   http://www.unicode.org/reports/tr35/#Likely_Subtags
- *
- * If localeID is already in the maximal form, or there is no data available
- * for maximization, it will be copied to the output buffer.  For example,
- * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
- *
- * Examples:
- *
- * "en" maximizes to "en_Latn_US"
- *
- * "de" maximizes to "de_Latn_US"
- *
- * "sr" maximizes to "sr_Cyrl_RS"
- *
- * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
- *
- * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
- *
- * @param localeID The locale to maximize
- * @param maximizedLocaleID The maximized locale
- * @param maximizedLocaleIDCapacity The capacity of the maximizedLocaleID buffer
- * @param err Error information if maximizing the locale failed.  If the length
- * of the localeID and the null-terminator is greater than the maximum allowed size,
- * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
- * @return The actual buffer size needed for the maximized locale.  If it's
- * greater than maximizedLocaleIDCapacity, the returned ID will be truncated.
- * On error, the return value is -1.
- * @draft ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-uloc_addLikelySubtags(const char*    localeID,
-         char* maximizedLocaleID,
-         int32_t maximizedLocaleIDCapacity,
-         UErrorCode* err);
-
-
-/**
- * Minimize the subtags for a provided locale ID, per the algorithm described
- * in the following CLDR technical report:
- *
- *   http://www.unicode.org/reports/tr35/#Likely_Subtags
- *
- * If localeID is already in the minimal form, or there is no data available
- * for minimization, it will be copied to the output buffer.  Since the
- * minimization algorithm relies on proper maximization, see the comments
- * for uloc_addLikelySubtags for reasons why there might not be any data.
- *
- * Examples:
- *
- * "en_Latn_US" minimizes to "en"
- *
- * "de_Latn_US" minimizes to "de"
- *
- * "sr_Cyrl_RS" minimizes to "sr"
- *
- * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
- * script, and minimizing to "zh" would imply "zh_Hans_CN".)
- *
- * @param localeID The locale to minimize
- * @param minimizedLocaleID The minimized locale
- * @param minimizedLocaleIDCapacity The capacity of the minimizedLocaleID buffer
- * @param err Error information if minimizing the locale failed.  If the length
- * of the localeID and the null-terminator is greater than the maximum allowed size,
- * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
- * @return The actual buffer size needed for the minimized locale.  If it's
- * greater than minimizedLocaleIDCapacity, the returned ID will be truncated.
- * On error, the return value is -1.
- * @draft ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-uloc_minimizeSubtags(const char*    localeID,
-         char* minimizedLocaleID,
-         int32_t minimizedLocaleIDCapacity,
-         UErrorCode* err);
-
-#endif /*_ULOC*/

Copied: MacRuby/trunk/icu-1060/unicode/uloc.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uloc.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uloc.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uloc.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1046 @@
+/*
+**********************************************************************
+*   Copyright (C) 1997-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+* File ULOC.H
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   04/01/97    aliu        Creation.
+*   08/22/98    stephen     JDK 1.2 sync.
+*   12/08/98    rtg         New C API for Locale
+*   03/30/99    damiba      overhaul
+*   03/31/99    helena      Javadoc for uloc functions.
+*   04/15/99    Madhu       Updated Javadoc
+********************************************************************************
+*/
+
+#ifndef ULOC_H
+#define ULOC_H
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+
+/**    
+ * \file
+ * \brief  C API: Locale 
+ *
+ * <h2> ULoc C API for Locale </h2>
+ * A <code>Locale</code> represents a specific geographical, political,
+ * or cultural region. An operation that requires a <code>Locale</code> to perform
+ * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
+ * to tailor information for the user. For example, displaying a number
+ * is a locale-sensitive operation--the number should be formatted
+ * according to the customs/conventions of the user's native country,
+ * region, or culture.  In the C APIs, a locales is simply a const char string.
+ *
+ * <P>
+ * You create a <code>Locale</code> with one of the three options listed below.
+ * Each of the component is separated by '_' in the locale string.
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ *       newLanguage
+ * 
+ *       newLanguage + newCountry
+ * 
+ *       newLanguage + newCountry + newVariant
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * The first option is a valid <STRONG>ISO
+ * Language Code.</STRONG> These codes are the lower-case two-letter
+ * codes as defined by ISO-639.
+ * You can find a full list of these codes at a number of sites, such as:
+ * <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
+ * http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</a>
+ *
+ * <P>
+ * The second option includes an additonal <STRONG>ISO Country
+ * Code.</STRONG> These codes are the upper-case two-letter codes
+ * as defined by ISO-3166.
+ * You can find a full list of these codes at a number of sites, such as:
+ * <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
+ * http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</a>
+ *
+ * <P>
+ * The third option requires another additonal information--the 
+ * <STRONG>Variant.</STRONG>
+ * The Variant codes are vendor and browser-specific.
+ * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
+ * Where there are two variants, separate them with an underscore, and
+ * put the most important one first. For
+ * example, a Traditional Spanish collation might be referenced, with
+ * "ES", "ES", "Traditional_WIN".
+ *
+ * <P>
+ * Because a <code>Locale</code> is just an identifier for a region,
+ * no validity check is performed when you specify a <code>Locale</code>.
+ * If you want to see whether particular resources are available for the
+ * <code>Locale</code> you asked for, you must query those resources. For
+ * example, ask the <code>UNumberFormat</code> for the locales it supports
+ * using its <code>getAvailable</code> method.
+ * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
+ * locale, you get back the best available match, not necessarily
+ * precisely what you asked for. For more information, look at
+ * <code>UResourceBundle</code>.
+ *
+ * <P>
+ * The <code>Locale</code> provides a number of convenient constants
+ * that you can use to specify the commonly used
+ * locales. For example, the following refers to a locale
+ * for the United States:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ *       ULOC_US
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <P>
+ * Once you've specified a locale you can query it for information about
+ * itself. Use <code>uloc_getCountry</code> to get the ISO Country Code and
+ * <code>uloc_getLanguage</code> to get the ISO Language Code. You can
+ * use <code>uloc_getDisplayCountry</code> to get the
+ * name of the country suitable for displaying to the user. Similarly,
+ * you can use <code>uloc_getDisplayLanguage</code> to get the name of
+ * the language suitable for displaying to the user. Interestingly,
+ * the <code>uloc_getDisplayXXX</code> methods are themselves locale-sensitive
+ * and have two versions: one that uses the default locale and one
+ * that takes a locale as an argument and displays the name or country in
+ * a language appropriate to that locale.
+ *
+ * <P>
+ * The ICU provides a number of services that perform locale-sensitive
+ * operations. For example, the <code>unum_xxx</code> functions format
+ * numbers, currency, or percentages in a locale-sensitive manner. 
+ * </P>
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ *     UErrorCode success = U_ZERO_ERROR;
+ *     UNumberFormat *nf;
+ *     const char* myLocale = "fr_FR";
+ * 
+ *     nf = unum_open( UNUM_DEFAULT, NULL, success );          
+ *     unum_close(nf);
+ *     nf = unum_open( UNUM_CURRENCY, NULL, success );
+ *     unum_close(nf);
+ *     nf = unum_open( UNUM_PERCENT, NULL, success );   
+ *     unum_close(nf);
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * Each of these methods has two variants; one with an explicit locale
+ * and one without; the latter using the default locale.
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code 
+ * 
+ *     nf = unum_open( UNUM_DEFAULT, myLocale, success );          
+ *     unum_close(nf);
+ *     nf = unum_open( UNUM_CURRENCY, myLocale, success );
+ *     unum_close(nf);
+ *     nf = unum_open( UNUM_PERCENT, myLocale, success );   
+ *     unum_close(nf);
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * A <code>Locale</code> is the mechanism for identifying the kind of services
+ * (<code>UNumberFormat</code>) that you would like to get. The locale is
+ * <STRONG>just</STRONG> a mechanism for identifying these services.
+ *
+ * <P>
+ * Each international serivce that performs locale-sensitive operations 
+ * allows you
+ * to get all the available objects of that type. You can sift
+ * through these objects by language, country, or variant,
+ * and use the display names to present a menu to the user.
+ * For example, you can create a menu of all the collation objects
+ * suitable for a given language. Such classes implement these
+ * three class methods:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ *       const char* uloc_getAvailable(int32_t index);
+ *       int32_t uloc_countAvailable();
+ *       int32_t
+ *       uloc_getDisplayName(const char* localeID,
+ *                 const char* inLocaleID, 
+ *                 UChar* result,
+ *                 int32_t maxResultSize,
+ *                  UErrorCode* err);
+ * 
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * <P>
+ * Concerning POSIX/RFC1766 Locale IDs, 
+ *  the getLanguage/getCountry/getVariant/getName functions do understand
+ * the POSIX type form of  language_COUNTRY.ENCODING\@VARIANT
+ * and if there is not an ICU-stype variant, uloc_getVariant() for example
+ * will return the one listed after the \@at sign. As well, the hyphen
+ * "-" is recognized as a country/variant separator similarly to RFC1766.
+ * So for example, "en-us" will be interpreted as en_US.  
+ * As a result, uloc_getName() is far from a no-op, and will have the
+ * effect of converting POSIX/RFC1766 IDs into ICU form, although it does
+ * NOT map any of the actual codes (i.e. russian->ru) in any way.
+ * Applications should call uloc_getName() at the point where a locale ID
+ * is coming from an external source (user entry, OS, web browser)
+ * and pass the resulting string to other ICU functions.  For example,
+ * don't use de-de\@EURO as an argument to resourcebundle.
+ *
+ * @see UResourceBundle
+ */
+
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_CHINESE            "zh"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_ENGLISH            "en"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_FRENCH             "fr"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_GERMAN             "de"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_ITALIAN            "it"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_JAPANESE           "ja"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_KOREAN             "ko"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_SIMPLIFIED_CHINESE "zh_CN"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_TRADITIONAL_CHINESE "zh_TW"
+
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CANADA         "en_CA"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CANADA_FRENCH  "fr_CA"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CHINA          "zh_CN"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_PRC            "zh_CN"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_FRANCE         "fr_FR"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_GERMANY        "de_DE"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_ITALY          "it_IT"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_JAPAN          "ja_JP"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_KOREA          "ko_KR"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_TAIWAN         "zh_TW"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_UK             "en_GB"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_US             "en_US"
+
+/**
+ * Useful constant for the maximum size of the language part of a locale ID.
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_LANG_CAPACITY 12
+
+/**
+ * Useful constant for the maximum size of the country part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_COUNTRY_CAPACITY 4
+/**
+ * Useful constant for the maximum size of the whole locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_FULLNAME_CAPACITY 56
+
+/**
+ * Useful constant for the maximum size of the script part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.8
+ */
+#define ULOC_SCRIPT_CAPACITY 6
+
+/**
+ * Useful constant for the maximum size of keywords in a locale
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORDS_CAPACITY 50
+
+/**
+ * Useful constant for the maximum SIZE of keywords in a locale
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
+
+/**
+ * Character separating keywords from the locale string
+ * different for EBCDIC - TODO
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_SEPARATOR '@'
+/**
+ * Character for assigning value to a keyword
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_ASSIGN '='
+/**
+ * Character separating keywords
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_ITEM_SEPARATOR ';'
+
+/**
+ * Constants for *_getLocale()
+ * Allow user to select whether she wants information on 
+ * requested, valid or actual locale.
+ * For example, a collator for "en_US_CALIFORNIA" was
+ * requested. In the current state of ICU (2.0), 
+ * the requested locale is "en_US_CALIFORNIA",
+ * the valid locale is "en_US" (most specific locale supported by ICU)
+ * and the actual locale is "root" (the collation data comes unmodified 
+ * from the UCA)
+ * The locale is considered supported by ICU if there is a core ICU bundle 
+ * for that locale (although it may be empty).
+ * @stable ICU 2.1
+ */
+typedef enum {
+  /** This is locale the data actually comes from 
+   * @stable ICU 2.1
+   */
+  ULOC_ACTUAL_LOCALE    = 0,
+  /** This is the most specific locale supported by ICU 
+   * @stable ICU 2.1
+   */
+  ULOC_VALID_LOCALE    = 1,
+
+#ifndef U_HIDE_DEPRECATED_API
+  /** This is the requested locale
+   *  @deprecated ICU 2.8 
+   */
+  ULOC_REQUESTED_LOCALE = 2,
+#endif /* U_HIDE_DEPRECATED_API */
+
+  ULOC_DATA_LOCALE_TYPE_LIMIT = 3
+} ULocDataLocaleType ;
+
+
+/**
+ * Gets ICU's default locale.  
+ * The returned string is a snapshot in time, and will remain valid
+ *   and unchanged even when uloc_setDefault() is called.
+ *   The returned storage is owned by ICU, and must not be altered or deleted
+ *   by the caller.
+ *  
+ * @return the ICU default locale
+ * @system
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+uloc_getDefault(void);
+
+/**
+ * Sets ICU's default locale.  
+ *    By default (without calling this function), ICU's default locale will be based
+ *    on information obtained from the underlying system environment.
+ *    <p>
+ *    Changes to ICU's default locale do not propagate back to the
+ *    system environment.
+ *    <p>
+ *    Changes to ICU's default locale to not affect any ICU services that
+ *    may already be open based on the previous default locale value.
+ *
+ * @param localeID the new ICU default locale. A value of NULL will try to get
+ *                 the system's default locale.
+ * @param status the error information if the setting of default locale fails
+ * @system
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+uloc_setDefault(const char* localeID,
+        UErrorCode*       status);
+
+/**
+ * Gets the language code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @param language the language code for localeID
+ * @param languageCapacity the size of the language buffer to store the  
+ * language code with
+ * @param err error information if retrieving the language code failed
+ * @return the actual buffer size needed for the language code.  If it's greater 
+ * than languageCapacity, the returned language code will be truncated.  
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getLanguage(const char*    localeID,
+         char* language,
+         int32_t languageCapacity,
+         UErrorCode* err);
+
+/**
+ * Gets the script code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @param script the language code for localeID
+ * @param scriptCapacity the size of the language buffer to store the  
+ * language code with
+ * @param err error information if retrieving the language code failed
+ * @return the actual buffer size needed for the language code.  If it's greater 
+ * than scriptCapacity, the returned language code will be truncated.  
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getScript(const char*    localeID,
+         char* script,
+         int32_t scriptCapacity,
+         UErrorCode* err);
+
+/**
+ * Gets the  country code for the specified locale.
+ *
+ * @param localeID the locale to get the country code with
+ * @param country the country code for localeID
+ * @param countryCapacity the size of the country buffer to store the  
+ * country code with
+ * @param err error information if retrieving the country code failed
+ * @return the actual buffer size needed for the country code.  If it's greater 
+ * than countryCapacity, the returned country code will be truncated.  
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getCountry(const char*    localeID,
+        char* country,
+        int32_t countryCapacity,
+        UErrorCode* err);
+
+/**
+ * Gets the variant code for the specified locale.
+ *
+ * @param localeID the locale to get the variant code with
+ * @param variant the variant code for localeID
+ * @param variantCapacity the size of the variant buffer to store the 
+ * variant code with
+ * @param err error information if retrieving the variant code failed
+ * @return the actual buffer size needed for the variant code.  If it's greater 
+ * than variantCapacity, the returned variant code will be truncated.  
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getVariant(const char*    localeID,
+        char* variant,
+        int32_t variantCapacity,
+        UErrorCode* err);
+
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the ICU locale ID to
+ * a certain extent. Upper and lower case are set as needed.
+ * It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name.  If it's greater 
+ * than nameCapacity, the returned full name will be truncated.  
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getName(const char*    localeID,
+         char* name,
+         int32_t nameCapacity,
+         UErrorCode* err);
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format.  It does NOT map aliased names in any way.
+ * See the top of this header file.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name the full name for localeID
+ * @param nameCapacity the size of the name buffer to store the 
+ * full name with
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name.  If it's greater 
+ * than nameCapacity, the returned full name will be truncated.  
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_canonicalize(const char*    localeID,
+         char* name,
+         int32_t nameCapacity,
+         UErrorCode* err);
+
+/**
+ * Gets the ISO language code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @return language the ISO language code for localeID
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+uloc_getISO3Language(const char* localeID);
+
+
+/**
+ * Gets the ISO country code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO country code with
+ * @return country the ISO country code for localeID
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+uloc_getISO3Country(const char* localeID);
+
+/**
+ * Gets the Win32 LCID value for the specified locale.
+ * If the ICU locale is not recognized by Windows, 0 will be returned.
+ *
+ * @param localeID the locale to get the Win32 LCID value with
+ * @return country the Win32 LCID for localeID
+ * @stable ICU 2.0
+ */
+U_STABLE uint32_t U_EXPORT2
+uloc_getLCID(const char* localeID);
+
+/**
+ * Gets the language name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the ISO language code with
+ * @param displayLocale Specifies the locale to be used to display the name.  In other words,
+ *                 if the locale's language code is "en", passing Locale::getFrench() for
+ *                 inLocale would result in "Anglais", while passing Locale::getGerman()
+ *                 for inLocale would result in "Englisch".
+ * @param language the displayable language code for localeID
+ * @param languageCapacity the size of the language buffer to store the  
+ * displayable language code with
+ * @param status error information if retrieving the displayable language code failed
+ * @return the actual buffer size needed for the displayable language code.  If it's greater 
+ * than languageCapacity, the returned language code will be truncated.  
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayLanguage(const char* locale,
+            const char* displayLocale,
+            UChar* language,
+            int32_t languageCapacity,
+            UErrorCode* status);
+
+/**
+ * Gets the script name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable script code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name.  In other words,
+ *                 if the locale's language code is "en", passing Locale::getFrench() for
+ *                 inLocale would result in "", while passing Locale::getGerman()
+ *                 for inLocale would result in "". NULL may be used to specify the default.
+ * @param script the displayable country code for localeID
+ * @param scriptCapacity the size of the script buffer to store the  
+ * displayable script code with
+ * @param status error information if retrieving the displayable script code failed
+ * @return the actual buffer size needed for the displayable script code.  If it's greater 
+ * than scriptCapacity, the returned displayable script code will be truncated.  
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayScript(const char* locale,
+            const char* displayLocale,
+            UChar* script,
+            int32_t scriptCapacity,
+            UErrorCode* status);
+
+/**
+ * Gets the country name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable country code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name.  In other words,
+ *                 if the locale's language code is "en", passing Locale::getFrench() for
+ *                 inLocale would result in "Anglais", while passing Locale::getGerman()
+ *                 for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param country the displayable country code for localeID
+ * @param countryCapacity the size of the country buffer to store the  
+ * displayable country code with
+ * @param status error information if retrieving the displayable country code failed
+ * @return the actual buffer size needed for the displayable country code.  If it's greater 
+ * than countryCapacity, the returned displayable country code will be truncated.  
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayCountry(const char* locale,
+                       const char* displayLocale,
+                       UChar* country,
+                       int32_t countryCapacity,
+                       UErrorCode* status);
+
+
+/**
+ * Gets the variant name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable variant code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name.  In other words,
+ *                 if the locale's language code is "en", passing Locale::getFrench() for
+ *                 inLocale would result in "Anglais", while passing Locale::getGerman()
+ *                 for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param variant the displayable variant code for localeID
+ * @param variantCapacity the size of the variant buffer to store the 
+ * displayable variant code with
+ * @param status error information if retrieving the displayable variant code failed
+ * @return the actual buffer size needed for the displayable variant code.  If it's greater 
+ * than variantCapacity, the returned displayable variant code will be truncated.  
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayVariant(const char* locale,
+                       const char* displayLocale,
+                       UChar* variant,
+                       int32_t variantCapacity,
+                       UErrorCode* status);
+
+/**
+ * Gets the keyword name suitable for display for the specified locale.
+ * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display 
+ * string for the keyword collation. 
+ * Usage:
+ * <code>
+ *    UErrorCode status = U_ZERO_ERROR;
+ *    const char* keyword =NULL;
+ *    int32_t keywordLen = 0;
+ *    int32_t keywordCount = 0;
+ *    UChar displayKeyword[256];
+ *    int32_t displayKeywordLen = 0;
+ *    UEnumeration* keywordEnum = uloc_openKeywords("de_DE at collation=PHONEBOOK;calendar=TRADITIONAL", &status);
+ *    for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
+ *          if(U_FAILURE(status)){
+ *              ...something went wrong so handle the error...
+ *              break;
+ *          }
+ *          // the uenum_next returns NUL terminated string
+ *          keyword = uenum_next(keywordEnum, &keywordLen, &status);
+ *          displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256);
+ *          ... do something interesting .....
+ *    }
+ *    uenum_close(keywordEnum);
+ * </code>
+ * @param keyword           The keyword whose display string needs to be returned.
+ * @param displayLocale     Specifies the locale to be used to display the name.  In other words,
+ *                          if the locale's language code is "en", passing Locale::getFrench() for
+ *                          inLocale would result in "Anglais", while passing Locale::getGerman()
+ *                          for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest              the buffer to which the displayable keyword should be written.
+ * @param destCapacity      The size of the buffer (number of UChars). If it is 0, then
+ *                          dest may be NULL and the function will only return the length of the 
+ *                          result without writing any of the result string (pre-flighting).
+ * @param status            error information if retrieving the displayable string failed. 
+ *                          Should not be NULL and should not indicate failure on entry.
+ * @return the actual buffer size needed for the displayable variant code.  
+ * @see #uloc_openKeywords
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayKeyword(const char* keyword,
+                       const char* displayLocale,
+                       UChar* dest,
+                       int32_t destCapacity,
+                       UErrorCode* status);
+/**
+ * Gets the value of the keyword suitable for display for the specified locale.
+ * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display 
+ * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.
+ *
+ * @param locale            The locale to get the displayable variant code with. NULL may be used to specify the default.
+ * @param keyword           The keyword for whose value should be used.
+ * @param displayLocale     Specifies the locale to be used to display the name.  In other words,
+ *                          if the locale's language code is "en", passing Locale::getFrench() for
+ *                          inLocale would result in "Anglais", while passing Locale::getGerman()
+ *                          for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest              the buffer to which the displayable keyword should be written.
+ * @param destCapacity      The size of the buffer (number of UChars). If it is 0, then
+ *                          dest may be NULL and the function will only return the length of the 
+ *                          result without writing any of the result string (pre-flighting).
+ * @param status            error information if retrieving the displayable string failed. 
+ *                          Should not be NULL and must not indicate failure on entry.
+ * @return the actual buffer size needed for the displayable variant code.  
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayKeywordValue(   const char* locale,
+                               const char* keyword,
+                               const char* displayLocale,
+                               UChar* dest,
+                               int32_t destCapacity,
+                               UErrorCode* status);
+/**
+ * Gets the full name suitable for display for the specified locale.
+ *
+ * @param localeID the locale to get the displayable name with. NULL may be used to specify the default.
+ * @param inLocaleID Specifies the locale to be used to display the name.  In other words,
+ *                   if the locale's language code is "en", passing Locale::getFrench() for
+ *                   inLocale would result in "Anglais", while passing Locale::getGerman()
+ *                   for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param result the displayable name for localeID
+ * @param maxResultSize the size of the name buffer to store the 
+ * displayable full name with
+ * @param err error information if retrieving the displayable name failed
+ * @return the actual buffer size needed for the displayable name.  If it's greater 
+ * than maxResultSize, the returned displayable name will be truncated.  
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getDisplayName(const char* localeID,
+            const char* inLocaleID,
+            UChar* result,
+            int32_t maxResultSize,
+            UErrorCode* err);
+
+
+/**
+ * Gets the specified locale from a list of all available locales.  
+ * The return value is a pointer to an item of 
+ * a locale name array.  Both this array and the pointers
+ * it contains are owned by ICU and should not be deleted or written through
+ * by the caller.  The locale name is terminated by a null pointer.
+ * @param n the specific locale name index of the available locale list
+ * @return a specified locale name of all available locales
+ * @stable ICU 2.0
+ */
+U_STABLE const char* U_EXPORT2
+uloc_getAvailable(int32_t n);
+
+/**
+ * Gets the size of the all available locale list.
+ *
+ * @return the size of the locale list
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 uloc_countAvailable(void);
+
+/**
+ *
+ * Gets a list of all available language codes defined in ISO 639.  This is a pointer
+ * to an array of pointers to arrays of char.  All of these pointers are owned
+ * by ICU-- do not delete them, and do not write through them.  The array is
+ * terminated with a null pointer.
+ * @return a list of all available language codes
+ * @stable ICU 2.0
+ */
+U_STABLE const char* const* U_EXPORT2
+uloc_getISOLanguages(void);
+
+/**
+ *
+ * Gets a list of all available 2-letter country codes defined in ISO 639.  This is a
+ * pointer to an array of pointers to arrays of char.  All of these pointers are
+ * owned by ICU-- do not delete them, and do not write through them.  The array is
+ * terminated with a null pointer.
+ * @return a list of all available country codes
+ * @stable ICU 2.0
+ */
+U_STABLE const char* const* U_EXPORT2
+uloc_getISOCountries(void);
+
+/**
+ * Truncate the locale ID string to get the parent locale ID.
+ * Copies the part of the string before the last underscore.
+ * The parent locale ID will be an empty string if there is no
+ * underscore, or if there is only one underscore at localeID[0].
+ *
+ * @param localeID Input locale ID string.
+ * @param parent   Output string buffer for the parent locale ID.
+ * @param parentCapacity Size of the output buffer.
+ * @param err A UErrorCode value.
+ * @return The length of the parent locale ID.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getParent(const char*    localeID,
+                 char* parent,
+                 int32_t parentCapacity,
+                 UErrorCode* err);
+
+
+
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format.  It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API strips off the keyword part, so "de_DE\@collation=phonebook" 
+ * will become "de_DE". 
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name.  If it's greater 
+ * than nameCapacity, the returned full name will be truncated.  
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getBaseName(const char*    localeID,
+         char* name,
+         int32_t nameCapacity,
+         UErrorCode* err);
+
+/**
+ * Gets an enumeration of keywords for the specified locale. Enumeration
+ * must get disposed of by the client using uenum_close function.
+ *
+ * @param localeID the locale to get the variant code with
+ * @param status error information if retrieving the keywords failed
+ * @return enumeration of keywords or NULL if there are no keywords.
+ * @stable ICU 2.8
+ */
+U_STABLE UEnumeration* U_EXPORT2
+uloc_openKeywords(const char* localeID,
+                        UErrorCode* status);
+
+/**
+ * Get the value for a keyword. Locale name does not need to be normalized.
+ * 
+ * @param localeID locale name containing the keyword ("de_DE at currency=EURO;collation=PHONEBOOK")
+ * @param keywordName name of the keyword for which we want the value. Case insensitive.
+ * @param buffer receiving buffer
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code - buffer not big enough.
+ * @return the length of keyword value
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_getKeywordValue(const char* localeID,
+                     const char* keywordName,
+                     char* buffer, int32_t bufferCapacity,
+                     UErrorCode* status);
+
+
+/**
+ * Set the value of the specified keyword.
+ * NOTE: Unlike almost every other ICU function which takes a
+ * buffer, this function will NOT truncate the output text. If a
+ * BUFFER_OVERFLOW_ERROR is received, it means that the original
+ * buffer is untouched. This is done to prevent incorrect or possibly
+ * even malformed locales from being generated and used.
+ * 
+ * @param keywordName name of the keyword to be set. Case insensitive.
+ * @param keywordValue value of the keyword to be set. If 0-length or
+ *  NULL, will result in the keyword being removed. No error is given if 
+ *  that keyword does not exist.
+ * @param buffer input buffer containing locale to be modified.
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code - buffer not big enough.
+ * @return the length needed for the buffer
+ * @see uloc_getKeywordValue
+ * @stable ICU 3.2
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_setKeywordValue(const char* keywordName,
+                     const char* keywordValue,
+                     char* buffer, int32_t bufferCapacity,
+                     UErrorCode* status);
+
+/**
+ * enums for the  return value for the character and line orientation
+ * functions.
+ * @draft ICU 4.0
+ */
+typedef enum {
+  ULOC_LAYOUT_LTR   = 0,  /* left-to-right. */
+  ULOC_LAYOUT_RTL    = 1,  /* right-to-left. */
+  ULOC_LAYOUT_TTB    = 2,  /* top-to-bottom. */
+  ULOC_LAYOUT_BTT    = 3,   /* bottom-to-top. */
+  ULOC_LAYOUT_UNKNOWN
+} ULayoutType;
+
+/**
+ * Get the layout character orientation for the specified locale.
+ * 
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for characters.
+ * @draft ICU 4.0
+ */
+U_DRAFT ULayoutType U_EXPORT2
+uloc_getCharacterOrientation(const char* localeId,
+                             UErrorCode *status);
+
+/**
+ * Get the layout line orientation for the specified locale.
+ * 
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for lines.
+ * @draft ICU 4.0
+ */
+U_DRAFT ULayoutType U_EXPORT2
+uloc_getLineOrientation(const char* localeId,
+                        UErrorCode *status);
+
+/**
+ * enums for the 'outResult' parameter return value
+ * @see uloc_acceptLanguageFromHTTP
+ * @see uloc_acceptLanguage
+ * @stable ICU 3.2
+ */
+typedef enum {
+  ULOC_ACCEPT_FAILED   = 0,  /* No exact match was found. */
+  ULOC_ACCEPT_VALID    = 1,  /* An exact match was found. */
+  ULOC_ACCEPT_FALLBACK = 2   /* A fallback was found, for example, 
+                                Accept list contained 'ja_JP'
+                                which matched available locale 'ja'. */
+} UAcceptResult;
+
+
+/**
+ * Based on a HTTP header from a web browser and a list of available locales,
+ * determine an acceptable locale for the user.
+ * @param result - buffer to accept the result locale
+ * @param resultAvailable the size of the result buffer.
+ * @param outResult - An out parameter that contains the fallback status
+ * @param httpAcceptLanguage - "Accept-Language:" header as per HTTP.
+ * @param availableLocales - list of available locales to match
+ * @param status Error status, may be BUFFER_OVERFLOW_ERROR
+ * @return length needed for the locale.
+ * @stable ICU 3.2
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
+                            UAcceptResult *outResult,
+                            const char *httpAcceptLanguage,
+                            UEnumeration* availableLocales,
+                            UErrorCode *status);
+
+/**
+ * Based on a list of available locales,
+ * determine an acceptable locale for the user.
+ * @param result - buffer to accept the result locale
+ * @param resultAvailable the size of the result buffer.
+ * @param outResult - An out parameter that contains the fallback status
+ * @param acceptList - list of acceptable languages
+ * @param acceptListCount - count of acceptList items
+ * @param availableLocales - list of available locales to match
+ * @param status Error status, may be BUFFER_OVERFLOW_ERROR
+ * @return length needed for the locale.
+ * @stable ICU 3.2
+ */
+U_STABLE int32_t U_EXPORT2
+uloc_acceptLanguage(char *result, int32_t resultAvailable, 
+                    UAcceptResult *outResult, const char **acceptList,
+                    int32_t acceptListCount,
+                    UEnumeration* availableLocales,
+                    UErrorCode *status);
+
+
+/**
+ * Gets the ICU locale ID for the specified Win32 LCID value.
+ *
+ * @param hostID the Win32 LCID to translate
+ * @param locale the output buffer for the ICU locale ID, which will be NUL-terminated
+ *  if there is room.
+ * @param localeCapacity the size of the output buffer
+ * @param status an error is returned if the LCID is unrecognized or the output buffer
+ *  is too small
+ * @return actual the actual size of the locale ID, not including NUL-termination 
+ * @stable ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity,
+                    UErrorCode *status);
+
+
+/**
+ * Add the likely subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ *   http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the maximal form, or there is no data available
+ * for maximization, it will be copied to the output buffer.  For example,
+ * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
+ *
+ * Examples:
+ *
+ * "en" maximizes to "en_Latn_US"
+ *
+ * "de" maximizes to "de_Latn_US"
+ *
+ * "sr" maximizes to "sr_Cyrl_RS"
+ *
+ * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+ *
+ * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+ *
+ * @param localeID The locale to maximize
+ * @param maximizedLocaleID The maximized locale
+ * @param maximizedLocaleIDCapacity The capacity of the maximizedLocaleID buffer
+ * @param err Error information if maximizing the locale failed.  If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual buffer size needed for the maximized locale.  If it's
+ * greater than maximizedLocaleIDCapacity, the returned ID will be truncated.
+ * On error, the return value is -1.
+ * @draft ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_addLikelySubtags(const char*    localeID,
+         char* maximizedLocaleID,
+         int32_t maximizedLocaleIDCapacity,
+         UErrorCode* err);
+
+
+/**
+ * Minimize the subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ *   http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the minimal form, or there is no data available
+ * for minimization, it will be copied to the output buffer.  Since the
+ * minimization algorithm relies on proper maximization, see the comments
+ * for uloc_addLikelySubtags for reasons why there might not be any data.
+ *
+ * Examples:
+ *
+ * "en_Latn_US" minimizes to "en"
+ *
+ * "de_Latn_US" minimizes to "de"
+ *
+ * "sr_Cyrl_RS" minimizes to "sr"
+ *
+ * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
+ * script, and minimizing to "zh" would imply "zh_Hans_CN".)
+ *
+ * @param localeID The locale to minimize
+ * @param minimizedLocaleID The minimized locale
+ * @param minimizedLocaleIDCapacity The capacity of the minimizedLocaleID buffer
+ * @param err Error information if minimizing the locale failed.  If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual buffer size needed for the minimized locale.  If it's
+ * greater than minimizedLocaleIDCapacity, the returned ID will be truncated.
+ * On error, the return value is -1.
+ * @draft ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+uloc_minimizeSubtags(const char*    localeID,
+         char* minimizedLocaleID,
+         int32_t minimizedLocaleIDCapacity,
+         UErrorCode* err);
+
+#endif /*_ULOC*/

Deleted: MacRuby/trunk/icu-1060/unicode/ulocdata.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ulocdata.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ulocdata.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,192 +0,0 @@
-/*
-******************************************************************************
-*                                                                            *
-* Copyright (C) 2003-2007, International Business Machines                   *
-*                Corporation and others. All Rights Reserved.                *
-*                                                                            *
-******************************************************************************
-*   file name:  ulocdata.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2003Oct21
-*   created by: Ram Viswanadha
-*/
-
-#ifndef __ULOCDATA_H__
-#define __ULOCDATA_H__
-
-#include "unicode/ures.h"
-#include "unicode/uloc.h"
-#include "unicode/uset.h"
-
-/**
- * \file
- * \brief C API: Provides access to locale data. 
- */
-
-/** Forward declaration of the ULocaleData structure. @stable ICU 3.6 */
-struct ULocaleData;
-
-/** A locale data object. @stable ICU 3.6 */
-typedef struct ULocaleData ULocaleData;
-
-
-
-/** The possible types of exemplar character sets.
-  * @stable ICU 3.4
-  */
-typedef enum ULocaleDataExemplarSetType  {
-     ULOCDATA_ES_STANDARD=0,      /* Basic set */
-     ULOCDATA_ES_AUXILIARY=1,     /* Auxiliary set */
-     ULOCDATA_ES_COUNT=2
-} ULocaleDataExemplarSetType;
-
-/** The possible types of delimiters.
-  * @stable ICU 3.4
-  */
-typedef enum ULocaleDataDelimiterType {
-    ULOCDATA_QUOTATION_START = 0,     /* Quotation start */
-     ULOCDATA_QUOTATION_END = 1,       /* Quotation end */
-     ULOCDATA_ALT_QUOTATION_START = 2, /* Alternate quotation start */
-     ULOCDATA_ALT_QUOTATION_END = 3,   /* Alternate quotation end */
-     ULOCDATA_DELIMITER_COUNT = 4
-} ULocaleDataDelimiterType;
-
-/**
- * Opens a locale data object for the given locale
- *
- * @param localeID  Specifies the locale associated with this locale
- *                  data object.
- * @param status    Pointer to error status code.
- * @stable ICU 3.4
- */
-U_STABLE ULocaleData* U_EXPORT2 
-ulocdata_open(const char *localeID, UErrorCode *status);
-
-/**
- * Closes a locale data object.
- *
- * @param uld       The locale data object to close
- * @stable ICU 3.4
- */
-U_STABLE void U_EXPORT2 
-ulocdata_close(ULocaleData *uld);
-
-/**
- * Sets the "no Substitute" attribute of the locale data
- * object.  If true, then any methods associated with the
- * locale data object will return null when there is no
- * data available for that method, given the locale ID
- * supplied to ulocdata_open().
- *
- * @param uld       The locale data object to set.
- * @param setting   Value of the "no substitute" attribute.
- * @stable ICU 3.4
- */
-U_STABLE void U_EXPORT2 
-ulocdata_setNoSubstitute(ULocaleData *uld, UBool setting);
-
-/**
- * Retrieves the current "no Substitute" value of the locale data
- * object.  If true, then any methods associated with the
- * locale data object will return null when there is no
- * data available for that method, given the locale ID
- * supplied to ulocdata_open().
- *
- * @param uld       Pointer to the The locale data object to set.
- * @return UBool    Value of the "no substitute" attribute.
- * @stable ICU 3.4
- */
-U_STABLE UBool U_EXPORT2 
-ulocdata_getNoSubstitute(ULocaleData *uld);
-
-/**
- * Returns the set of exemplar characters for a locale.
- *
- * @param uld       Pointer to the locale data object from which the 
- *                  exemplar character set is to be retrieved.
- * @param fillIn    Pointer to a USet object to receive the 
- *                  exemplar character set for the given locale.  Previous
- *                  contents of fillIn are lost.  <em>If fillIn is NULL,
- *                  then a new USet is created and returned.  The caller
- *                  owns the result and must dispose of it by calling
- *                  uset_close.</em>
- * @param options   Bitmask for options to apply to the exemplar pattern.
- *                  Specify zero to retrieve the exemplar set as it is
- *                  defined in the locale data.  Specify
- *                  USET_CASE_INSENSITIVE to retrieve a case-folded
- *                  exemplar set.  See uset_applyPattern for a complete
- *                  list of valid options.  The USET_IGNORE_SPACE bit is
- *                  always set, regardless of the value of 'options'.
- * @param extype    Specifies the type of exemplar set to be retrieved.
- * @param status    Pointer to an input-output error code value;
- *                  must not be NULL.
- * @return USet*    Either fillIn, or if fillIn is NULL, a pointer to
- *                  a newly-allocated USet that the user must close.
- * @stable ICU 3.4
- */
-U_STABLE USet* U_EXPORT2 
-ulocdata_getExemplarSet(ULocaleData *uld, USet *fillIn, 
-                        uint32_t options, ULocaleDataExemplarSetType extype, UErrorCode *status);
-
-/**
- * Returns one of the delimiter strings associated with a locale.
- *
- * @param uld           Pointer to the locale data object from which the 
- *                      delimiter string is to be retrieved.
- * @param type          the type of delimiter to be retrieved.
- * @param result        A pointer to a buffer to receive the result.
- * @param resultLength  The maximum size of result.
- * @param status        Pointer to an error code value
- * @return int32_t      The total buffer size needed; if greater than resultLength,
- *                      the output was truncated.
- * @stable ICU 3.4
- */
-U_STABLE int32_t U_EXPORT2 
-ulocdata_getDelimiter(ULocaleData *uld, ULocaleDataDelimiterType type, UChar *result, int32_t resultLength, UErrorCode *status);
-
-/**
- * Enumeration for representing the measurement systems.
- * @stable ICU 2.8
- */
-typedef enum UMeasurementSystem {
-    UMS_SI,     /** Measurement system specified by SI otherwise known as Metric system. */
-    UMS_US,     /** Measurement system followed in the United States of America. */ 
-    UMS_LIMIT
-} UMeasurementSystem;
-
-/**
- * Returns the measurement system used in the locale specified by the localeID.
- * Please note that this API will change in ICU 3.6 and will use an ulocdata object.
- *
- * @param localeID      The id of the locale for which the measurement system to be retrieved.
- * @param status        Must be a valid pointer to an error code value,
- *                      which must not indicate a failure before the function call.
- * @return UMeasurementSystem the measurement system used in the locale.
- * @stable ICU 2.8
- */
-U_STABLE UMeasurementSystem U_EXPORT2
-ulocdata_getMeasurementSystem(const char *localeID, UErrorCode *status);
-
-/**
- * Returns the element gives the normal business letter size, and customary units. 
- * The units for the numbers are always in <em>milli-meters</em>.
- * For US since 8.5 and 11 do not yeild an integral value when converted to milli-meters,
- * the values are rounded off.
- * So for A4 size paper the height and width are 297 mm and 210 mm repectively, 
- * and for US letter size the height and width are 279 mm and 216 mm respectively.
- * Please note that this API will change in ICU 3.6 and will use an ulocdata object.
- *
- * @param localeID      The id of the locale for which the paper size information to be retrieved.
- * @param height        A pointer to int to recieve the height information.
- * @param width         A pointer to int to recieve the width information.
- * @param status        Must be a valid pointer to an error code value,
- *                      which must not indicate a failure before the function call.
- * @stable ICU 2.8
- */
-U_STABLE void U_EXPORT2
-ulocdata_getPaperSize(const char *localeID, int32_t *height, int32_t *width, UErrorCode *status);
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/ulocdata.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ulocdata.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ulocdata.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ulocdata.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,192 @@
+/*
+******************************************************************************
+*                                                                            *
+* Copyright (C) 2003-2007, International Business Machines                   *
+*                Corporation and others. All Rights Reserved.                *
+*                                                                            *
+******************************************************************************
+*   file name:  ulocdata.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003Oct21
+*   created by: Ram Viswanadha
+*/
+
+#ifndef __ULOCDATA_H__
+#define __ULOCDATA_H__
+
+#include "unicode/ures.h"
+#include "unicode/uloc.h"
+#include "unicode/uset.h"
+
+/**
+ * \file
+ * \brief C API: Provides access to locale data. 
+ */
+
+/** Forward declaration of the ULocaleData structure. @stable ICU 3.6 */
+struct ULocaleData;
+
+/** A locale data object. @stable ICU 3.6 */
+typedef struct ULocaleData ULocaleData;
+
+
+
+/** The possible types of exemplar character sets.
+  * @stable ICU 3.4
+  */
+typedef enum ULocaleDataExemplarSetType  {
+     ULOCDATA_ES_STANDARD=0,      /* Basic set */
+     ULOCDATA_ES_AUXILIARY=1,     /* Auxiliary set */
+     ULOCDATA_ES_COUNT=2
+} ULocaleDataExemplarSetType;
+
+/** The possible types of delimiters.
+  * @stable ICU 3.4
+  */
+typedef enum ULocaleDataDelimiterType {
+    ULOCDATA_QUOTATION_START = 0,     /* Quotation start */
+     ULOCDATA_QUOTATION_END = 1,       /* Quotation end */
+     ULOCDATA_ALT_QUOTATION_START = 2, /* Alternate quotation start */
+     ULOCDATA_ALT_QUOTATION_END = 3,   /* Alternate quotation end */
+     ULOCDATA_DELIMITER_COUNT = 4
+} ULocaleDataDelimiterType;
+
+/**
+ * Opens a locale data object for the given locale
+ *
+ * @param localeID  Specifies the locale associated with this locale
+ *                  data object.
+ * @param status    Pointer to error status code.
+ * @stable ICU 3.4
+ */
+U_STABLE ULocaleData* U_EXPORT2 
+ulocdata_open(const char *localeID, UErrorCode *status);
+
+/**
+ * Closes a locale data object.
+ *
+ * @param uld       The locale data object to close
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2 
+ulocdata_close(ULocaleData *uld);
+
+/**
+ * Sets the "no Substitute" attribute of the locale data
+ * object.  If true, then any methods associated with the
+ * locale data object will return null when there is no
+ * data available for that method, given the locale ID
+ * supplied to ulocdata_open().
+ *
+ * @param uld       The locale data object to set.
+ * @param setting   Value of the "no substitute" attribute.
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2 
+ulocdata_setNoSubstitute(ULocaleData *uld, UBool setting);
+
+/**
+ * Retrieves the current "no Substitute" value of the locale data
+ * object.  If true, then any methods associated with the
+ * locale data object will return null when there is no
+ * data available for that method, given the locale ID
+ * supplied to ulocdata_open().
+ *
+ * @param uld       Pointer to the The locale data object to set.
+ * @return UBool    Value of the "no substitute" attribute.
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2 
+ulocdata_getNoSubstitute(ULocaleData *uld);
+
+/**
+ * Returns the set of exemplar characters for a locale.
+ *
+ * @param uld       Pointer to the locale data object from which the 
+ *                  exemplar character set is to be retrieved.
+ * @param fillIn    Pointer to a USet object to receive the 
+ *                  exemplar character set for the given locale.  Previous
+ *                  contents of fillIn are lost.  <em>If fillIn is NULL,
+ *                  then a new USet is created and returned.  The caller
+ *                  owns the result and must dispose of it by calling
+ *                  uset_close.</em>
+ * @param options   Bitmask for options to apply to the exemplar pattern.
+ *                  Specify zero to retrieve the exemplar set as it is
+ *                  defined in the locale data.  Specify
+ *                  USET_CASE_INSENSITIVE to retrieve a case-folded
+ *                  exemplar set.  See uset_applyPattern for a complete
+ *                  list of valid options.  The USET_IGNORE_SPACE bit is
+ *                  always set, regardless of the value of 'options'.
+ * @param extype    Specifies the type of exemplar set to be retrieved.
+ * @param status    Pointer to an input-output error code value;
+ *                  must not be NULL.
+ * @return USet*    Either fillIn, or if fillIn is NULL, a pointer to
+ *                  a newly-allocated USet that the user must close.
+ * @stable ICU 3.4
+ */
+U_STABLE USet* U_EXPORT2 
+ulocdata_getExemplarSet(ULocaleData *uld, USet *fillIn, 
+                        uint32_t options, ULocaleDataExemplarSetType extype, UErrorCode *status);
+
+/**
+ * Returns one of the delimiter strings associated with a locale.
+ *
+ * @param uld           Pointer to the locale data object from which the 
+ *                      delimiter string is to be retrieved.
+ * @param type          the type of delimiter to be retrieved.
+ * @param result        A pointer to a buffer to receive the result.
+ * @param resultLength  The maximum size of result.
+ * @param status        Pointer to an error code value
+ * @return int32_t      The total buffer size needed; if greater than resultLength,
+ *                      the output was truncated.
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2 
+ulocdata_getDelimiter(ULocaleData *uld, ULocaleDataDelimiterType type, UChar *result, int32_t resultLength, UErrorCode *status);
+
+/**
+ * Enumeration for representing the measurement systems.
+ * @stable ICU 2.8
+ */
+typedef enum UMeasurementSystem {
+    UMS_SI,     /** Measurement system specified by SI otherwise known as Metric system. */
+    UMS_US,     /** Measurement system followed in the United States of America. */ 
+    UMS_LIMIT
+} UMeasurementSystem;
+
+/**
+ * Returns the measurement system used in the locale specified by the localeID.
+ * Please note that this API will change in ICU 3.6 and will use an ulocdata object.
+ *
+ * @param localeID      The id of the locale for which the measurement system to be retrieved.
+ * @param status        Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return UMeasurementSystem the measurement system used in the locale.
+ * @stable ICU 2.8
+ */
+U_STABLE UMeasurementSystem U_EXPORT2
+ulocdata_getMeasurementSystem(const char *localeID, UErrorCode *status);
+
+/**
+ * Returns the element gives the normal business letter size, and customary units. 
+ * The units for the numbers are always in <em>milli-meters</em>.
+ * For US since 8.5 and 11 do not yeild an integral value when converted to milli-meters,
+ * the values are rounded off.
+ * So for A4 size paper the height and width are 297 mm and 210 mm repectively, 
+ * and for US letter size the height and width are 279 mm and 216 mm respectively.
+ * Please note that this API will change in ICU 3.6 and will use an ulocdata object.
+ *
+ * @param localeID      The id of the locale for which the paper size information to be retrieved.
+ * @param height        A pointer to int to recieve the height information.
+ * @param width         A pointer to int to recieve the width information.
+ * @param status        Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+ulocdata_getPaperSize(const char *localeID, int32_t *height, int32_t *width, UErrorCode *status);
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/umachine.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/umachine.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/umachine.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,338 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 1999-2006, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-******************************************************************************
-*   file name:  umachine.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 1999sep13
-*   created by: Markus W. Scherer
-*
-*   This file defines basic types and constants for utf.h to be
-*   platform-independent. umachine.h and utf.h are included into
-*   utypes.h to provide all the general definitions for ICU.
-*   All of these definitions used to be in utypes.h before
-*   the UTF-handling macros made this unmaintainable.
-*/
-
-#ifndef __UMACHINE_H__
-#define __UMACHINE_H__
-
-
-/**
- * \file
- * \brief Basic types and constants for UTF 
- * 
- * <h2> Basic types and constants for UTF </h2>
- *   This file defines basic types and constants for utf.h to be
- *   platform-independent. umachine.h and utf.h are included into
- *   utypes.h to provide all the general definitions for ICU.
- *   All of these definitions used to be in utypes.h before
- *   the UTF-handling macros made this unmaintainable.
- * 
- */
-/*==========================================================================*/
-/* Include platform-dependent definitions                                   */
-/* which are contained in the platform-specific file platform.h             */
-/*==========================================================================*/
-
-#if defined(U_PALMOS)
-#   include "unicode/ppalmos.h"
-#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-#   include "unicode/pwin32.h"
-#else
-#   include "unicode/platform.h"
-#endif
-
-/*
- * ANSI C headers:
- * stddef.h defines wchar_t
- */
-#include <stddef.h>
-
-/*==========================================================================*/
-/* XP_CPLUSPLUS is a cross-platform symbol which should be defined when     */
-/* using C++.  It should not be defined when compiling under C.             */
-/*==========================================================================*/
-
-#ifdef __cplusplus
-#   ifndef XP_CPLUSPLUS
-#       define XP_CPLUSPLUS
-#   endif
-#else
-#   undef XP_CPLUSPLUS
-#endif
-
-/*==========================================================================*/
-/* For C wrappers, we use the symbol U_STABLE.                                */
-/* This works properly if the includer is C or C++.                         */
-/* Functions are declared   U_STABLE return-type U_EXPORT2 function-name()... */
-/*==========================================================================*/
-
-/**
- * \def U_CFUNC
- * This is used in a declaration of a library private ICU C function.
- * @stable ICU 2.4
- */
-
-/**
- * \def U_CDECL_BEGIN
- * This is used to begin a declaration of a library private ICU C API.
- * @stable ICU 2.4
- */
-
-/**
- * \def U_CDECL_END
- * This is used to end a declaration of a library private ICU C API 
- * @stable ICU 2.4
- */
-
-#ifdef XP_CPLUSPLUS
-#   define U_CFUNC extern "C"
-#   define U_CDECL_BEGIN extern "C" {
-#   define U_CDECL_END   }
-#else
-#   define U_CFUNC extern
-#   define U_CDECL_BEGIN
-#   define U_CDECL_END
-#endif
-
-/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
-#define U_CAPI U_CFUNC U_EXPORT
-#define U_STABLE U_CAPI
-#define U_DRAFT  U_CAPI
-#define U_DEPRECATED U_CAPI
-#define U_OBSOLETE U_CAPI
-#define U_INTERNAL U_CAPI
-
-/*==========================================================================*/
-/* limits for int32_t etc., like in POSIX inttypes.h                        */
-/*==========================================================================*/
-
-#ifndef INT8_MIN
-/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
-#   define INT8_MIN        ((int8_t)(-128))
-#endif
-#ifndef INT16_MIN
-/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
-#   define INT16_MIN       ((int16_t)(-32767-1))
-#endif
-#ifndef INT32_MIN
-/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
-#   define INT32_MIN       ((int32_t)(-2147483647-1))
-#endif
-
-#ifndef INT8_MAX
-/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
-#   define INT8_MAX        ((int8_t)(127))
-#endif
-#ifndef INT16_MAX
-/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
-#   define INT16_MAX       ((int16_t)(32767))
-#endif
-#ifndef INT32_MAX
-/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
-#   define INT32_MAX       ((int32_t)(2147483647))
-#endif
-
-#ifndef UINT8_MAX
-/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
-#   define UINT8_MAX       ((uint8_t)(255U))
-#endif
-#ifndef UINT16_MAX
-/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
-#   define UINT16_MAX      ((uint16_t)(65535U))
-#endif
-#ifndef UINT32_MAX
-/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
-#   define UINT32_MAX      ((uint32_t)(4294967295U))
-#endif
-
-#if defined(U_INT64_T_UNAVAILABLE)
-# error int64_t is required for decimal format and rule-based number format.
-#else
-# ifndef INT64_C
-/**
- * Provides a platform independent way to specify a signed 64-bit integer constant.
- * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
- * @stable ICU 2.8
- */
-#   define INT64_C(c) c ## LL
-# endif
-# ifndef UINT64_C
-/**
- * Provides a platform independent way to specify an unsigned 64-bit integer constant.
- * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
- * @stable ICU 2.8
- */
-#   define UINT64_C(c) c ## ULL
-# endif
-# ifndef U_INT64_MIN
-/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
-#     define U_INT64_MIN       ((int64_t)(INT64_C(-9223372036854775807)-1))
-# endif
-# ifndef U_INT64_MAX
-/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
-#     define U_INT64_MAX       ((int64_t)(INT64_C(9223372036854775807)))
-# endif
-# ifndef U_UINT64_MAX
-/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
-#     define U_UINT64_MAX      ((uint64_t)(UINT64_C(18446744073709551615)))
-# endif
-#endif
-
-/*==========================================================================*/
-/* Boolean data type                                                        */
-/*==========================================================================*/
-
-/** The ICU boolean type @stable ICU 2.0 */
-typedef int8_t UBool;
-
-#ifndef TRUE
-/** The TRUE value of a UBool @stable ICU 2.0 */
-#   define TRUE  1
-#endif
-#ifndef FALSE
-/** The FALSE value of a UBool @stable ICU 2.0 */
-#   define FALSE 0
-#endif
-
-
-/*==========================================================================*/
-/* Unicode data types                                                       */
-/*==========================================================================*/
-
-/* wchar_t-related definitions -------------------------------------------- */
-
-/**
- * \def U_HAVE_WCHAR_H
- * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
- *
- * @stable ICU 2.0
- */
-#ifndef U_HAVE_WCHAR_H
-#   define U_HAVE_WCHAR_H 1
-#endif
-
-/**
- * \def U_SIZEOF_WCHAR_T
- * U_SIZEOF_WCHAR_T==sizeof(wchar_t) (0 means it is not defined or autoconf could not set it)
- *
- * @stable ICU 2.0
- */
-#if U_SIZEOF_WCHAR_T==0
-#   undef U_SIZEOF_WCHAR_T
-#   define U_SIZEOF_WCHAR_T 4
-#endif
-
-/*
- * \def U_WCHAR_IS_UTF16
- * Defined if wchar_t uses UTF-16.
- *
- * @stable ICU 2.0
- */
-/*
- * \def U_WCHAR_IS_UTF32
- * Defined if wchar_t uses UTF-32.
- *
- * @stable ICU 2.0
- */
-#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
-#   ifdef __STDC_ISO_10646__ 
-#       if (U_SIZEOF_WCHAR_T==2)
-#           define U_WCHAR_IS_UTF16
-#       elif (U_SIZEOF_WCHAR_T==4)
-#           define  U_WCHAR_IS_UTF32
-#       endif
-#   elif defined __UCS2__
-#       if (__OS390__ || __OS400__) && (U_SIZEOF_WCHAR_T==2)
-#           define U_WCHAR_IS_UTF16
-#       endif
-#   elif defined __UCS4__
-#       if (U_SIZEOF_WCHAR_T==4)
-#           define U_WCHAR_IS_UTF32
-#       endif
-#   elif defined(U_WINDOWS)
-#       define U_WCHAR_IS_UTF16    
-#   endif
-#endif
-
-/* UChar and UChar32 definitions -------------------------------------------- */
-
-/** Number of bytes in a UChar. @stable ICU 2.0 */
-#define U_SIZEOF_UCHAR 2
-
-/**
- * \var UChar
- * Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
- * If wchar_t is not 16 bits wide, then define UChar to be uint16_t.
- * This makes the definition of UChar platform-dependent
- * but allows direct string type compatibility with platforms with
- * 16-bit wchar_t types.
- *
- * @stable ICU 2.0
- */
-
-/* Define UChar to be compatible with wchar_t if possible. */
-#if U_SIZEOF_WCHAR_T==2
-    typedef wchar_t UChar;
-#else
-    typedef uint16_t UChar;
-#endif
-
-/**
- * Define UChar32 as a type for single Unicode code points.
- * UChar32 is a signed 32-bit integer (same as int32_t).
- *
- * The Unicode code point range is 0..0x10ffff.
- * All other values (negative or >=0x110000) are illegal as Unicode code points.
- * They may be used as sentinel values to indicate "done", "error"
- * or similar non-code point conditions.
- *
- * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
- * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
- * or else to be uint32_t.
- * That is, the definition of UChar32 was platform-dependent.
- *
- * @see U_SENTINEL
- * @stable ICU 2.4
- */
-typedef int32_t UChar32;
-
-/*==========================================================================*/
-/* U_INLINE and U_ALIGN_CODE   Set default values if these are not already  */
-/*                             defined.  Definitions normally are in        */
-/*                             platform.h or the corresponding file for     */
-/*                             the OS in use.                               */
-/*==========================================================================*/
-
-#ifndef U_HIDE_INTERNAL_API
-
-/**
- * \def U_ALIGN_CODE
- * This is used to align code fragments to a specific byte boundary.
- * This is useful for getting consistent performance test results.
- * @internal
- */
-#ifndef U_ALIGN_CODE
-#   define U_ALIGN_CODE(n)
-#endif
-
-#endif /* U_HIDE_INTERNAL_API */
-
-#ifndef U_INLINE
-#   ifdef XP_CPLUSPLUS
-#       define U_INLINE inline
-#   else
-#       define U_INLINE
-#   endif
-#endif
-
-#include "unicode/urename.h"
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/umachine.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/umachine.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/umachine.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/umachine.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,338 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1999-2006, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  umachine.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep13
+*   created by: Markus W. Scherer
+*
+*   This file defines basic types and constants for utf.h to be
+*   platform-independent. umachine.h and utf.h are included into
+*   utypes.h to provide all the general definitions for ICU.
+*   All of these definitions used to be in utypes.h before
+*   the UTF-handling macros made this unmaintainable.
+*/
+
+#ifndef __UMACHINE_H__
+#define __UMACHINE_H__
+
+
+/**
+ * \file
+ * \brief Basic types and constants for UTF 
+ * 
+ * <h2> Basic types and constants for UTF </h2>
+ *   This file defines basic types and constants for utf.h to be
+ *   platform-independent. umachine.h and utf.h are included into
+ *   utypes.h to provide all the general definitions for ICU.
+ *   All of these definitions used to be in utypes.h before
+ *   the UTF-handling macros made this unmaintainable.
+ * 
+ */
+/*==========================================================================*/
+/* Include platform-dependent definitions                                   */
+/* which are contained in the platform-specific file platform.h             */
+/*==========================================================================*/
+
+#if defined(U_PALMOS)
+#   include "unicode/ppalmos.h"
+#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+#   include "unicode/pwin32.h"
+#else
+#   include "unicode/platform.h"
+#endif
+
+/*
+ * ANSI C headers:
+ * stddef.h defines wchar_t
+ */
+#include <stddef.h>
+
+/*==========================================================================*/
+/* XP_CPLUSPLUS is a cross-platform symbol which should be defined when     */
+/* using C++.  It should not be defined when compiling under C.             */
+/*==========================================================================*/
+
+#ifdef __cplusplus
+#   ifndef XP_CPLUSPLUS
+#       define XP_CPLUSPLUS
+#   endif
+#else
+#   undef XP_CPLUSPLUS
+#endif
+
+/*==========================================================================*/
+/* For C wrappers, we use the symbol U_STABLE.                                */
+/* This works properly if the includer is C or C++.                         */
+/* Functions are declared   U_STABLE return-type U_EXPORT2 function-name()... */
+/*==========================================================================*/
+
+/**
+ * \def U_CFUNC
+ * This is used in a declaration of a library private ICU C function.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_BEGIN
+ * This is used to begin a declaration of a library private ICU C API.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_END
+ * This is used to end a declaration of a library private ICU C API 
+ * @stable ICU 2.4
+ */
+
+#ifdef XP_CPLUSPLUS
+#   define U_CFUNC extern "C"
+#   define U_CDECL_BEGIN extern "C" {
+#   define U_CDECL_END   }
+#else
+#   define U_CFUNC extern
+#   define U_CDECL_BEGIN
+#   define U_CDECL_END
+#endif
+
+/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
+#define U_CAPI U_CFUNC U_EXPORT
+#define U_STABLE U_CAPI
+#define U_DRAFT  U_CAPI
+#define U_DEPRECATED U_CAPI
+#define U_OBSOLETE U_CAPI
+#define U_INTERNAL U_CAPI
+
+/*==========================================================================*/
+/* limits for int32_t etc., like in POSIX inttypes.h                        */
+/*==========================================================================*/
+
+#ifndef INT8_MIN
+/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
+#   define INT8_MIN        ((int8_t)(-128))
+#endif
+#ifndef INT16_MIN
+/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
+#   define INT16_MIN       ((int16_t)(-32767-1))
+#endif
+#ifndef INT32_MIN
+/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
+#   define INT32_MIN       ((int32_t)(-2147483647-1))
+#endif
+
+#ifndef INT8_MAX
+/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
+#   define INT8_MAX        ((int8_t)(127))
+#endif
+#ifndef INT16_MAX
+/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
+#   define INT16_MAX       ((int16_t)(32767))
+#endif
+#ifndef INT32_MAX
+/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
+#   define INT32_MAX       ((int32_t)(2147483647))
+#endif
+
+#ifndef UINT8_MAX
+/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
+#   define UINT8_MAX       ((uint8_t)(255U))
+#endif
+#ifndef UINT16_MAX
+/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
+#   define UINT16_MAX      ((uint16_t)(65535U))
+#endif
+#ifndef UINT32_MAX
+/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
+#   define UINT32_MAX      ((uint32_t)(4294967295U))
+#endif
+
+#if defined(U_INT64_T_UNAVAILABLE)
+# error int64_t is required for decimal format and rule-based number format.
+#else
+# ifndef INT64_C
+/**
+ * Provides a platform independent way to specify a signed 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
+ * @stable ICU 2.8
+ */
+#   define INT64_C(c) c ## LL
+# endif
+# ifndef UINT64_C
+/**
+ * Provides a platform independent way to specify an unsigned 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
+ * @stable ICU 2.8
+ */
+#   define UINT64_C(c) c ## ULL
+# endif
+# ifndef U_INT64_MIN
+/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
+#     define U_INT64_MIN       ((int64_t)(INT64_C(-9223372036854775807)-1))
+# endif
+# ifndef U_INT64_MAX
+/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
+#     define U_INT64_MAX       ((int64_t)(INT64_C(9223372036854775807)))
+# endif
+# ifndef U_UINT64_MAX
+/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
+#     define U_UINT64_MAX      ((uint64_t)(UINT64_C(18446744073709551615)))
+# endif
+#endif
+
+/*==========================================================================*/
+/* Boolean data type                                                        */
+/*==========================================================================*/
+
+/** The ICU boolean type @stable ICU 2.0 */
+typedef int8_t UBool;
+
+#ifndef TRUE
+/** The TRUE value of a UBool @stable ICU 2.0 */
+#   define TRUE  1
+#endif
+#ifndef FALSE
+/** The FALSE value of a UBool @stable ICU 2.0 */
+#   define FALSE 0
+#endif
+
+
+/*==========================================================================*/
+/* Unicode data types                                                       */
+/*==========================================================================*/
+
+/* wchar_t-related definitions -------------------------------------------- */
+
+/**
+ * \def U_HAVE_WCHAR_H
+ * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
+ *
+ * @stable ICU 2.0
+ */
+#ifndef U_HAVE_WCHAR_H
+#   define U_HAVE_WCHAR_H 1
+#endif
+
+/**
+ * \def U_SIZEOF_WCHAR_T
+ * U_SIZEOF_WCHAR_T==sizeof(wchar_t) (0 means it is not defined or autoconf could not set it)
+ *
+ * @stable ICU 2.0
+ */
+#if U_SIZEOF_WCHAR_T==0
+#   undef U_SIZEOF_WCHAR_T
+#   define U_SIZEOF_WCHAR_T 4
+#endif
+
+/*
+ * \def U_WCHAR_IS_UTF16
+ * Defined if wchar_t uses UTF-16.
+ *
+ * @stable ICU 2.0
+ */
+/*
+ * \def U_WCHAR_IS_UTF32
+ * Defined if wchar_t uses UTF-32.
+ *
+ * @stable ICU 2.0
+ */
+#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
+#   ifdef __STDC_ISO_10646__ 
+#       if (U_SIZEOF_WCHAR_T==2)
+#           define U_WCHAR_IS_UTF16
+#       elif (U_SIZEOF_WCHAR_T==4)
+#           define  U_WCHAR_IS_UTF32
+#       endif
+#   elif defined __UCS2__
+#       if (__OS390__ || __OS400__) && (U_SIZEOF_WCHAR_T==2)
+#           define U_WCHAR_IS_UTF16
+#       endif
+#   elif defined __UCS4__
+#       if (U_SIZEOF_WCHAR_T==4)
+#           define U_WCHAR_IS_UTF32
+#       endif
+#   elif defined(U_WINDOWS)
+#       define U_WCHAR_IS_UTF16    
+#   endif
+#endif
+
+/* UChar and UChar32 definitions -------------------------------------------- */
+
+/** Number of bytes in a UChar. @stable ICU 2.0 */
+#define U_SIZEOF_UCHAR 2
+
+/**
+ * \var UChar
+ * Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
+ * If wchar_t is not 16 bits wide, then define UChar to be uint16_t.
+ * This makes the definition of UChar platform-dependent
+ * but allows direct string type compatibility with platforms with
+ * 16-bit wchar_t types.
+ *
+ * @stable ICU 2.0
+ */
+
+/* Define UChar to be compatible with wchar_t if possible. */
+#if U_SIZEOF_WCHAR_T==2
+    typedef wchar_t UChar;
+#else
+    typedef uint16_t UChar;
+#endif
+
+/**
+ * Define UChar32 as a type for single Unicode code points.
+ * UChar32 is a signed 32-bit integer (same as int32_t).
+ *
+ * The Unicode code point range is 0..0x10ffff.
+ * All other values (negative or >=0x110000) are illegal as Unicode code points.
+ * They may be used as sentinel values to indicate "done", "error"
+ * or similar non-code point conditions.
+ *
+ * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
+ * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
+ * or else to be uint32_t.
+ * That is, the definition of UChar32 was platform-dependent.
+ *
+ * @see U_SENTINEL
+ * @stable ICU 2.4
+ */
+typedef int32_t UChar32;
+
+/*==========================================================================*/
+/* U_INLINE and U_ALIGN_CODE   Set default values if these are not already  */
+/*                             defined.  Definitions normally are in        */
+/*                             platform.h or the corresponding file for     */
+/*                             the OS in use.                               */
+/*==========================================================================*/
+
+#ifndef U_HIDE_INTERNAL_API
+
+/**
+ * \def U_ALIGN_CODE
+ * This is used to align code fragments to a specific byte boundary.
+ * This is useful for getting consistent performance test results.
+ * @internal
+ */
+#ifndef U_ALIGN_CODE
+#   define U_ALIGN_CODE(n)
+#endif
+
+#endif /* U_HIDE_INTERNAL_API */
+
+#ifndef U_INLINE
+#   ifdef XP_CPLUSPLUS
+#       define U_INLINE inline
+#   else
+#       define U_INLINE
+#   endif
+#endif
+
+#include "unicode/urename.h"
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/umisc.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/umisc.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/umisc.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,60 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1999-2006, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*   file name:  umisc.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 1999oct15
-*   created by: Markus W. Scherer
-*/
-
-#ifndef UMISC_H
-#define UMISC_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief  C API:misc definitions 
- *
- *  This file contains miscellaneous definitions for the C APIs. 
- */
-
-U_CDECL_BEGIN
-
-/** A struct representing a range of text containing a specific field 
- *  @stable ICU 2.0
- */
-typedef struct UFieldPosition {
-  /**
-   * The field 
-   * @stable ICU 2.0
-   */
-  int32_t field;
-  /**
-   * The start of the text range containing field 
-   * @stable ICU 2.0
-   */
-  int32_t beginIndex;
-  /** 
-   * The limit of the text range containing field 
-   * @stable ICU 2.0
-   */
-  int32_t endIndex;
-} UFieldPosition;
-
-#if !UCONFIG_NO_SERVICE
-/**
- * Opaque type returned by registerInstance, registerFactory and unregister for service registration.
- * @stable ICU 2.6
- */
-typedef const void* URegistryKey;
-#endif
-
-U_CDECL_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/umisc.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/umisc.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/umisc.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/umisc.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,60 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999-2006, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  umisc.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999oct15
+*   created by: Markus W. Scherer
+*/
+
+#ifndef UMISC_H
+#define UMISC_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief  C API:misc definitions 
+ *
+ *  This file contains miscellaneous definitions for the C APIs. 
+ */
+
+U_CDECL_BEGIN
+
+/** A struct representing a range of text containing a specific field 
+ *  @stable ICU 2.0
+ */
+typedef struct UFieldPosition {
+  /**
+   * The field 
+   * @stable ICU 2.0
+   */
+  int32_t field;
+  /**
+   * The start of the text range containing field 
+   * @stable ICU 2.0
+   */
+  int32_t beginIndex;
+  /** 
+   * The limit of the text range containing field 
+   * @stable ICU 2.0
+   */
+  int32_t endIndex;
+} UFieldPosition;
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * Opaque type returned by registerInstance, registerFactory and unregister for service registration.
+ * @stable ICU 2.6
+ */
+typedef const void* URegistryKey;
+#endif
+
+U_CDECL_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/umsg.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/umsg.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/umsg.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,647 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 1996-2006, International Business Machines Corporation
-* and others. All Rights Reserved.
-*******************************************************************************
-*
-*   file name:  umsg.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   Change history:
-*
-*   08/5/2001  Ram         Added C wrappers for C++ API.
-*                          
-*
-*/
-
-#ifndef UMSG_H
-#define UMSG_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/uloc.h"
-#include "unicode/parseerr.h"
-#include <stdarg.h>
-/**
- * \file
- * \brief C API: MessageFormat
- *
- * <h2>Message Format C API </h2>
- *
- * Provides means to produce concatenated messages in language-neutral way.
- * Use this for all concatenations that show up to end users.
- * <P>
- * Takes a set of objects, formats them, then inserts the formatted
- * strings into the pattern at the appropriate places.
- * <P>
- * Here are some examples of usage:
- * Example 1:
- * <pre>
- * \code
- *     UChar *result, *tzID, *str;
- *     UChar pattern[100];
- *     int32_t resultLengthOut, resultlength;
- *     UCalendar *cal;
- *     UDate d1;
- *     UDateFormat *def1;
- *     UErrorCode status = U_ZERO_ERROR;
- *
- *     str=(UChar*)malloc(sizeof(UChar) * (strlen("disturbance in force") +1));
- *     u_uastrcpy(str, "disturbance in force");
- *     tzID=(UChar*)malloc(sizeof(UChar) * 4);
- *     u_uastrcpy(tzID, "PST");
- *     cal=ucal_open(tzID, u_strlen(tzID), "en_US", UCAL_TRADITIONAL, &status);
- *     ucal_setDateTime(cal, 1999, UCAL_MARCH, 18, 0, 0, 0, &status);
- *     d1=ucal_getMillis(cal, &status);
- *     u_uastrcpy(pattern, "On {0, date, long}, there was a {1} on planet {2,number,integer}");
- *     resultlength=0;
- *     resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, d1, str, 7);
- *     if(status==U_BUFFER_OVERFLOW_ERROR){
- *         status=U_ZERO_ERROR;
- *         resultlength=resultLengthOut+1;
- *         result=(UChar*)realloc(result, sizeof(UChar) * resultlength);
- *         u_formatMessage( "en_US", pattern, u_strlen(pattern), result, resultlength, &status, d1, str, 7);
- *     }
- *     printf("%s\n", austrdup(result) );//austrdup( a function used to convert UChar* to char*)
- *     //output>: "On March 18, 1999, there was a disturbance in force on planet 7
- * \endcode
- * </pre>
- * Typically, the message format will come from resources, and the
- * arguments will be dynamically set at runtime.
- * <P>
- * Example 2:
- * <pre>
- * \code
- *     UChar* str;
- *     UErrorCode status = U_ZERO_ERROR;
- *     UChar *result;
- *     UChar pattern[100];
- *     int32_t resultlength, resultLengthOut, i;
- *     double testArgs= { 100.0, 1.0, 0.0};
- *
- *     str=(UChar*)malloc(sizeof(UChar) * 10);
- *     u_uastrcpy(str, "MyDisk");
- *     u_uastrcpy(pattern, "The disk {1} contains {0,choice,0#no files|1#one file|1<{0,number,integer} files}");
- *     for(i=0; i<3; i++){
- *       resultlength=0;
- *       resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, testArgs[i], str);
- *       if(status==U_BUFFER_OVERFLOW_ERROR){
- *         status=U_ZERO_ERROR;
- *         resultlength=resultLengthOut+1;
- *         result=(UChar*)malloc(sizeof(UChar) * resultlength);
- *         u_formatMessage( "en_US", pattern, u_strlen(pattern), result, resultlength, &status, testArgs[i], str);
- *       }
- *       printf("%s\n", austrdup(result) );  //austrdup( a function used to convert UChar* to char*)
- *       free(result);
- *     }
- *     // output, with different testArgs:
- *     // output: The disk "MyDisk" contains 100 files.
- *     // output: The disk "MyDisk" contains one file.
- *     // output: The disk "MyDisk" contains no files.
- * \endcode
- *  </pre>
- *
- *  The pattern is of the following form.  Legend:
- *  <pre>
- * \code
- *       {optional item}
- *       (group that may be repeated)*
- * \endcode
- *  </pre>
- *  Do not confuse optional items with items inside quotes braces, such
- *  as this: "{".  Quoted braces are literals.
- *  <pre>
- * \code
- *       messageFormatPattern := string ( "{" messageFormatElement "}" string )*
- *
- *       messageFormatElement := argument { "," elementFormat }
- *
- *       elementFormat := "time" { "," datetimeStyle }
- *                      | "date" { "," datetimeStyle }
- *                      | "number" { "," numberStyle }
- *                      | "choice" "," choiceStyle
- *
- *       datetimeStyle := "short"
- *                      | "medium"
- *                      | "long"
- *                      | "full"
- *                      | dateFormatPattern
- *
- *       numberStyle :=   "currency"
- *                      | "percent"
- *                      | "integer"
- *                      | numberFormatPattern
- *
- *       choiceStyle :=   choiceFormatPattern
- * \endcode
- * </pre>
- * If there is no elementFormat, then the argument must be a string,
- * which is substituted. If there is no dateTimeStyle or numberStyle,
- * then the default format is used (e.g.  NumberFormat.getInstance(),
- * DateFormat.getDefaultTime() or DateFormat.getDefaultDate(). For
- * a ChoiceFormat, the pattern must always be specified, since there
- * is no default.
- * <P>
- * In strings, single quotes can be used to quote the "{" sign if
- * necessary. A real single quote is represented by ''.  Inside a
- * messageFormatElement, quotes are [not] removed. For example,
- * {1,number,$'#',##} will produce a number format with the pound-sign
- * quoted, with a result such as: "$#31,45".
- * <P>
- * If a pattern is used, then unquoted braces in the pattern, if any,
- * must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab
- * {0'}' de" and "ab } de" are not.
- * <p>
- * <dl><dt><b>Warning:</b><dd>The rules for using quotes within message
- * format patterns unfortunately have shown to be somewhat confusing.
- * In particular, it isn't always obvious to localizers whether single
- * quotes need to be doubled or not. Make sure to inform localizers about
- * the rules, and tell them (for example, by using comments in resource
- * bundle source files) which strings will be processed by MessageFormat.
- * Note that localizers may need to use single quotes in translated
- * strings where the original version doesn't have them.
- * <br>Note also that the simplest way to avoid the problem is to
- * use the real apostrophe (single quote) character U+2019 (') for
- * human-readable text, and to use the ASCII apostrophe (U+0027 ' )
- * only in program syntax, like quoting in MessageFormat.
- * See the annotations for U+0027 Apostrophe in The Unicode Standard.</p>
- * </dl>
- * <P>
- * The argument is a number from 0 to 9, which corresponds to the
- * arguments presented in an array to be formatted.
- * <P>
- * It is ok to have unused arguments in the array.  With missing
- * arguments or arguments that are not of the right class for the
- * specified format, a failing UErrorCode result is set.
- * <P>
-
- * <P>
- * [Note:] As we see above, the string produced by a choice Format in
- * MessageFormat is treated specially; occurances of '{' are used to
- * indicated subformats.
- * <P>
- * [Note:] Formats are numbered by order of variable in the string.
- * This is [not] the same as the argument numbering!
- * <pre>
- * \code
- *    For example: with "abc{2}def{3}ghi{0}...",
- *
- *    format0 affects the first variable {2}
- *    format1 affects the second variable {3}
- *    format2 affects the second variable {0}
- * \endcode
- * </pre>
- * and so on.
- */
-
-/**
- * Format a message for a locale.
- * This function may perform re-ordering of the arguments depending on the
- * locale. For all numeric arguments, double is assumed unless the type is
- * explicitly integer.  All choice format arguments must be of type double.
- * @param locale The locale for which the message will be formatted
- * @param pattern The pattern specifying the message's format
- * @param patternLength The length of pattern
- * @param result A pointer to a buffer to receive the formatted message.
- * @param resultLength The maximum size of result.
- * @param status A pointer to an UErrorCode to receive any errors
- * @param ... A variable-length argument list containing the arguments specified
- * in pattern.
- * @return The total buffer size needed; if greater than resultLength, the
- * output was truncated.
- * @see u_parseMessage
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-u_formatMessage(const char  *locale,
-                 const UChar *pattern,
-                int32_t     patternLength,
-                UChar       *result,
-                int32_t     resultLength,
-                UErrorCode  *status,
-                ...);
-
-/**
- * Format a message for a locale.
- * This function may perform re-ordering of the arguments depending on the
- * locale. For all numeric arguments, double is assumed unless the type is
- * explicitly integer.  All choice format arguments must be of type double.
- * @param locale The locale for which the message will be formatted
- * @param pattern The pattern specifying the message's format
- * @param patternLength The length of pattern
- * @param result A pointer to a buffer to receive the formatted message.
- * @param resultLength The maximum size of result.
- * @param ap A variable-length argument list containing the arguments specified
- * @param status A pointer to an UErrorCode to receive any errors
- * in pattern.
- * @return The total buffer size needed; if greater than resultLength, the
- * output was truncated.
- * @see u_parseMessage
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-u_vformatMessage(   const char  *locale,
-                    const UChar *pattern,
-                    int32_t     patternLength,
-                    UChar       *result,
-                    int32_t     resultLength,
-                    va_list     ap,
-                    UErrorCode  *status);
-
-/**
- * Parse a message.
- * For numeric arguments, this function will always use doubles.  Integer types
- * should not be passed.
- * This function is not able to parse all output from {@link #u_formatMessage }.
- * @param locale The locale for which the message is formatted
- * @param pattern The pattern specifying the message's format
- * @param patternLength The length of pattern
- * @param source The text to parse.
- * @param sourceLength The length of source, or -1 if null-terminated.
- * @param status A pointer to an UErrorCode to receive any errors
- * @param ... A variable-length argument list containing the arguments
- * specified in pattern.
- * @see u_formatMessage
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-u_parseMessage( const char   *locale,
-                const UChar  *pattern,
-                int32_t      patternLength,
-                const UChar  *source,
-                int32_t      sourceLength,
-                UErrorCode   *status,
-                ...);
-
-/**
- * Parse a message.
- * For numeric arguments, this function will always use doubles.  Integer types
- * should not be passed.
- * This function is not able to parse all output from {@link #u_formatMessage }.
- * @param locale The locale for which the message is formatted
- * @param pattern The pattern specifying the message's format
- * @param patternLength The length of pattern
- * @param source The text to parse.
- * @param sourceLength The length of source, or -1 if null-terminated.
- * @param ap A variable-length argument list containing the arguments
- * @param status A pointer to an UErrorCode to receive any errors
- * specified in pattern.
- * @see u_formatMessage
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-u_vparseMessage(const char  *locale,
-                const UChar *pattern,
-                int32_t     patternLength,
-                const UChar *source,
-                int32_t     sourceLength,
-                va_list     ap,
-                UErrorCode  *status);
-
-/**
- * Format a message for a locale.
- * This function may perform re-ordering of the arguments depending on the
- * locale. For all numeric arguments, double is assumed unless the type is
- * explicitly integer.  All choice format arguments must be of type double.
- * @param locale The locale for which the message will be formatted
- * @param pattern The pattern specifying the message's format
- * @param patternLength The length of pattern
- * @param result A pointer to a buffer to receive the formatted message.
- * @param resultLength The maximum size of result.
- * @param status A pointer to an UErrorCode to receive any errors
- * @param ... A variable-length argument list containing the arguments specified
- * in pattern.
- * @param parseError  A pointer to UParseError to receive information about errors
- *                     occurred during parsing.
- * @return The total buffer size needed; if greater than resultLength, the
- * output was truncated.
- * @see u_parseMessage
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-u_formatMessageWithError(   const char    *locale,
-                            const UChar   *pattern,
-                            int32_t       patternLength,
-                            UChar         *result,
-                            int32_t       resultLength,
-                            UParseError   *parseError,
-                            UErrorCode    *status,
-                            ...);
-
-/**
- * Format a message for a locale.
- * This function may perform re-ordering of the arguments depending on the
- * locale. For all numeric arguments, double is assumed unless the type is
- * explicitly integer.  All choice format arguments must be of type double.
- * @param locale The locale for which the message will be formatted
- * @param pattern The pattern specifying the message's format
- * @param patternLength The length of pattern
- * @param result A pointer to a buffer to receive the formatted message.
- * @param resultLength The maximum size of result.
- * @param parseError  A pointer to UParseError to receive information about errors
- *                    occurred during parsing.
- * @param ap A variable-length argument list containing the arguments specified
- * @param status A pointer to an UErrorCode to receive any errors
- * in pattern.
- * @return The total buffer size needed; if greater than resultLength, the
- * output was truncated.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-u_vformatMessageWithError(  const char   *locale,
-                            const UChar  *pattern,
-                            int32_t      patternLength,
-                            UChar        *result,
-                            int32_t      resultLength,
-                            UParseError* parseError,
-                            va_list      ap,
-                            UErrorCode   *status);
-
-/**
- * Parse a message.
- * For numeric arguments, this function will always use doubles.  Integer types
- * should not be passed.
- * This function is not able to parse all output from {@link #u_formatMessage }.
- * @param locale The locale for which the message is formatted
- * @param pattern The pattern specifying the message's format
- * @param patternLength The length of pattern
- * @param source The text to parse.
- * @param sourceLength The length of source, or -1 if null-terminated.
- * @param parseError  A pointer to UParseError to receive information about errors
- *                     occurred during parsing.
- * @param status A pointer to an UErrorCode to receive any errors
- * @param ... A variable-length argument list containing the arguments
- * specified in pattern.
- * @see u_formatMessage
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-u_parseMessageWithError(const char  *locale,
-                        const UChar *pattern,
-                        int32_t     patternLength,
-                        const UChar *source,
-                        int32_t     sourceLength,
-                        UParseError *parseError,
-                        UErrorCode  *status,
-                        ...);
-
-/**
- * Parse a message.
- * For numeric arguments, this function will always use doubles.  Integer types
- * should not be passed.
- * This function is not able to parse all output from {@link #u_formatMessage }.
- * @param locale The locale for which the message is formatted
- * @param pattern The pattern specifying the message's format
- * @param patternLength The length of pattern
- * @param source The text to parse.
- * @param sourceLength The length of source, or -1 if null-terminated.
- * @param ap A variable-length argument list containing the arguments
- * @param parseError  A pointer to UParseError to receive information about errors
- *                     occurred during parsing.
- * @param status A pointer to an UErrorCode to receive any errors
- * specified in pattern.
- * @see u_formatMessage
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-u_vparseMessageWithError(const char  *locale,
-                         const UChar *pattern,
-                         int32_t     patternLength,
-                         const UChar *source,
-                         int32_t     sourceLength,
-                         va_list     ap,
-                         UParseError *parseError,
-                         UErrorCode* status);
-
-/*----------------------- New experimental API --------------------------- */
-/** 
- * The message format object
- * @stable ICU 2.0
- */
-typedef void* UMessageFormat;
-
-
-/**
- * Open a message formatter with given pattern and for the given locale.
- * @param pattern       A pattern specifying the format to use.
- * @param patternLength Length of the pattern to use
- * @param locale        The locale for which the messages are formatted.
- * @param parseError    A pointer to UParseError struct to receive any errors 
- *                      occured during parsing. Can be NULL.
- * @param status        A pointer to an UErrorCode to receive any errors.
- * @return              A pointer to a UMessageFormat to use for formatting 
- *                      messages, or 0 if an error occurred. 
- * @stable ICU 2.0
- */
-U_STABLE UMessageFormat* U_EXPORT2 
-umsg_open(  const UChar     *pattern,
-            int32_t         patternLength,
-            const  char     *locale,
-            UParseError     *parseError,
-            UErrorCode      *status);
-
-/**
- * Close a UMessageFormat.
- * Once closed, a UMessageFormat may no longer be used.
- * @param format The formatter to close.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-umsg_close(UMessageFormat* format);
-
-/**
- * Open a copy of a UMessageFormat.
- * This function performs a deep copy.
- * @param fmt The formatter to copy
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return A pointer to a UDateFormat identical to fmt.
- * @stable ICU 2.0
- */
-U_STABLE UMessageFormat U_EXPORT2 
-umsg_clone(const UMessageFormat *fmt,
-           UErrorCode *status);
-
-/**
- * Sets the locale. This locale is used for fetching default number or date
- * format information.
- * @param fmt The formatter to set
- * @param locale The locale the formatter should use.
- * @stable ICU 2.0
- */
-U_STABLE void  U_EXPORT2 
-umsg_setLocale(UMessageFormat *fmt,
-               const char* locale);
-
-/**
- * Gets the locale. This locale is used for fetching default number or date
- * format information.
- * @param fmt The formatter to querry
- * @return the locale.
- * @stable ICU 2.0
- */
-U_STABLE const char*  U_EXPORT2 
-umsg_getLocale(const UMessageFormat *fmt);
-
-/**
- * Sets the pattern.
- * @param fmt           The formatter to use
- * @param pattern       The pattern to be applied.
- * @param patternLength Length of the pattern to use
- * @param parseError    Struct to receive information on position 
- *                      of error if an error is encountered.Can be NULL.
- * @param status        Output param set to success/failure code on
- *                      exit. If the pattern is invalid, this will be
- *                      set to a failure result.
- * @stable ICU 2.0
- */
-U_STABLE void  U_EXPORT2 
-umsg_applyPattern( UMessageFormat *fmt,
-                   const UChar* pattern,
-                   int32_t patternLength,
-                   UParseError* parseError,
-                   UErrorCode* status);
-
-/**
- * Gets the pattern.
- * @param fmt          The formatter to use
- * @param result       A pointer to a buffer to receive the pattern.
- * @param resultLength The maximum size of result.
- * @param status       Output param set to success/failure code on
- *                     exit. If the pattern is invalid, this will be
- *                     set to a failure result.  
- * @return the pattern of the format
- * @stable ICU 2.0
- */
-U_STABLE int32_t  U_EXPORT2 
-umsg_toPattern(const UMessageFormat *fmt,
-               UChar* result, 
-               int32_t resultLength,
-               UErrorCode* status);
-
-/**
- * Format a message for a locale.
- * This function may perform re-ordering of the arguments depending on the
- * locale. For all numeric arguments, double is assumed unless the type is
- * explicitly integer.  All choice format arguments must be of type double.
- * @param fmt           The formatter to use
- * @param result        A pointer to a buffer to receive the formatted message.
- * @param resultLength  The maximum size of result.
- * @param status        A pointer to an UErrorCode to receive any errors
- * @param ...           A variable-length argument list containing the arguments 
- *                      specified in pattern.
- * @return              The total buffer size needed; if greater than resultLength, 
- *                      the output was truncated.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-umsg_format(    const UMessageFormat *fmt,
-                UChar          *result,
-                int32_t        resultLength,
-                UErrorCode     *status,
-                ...);
-
-/**
- * Format a message for a locale.
- * This function may perform re-ordering of the arguments depending on the
- * locale. For all numeric arguments, double is assumed unless the type is
- * explicitly integer.  All choice format arguments must be of type double.
- * @param fmt          The formatter to use 
- * @param result       A pointer to a buffer to receive the formatted message.
- * @param resultLength The maximum size of result.
- * @param ap           A variable-length argument list containing the arguments 
- * @param status       A pointer to an UErrorCode to receive any errors
- *                     specified in pattern.
- * @return             The total buffer size needed; if greater than resultLength, 
- *                     the output was truncated.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-umsg_vformat(   const UMessageFormat *fmt,
-                UChar          *result,
-                int32_t        resultLength,
-                va_list        ap,
-                UErrorCode     *status);
-
-/**
- * Parse a message.
- * For numeric arguments, this function will always use doubles.  Integer types
- * should not be passed.
- * This function is not able to parse all output from {@link #umsg_format }.
- * @param fmt           The formatter to use 
- * @param source        The text to parse.
- * @param sourceLength  The length of source, or -1 if null-terminated.
- * @param count         Output param to receive number of elements returned.
- * @param status        A pointer to an UErrorCode to receive any errors
- * @param ...           A variable-length argument list containing the arguments
- *                      specified in pattern.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-umsg_parse( const UMessageFormat *fmt,
-            const UChar    *source,
-            int32_t        sourceLength,
-            int32_t        *count,
-            UErrorCode     *status,
-            ...);
-
-/**
- * Parse a message.
- * For numeric arguments, this function will always use doubles.  Integer types
- * should not be passed.
- * This function is not able to parse all output from {@link #umsg_format }.
- * @param fmt           The formatter to use 
- * @param source        The text to parse.
- * @param sourceLength  The length of source, or -1 if null-terminated.
- * @param count         Output param to receive number of elements returned.
- * @param ap            A variable-length argument list containing the arguments
- * @param status        A pointer to an UErrorCode to receive any errors
- *                      specified in pattern.
- * @see u_formatMessage
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-umsg_vparse(const UMessageFormat *fmt,
-            const UChar    *source,
-            int32_t        sourceLength,
-            int32_t        *count,
-            va_list        ap,
-            UErrorCode     *status);
-
-
-/**
- * Convert an 'apostrophe-friendly' pattern into a standard
- * pattern.  Standard patterns treat all apostrophes as
- * quotes, which is problematic in some languages, e.g. 
- * French, where apostrophe is commonly used.  This utility
- * assumes that only an unpaired apostrophe immediately before
- * a brace is a true quote.  Other unpaired apostrophes are paired,
- * and the resulting standard pattern string is returned.
- *
- * <p><b>Note</b> it is not guaranteed that the returned pattern
- * is indeed a valid pattern.  The only effect is to convert
- * between patterns having different quoting semantics.
- *
- * @param pattern the 'apostrophe-friendly' patttern to convert
- * @param patternLength the length of pattern, or -1 if unknown and pattern is null-terminated
- * @param dest the buffer for the result, or NULL if preflight only
- * @param destCapacity the length of the buffer, or 0 if preflighting
- * @param ec the error code
- * @return the length of the resulting text, not including trailing null
- *        if buffer has room for the trailing null, it is provided, otherwise
- *        not
- * @stable ICU 3.4
- */
-U_STABLE int32_t U_EXPORT2 
-umsg_autoQuoteApostrophe(const UChar* pattern, 
-                         int32_t patternLength,
-                         UChar* dest,
-                         int32_t destCapacity,
-                         UErrorCode* ec);
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/umsg.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/umsg.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/umsg.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/umsg.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,647 @@
+/*
+*******************************************************************************
+* Copyright (C) 1996-2006, International Business Machines Corporation
+* and others. All Rights Reserved.
+*******************************************************************************
+*
+*   file name:  umsg.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   Change history:
+*
+*   08/5/2001  Ram         Added C wrappers for C++ API.
+*                          
+*
+*/
+
+#ifndef UMSG_H
+#define UMSG_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/uloc.h"
+#include "unicode/parseerr.h"
+#include <stdarg.h>
+/**
+ * \file
+ * \brief C API: MessageFormat
+ *
+ * <h2>Message Format C API </h2>
+ *
+ * Provides means to produce concatenated messages in language-neutral way.
+ * Use this for all concatenations that show up to end users.
+ * <P>
+ * Takes a set of objects, formats them, then inserts the formatted
+ * strings into the pattern at the appropriate places.
+ * <P>
+ * Here are some examples of usage:
+ * Example 1:
+ * <pre>
+ * \code
+ *     UChar *result, *tzID, *str;
+ *     UChar pattern[100];
+ *     int32_t resultLengthOut, resultlength;
+ *     UCalendar *cal;
+ *     UDate d1;
+ *     UDateFormat *def1;
+ *     UErrorCode status = U_ZERO_ERROR;
+ *
+ *     str=(UChar*)malloc(sizeof(UChar) * (strlen("disturbance in force") +1));
+ *     u_uastrcpy(str, "disturbance in force");
+ *     tzID=(UChar*)malloc(sizeof(UChar) * 4);
+ *     u_uastrcpy(tzID, "PST");
+ *     cal=ucal_open(tzID, u_strlen(tzID), "en_US", UCAL_TRADITIONAL, &status);
+ *     ucal_setDateTime(cal, 1999, UCAL_MARCH, 18, 0, 0, 0, &status);
+ *     d1=ucal_getMillis(cal, &status);
+ *     u_uastrcpy(pattern, "On {0, date, long}, there was a {1} on planet {2,number,integer}");
+ *     resultlength=0;
+ *     resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, d1, str, 7);
+ *     if(status==U_BUFFER_OVERFLOW_ERROR){
+ *         status=U_ZERO_ERROR;
+ *         resultlength=resultLengthOut+1;
+ *         result=(UChar*)realloc(result, sizeof(UChar) * resultlength);
+ *         u_formatMessage( "en_US", pattern, u_strlen(pattern), result, resultlength, &status, d1, str, 7);
+ *     }
+ *     printf("%s\n", austrdup(result) );//austrdup( a function used to convert UChar* to char*)
+ *     //output>: "On March 18, 1999, there was a disturbance in force on planet 7
+ * \endcode
+ * </pre>
+ * Typically, the message format will come from resources, and the
+ * arguments will be dynamically set at runtime.
+ * <P>
+ * Example 2:
+ * <pre>
+ * \code
+ *     UChar* str;
+ *     UErrorCode status = U_ZERO_ERROR;
+ *     UChar *result;
+ *     UChar pattern[100];
+ *     int32_t resultlength, resultLengthOut, i;
+ *     double testArgs= { 100.0, 1.0, 0.0};
+ *
+ *     str=(UChar*)malloc(sizeof(UChar) * 10);
+ *     u_uastrcpy(str, "MyDisk");
+ *     u_uastrcpy(pattern, "The disk {1} contains {0,choice,0#no files|1#one file|1<{0,number,integer} files}");
+ *     for(i=0; i<3; i++){
+ *       resultlength=0;
+ *       resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, testArgs[i], str);
+ *       if(status==U_BUFFER_OVERFLOW_ERROR){
+ *         status=U_ZERO_ERROR;
+ *         resultlength=resultLengthOut+1;
+ *         result=(UChar*)malloc(sizeof(UChar) * resultlength);
+ *         u_formatMessage( "en_US", pattern, u_strlen(pattern), result, resultlength, &status, testArgs[i], str);
+ *       }
+ *       printf("%s\n", austrdup(result) );  //austrdup( a function used to convert UChar* to char*)
+ *       free(result);
+ *     }
+ *     // output, with different testArgs:
+ *     // output: The disk "MyDisk" contains 100 files.
+ *     // output: The disk "MyDisk" contains one file.
+ *     // output: The disk "MyDisk" contains no files.
+ * \endcode
+ *  </pre>
+ *
+ *  The pattern is of the following form.  Legend:
+ *  <pre>
+ * \code
+ *       {optional item}
+ *       (group that may be repeated)*
+ * \endcode
+ *  </pre>
+ *  Do not confuse optional items with items inside quotes braces, such
+ *  as this: "{".  Quoted braces are literals.
+ *  <pre>
+ * \code
+ *       messageFormatPattern := string ( "{" messageFormatElement "}" string )*
+ *
+ *       messageFormatElement := argument { "," elementFormat }
+ *
+ *       elementFormat := "time" { "," datetimeStyle }
+ *                      | "date" { "," datetimeStyle }
+ *                      | "number" { "," numberStyle }
+ *                      | "choice" "," choiceStyle
+ *
+ *       datetimeStyle := "short"
+ *                      | "medium"
+ *                      | "long"
+ *                      | "full"
+ *                      | dateFormatPattern
+ *
+ *       numberStyle :=   "currency"
+ *                      | "percent"
+ *                      | "integer"
+ *                      | numberFormatPattern
+ *
+ *       choiceStyle :=   choiceFormatPattern
+ * \endcode
+ * </pre>
+ * If there is no elementFormat, then the argument must be a string,
+ * which is substituted. If there is no dateTimeStyle or numberStyle,
+ * then the default format is used (e.g.  NumberFormat.getInstance(),
+ * DateFormat.getDefaultTime() or DateFormat.getDefaultDate(). For
+ * a ChoiceFormat, the pattern must always be specified, since there
+ * is no default.
+ * <P>
+ * In strings, single quotes can be used to quote the "{" sign if
+ * necessary. A real single quote is represented by ''.  Inside a
+ * messageFormatElement, quotes are [not] removed. For example,
+ * {1,number,$'#',##} will produce a number format with the pound-sign
+ * quoted, with a result such as: "$#31,45".
+ * <P>
+ * If a pattern is used, then unquoted braces in the pattern, if any,
+ * must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab
+ * {0'}' de" and "ab } de" are not.
+ * <p>
+ * <dl><dt><b>Warning:</b><dd>The rules for using quotes within message
+ * format patterns unfortunately have shown to be somewhat confusing.
+ * In particular, it isn't always obvious to localizers whether single
+ * quotes need to be doubled or not. Make sure to inform localizers about
+ * the rules, and tell them (for example, by using comments in resource
+ * bundle source files) which strings will be processed by MessageFormat.
+ * Note that localizers may need to use single quotes in translated
+ * strings where the original version doesn't have them.
+ * <br>Note also that the simplest way to avoid the problem is to
+ * use the real apostrophe (single quote) character U+2019 (') for
+ * human-readable text, and to use the ASCII apostrophe (U+0027 ' )
+ * only in program syntax, like quoting in MessageFormat.
+ * See the annotations for U+0027 Apostrophe in The Unicode Standard.</p>
+ * </dl>
+ * <P>
+ * The argument is a number from 0 to 9, which corresponds to the
+ * arguments presented in an array to be formatted.
+ * <P>
+ * It is ok to have unused arguments in the array.  With missing
+ * arguments or arguments that are not of the right class for the
+ * specified format, a failing UErrorCode result is set.
+ * <P>
+
+ * <P>
+ * [Note:] As we see above, the string produced by a choice Format in
+ * MessageFormat is treated specially; occurances of '{' are used to
+ * indicated subformats.
+ * <P>
+ * [Note:] Formats are numbered by order of variable in the string.
+ * This is [not] the same as the argument numbering!
+ * <pre>
+ * \code
+ *    For example: with "abc{2}def{3}ghi{0}...",
+ *
+ *    format0 affects the first variable {2}
+ *    format1 affects the second variable {3}
+ *    format2 affects the second variable {0}
+ * \endcode
+ * </pre>
+ * and so on.
+ */
+
+/**
+ * Format a message for a locale.
+ * This function may perform re-ordering of the arguments depending on the
+ * locale. For all numeric arguments, double is assumed unless the type is
+ * explicitly integer.  All choice format arguments must be of type double.
+ * @param locale The locale for which the message will be formatted
+ * @param pattern The pattern specifying the message's format
+ * @param patternLength The length of pattern
+ * @param result A pointer to a buffer to receive the formatted message.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @param ... A variable-length argument list containing the arguments specified
+ * in pattern.
+ * @return The total buffer size needed; if greater than resultLength, the
+ * output was truncated.
+ * @see u_parseMessage
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+u_formatMessage(const char  *locale,
+                 const UChar *pattern,
+                int32_t     patternLength,
+                UChar       *result,
+                int32_t     resultLength,
+                UErrorCode  *status,
+                ...);
+
+/**
+ * Format a message for a locale.
+ * This function may perform re-ordering of the arguments depending on the
+ * locale. For all numeric arguments, double is assumed unless the type is
+ * explicitly integer.  All choice format arguments must be of type double.
+ * @param locale The locale for which the message will be formatted
+ * @param pattern The pattern specifying the message's format
+ * @param patternLength The length of pattern
+ * @param result A pointer to a buffer to receive the formatted message.
+ * @param resultLength The maximum size of result.
+ * @param ap A variable-length argument list containing the arguments specified
+ * @param status A pointer to an UErrorCode to receive any errors
+ * in pattern.
+ * @return The total buffer size needed; if greater than resultLength, the
+ * output was truncated.
+ * @see u_parseMessage
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+u_vformatMessage(   const char  *locale,
+                    const UChar *pattern,
+                    int32_t     patternLength,
+                    UChar       *result,
+                    int32_t     resultLength,
+                    va_list     ap,
+                    UErrorCode  *status);
+
+/**
+ * Parse a message.
+ * For numeric arguments, this function will always use doubles.  Integer types
+ * should not be passed.
+ * This function is not able to parse all output from {@link #u_formatMessage }.
+ * @param locale The locale for which the message is formatted
+ * @param pattern The pattern specifying the message's format
+ * @param patternLength The length of pattern
+ * @param source The text to parse.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @param ... A variable-length argument list containing the arguments
+ * specified in pattern.
+ * @see u_formatMessage
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+u_parseMessage( const char   *locale,
+                const UChar  *pattern,
+                int32_t      patternLength,
+                const UChar  *source,
+                int32_t      sourceLength,
+                UErrorCode   *status,
+                ...);
+
+/**
+ * Parse a message.
+ * For numeric arguments, this function will always use doubles.  Integer types
+ * should not be passed.
+ * This function is not able to parse all output from {@link #u_formatMessage }.
+ * @param locale The locale for which the message is formatted
+ * @param pattern The pattern specifying the message's format
+ * @param patternLength The length of pattern
+ * @param source The text to parse.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param ap A variable-length argument list containing the arguments
+ * @param status A pointer to an UErrorCode to receive any errors
+ * specified in pattern.
+ * @see u_formatMessage
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+u_vparseMessage(const char  *locale,
+                const UChar *pattern,
+                int32_t     patternLength,
+                const UChar *source,
+                int32_t     sourceLength,
+                va_list     ap,
+                UErrorCode  *status);
+
+/**
+ * Format a message for a locale.
+ * This function may perform re-ordering of the arguments depending on the
+ * locale. For all numeric arguments, double is assumed unless the type is
+ * explicitly integer.  All choice format arguments must be of type double.
+ * @param locale The locale for which the message will be formatted
+ * @param pattern The pattern specifying the message's format
+ * @param patternLength The length of pattern
+ * @param result A pointer to a buffer to receive the formatted message.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @param ... A variable-length argument list containing the arguments specified
+ * in pattern.
+ * @param parseError  A pointer to UParseError to receive information about errors
+ *                     occurred during parsing.
+ * @return The total buffer size needed; if greater than resultLength, the
+ * output was truncated.
+ * @see u_parseMessage
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+u_formatMessageWithError(   const char    *locale,
+                            const UChar   *pattern,
+                            int32_t       patternLength,
+                            UChar         *result,
+                            int32_t       resultLength,
+                            UParseError   *parseError,
+                            UErrorCode    *status,
+                            ...);
+
+/**
+ * Format a message for a locale.
+ * This function may perform re-ordering of the arguments depending on the
+ * locale. For all numeric arguments, double is assumed unless the type is
+ * explicitly integer.  All choice format arguments must be of type double.
+ * @param locale The locale for which the message will be formatted
+ * @param pattern The pattern specifying the message's format
+ * @param patternLength The length of pattern
+ * @param result A pointer to a buffer to receive the formatted message.
+ * @param resultLength The maximum size of result.
+ * @param parseError  A pointer to UParseError to receive information about errors
+ *                    occurred during parsing.
+ * @param ap A variable-length argument list containing the arguments specified
+ * @param status A pointer to an UErrorCode to receive any errors
+ * in pattern.
+ * @return The total buffer size needed; if greater than resultLength, the
+ * output was truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+u_vformatMessageWithError(  const char   *locale,
+                            const UChar  *pattern,
+                            int32_t      patternLength,
+                            UChar        *result,
+                            int32_t      resultLength,
+                            UParseError* parseError,
+                            va_list      ap,
+                            UErrorCode   *status);
+
+/**
+ * Parse a message.
+ * For numeric arguments, this function will always use doubles.  Integer types
+ * should not be passed.
+ * This function is not able to parse all output from {@link #u_formatMessage }.
+ * @param locale The locale for which the message is formatted
+ * @param pattern The pattern specifying the message's format
+ * @param patternLength The length of pattern
+ * @param source The text to parse.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param parseError  A pointer to UParseError to receive information about errors
+ *                     occurred during parsing.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @param ... A variable-length argument list containing the arguments
+ * specified in pattern.
+ * @see u_formatMessage
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+u_parseMessageWithError(const char  *locale,
+                        const UChar *pattern,
+                        int32_t     patternLength,
+                        const UChar *source,
+                        int32_t     sourceLength,
+                        UParseError *parseError,
+                        UErrorCode  *status,
+                        ...);
+
+/**
+ * Parse a message.
+ * For numeric arguments, this function will always use doubles.  Integer types
+ * should not be passed.
+ * This function is not able to parse all output from {@link #u_formatMessage }.
+ * @param locale The locale for which the message is formatted
+ * @param pattern The pattern specifying the message's format
+ * @param patternLength The length of pattern
+ * @param source The text to parse.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param ap A variable-length argument list containing the arguments
+ * @param parseError  A pointer to UParseError to receive information about errors
+ *                     occurred during parsing.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * specified in pattern.
+ * @see u_formatMessage
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+u_vparseMessageWithError(const char  *locale,
+                         const UChar *pattern,
+                         int32_t     patternLength,
+                         const UChar *source,
+                         int32_t     sourceLength,
+                         va_list     ap,
+                         UParseError *parseError,
+                         UErrorCode* status);
+
+/*----------------------- New experimental API --------------------------- */
+/** 
+ * The message format object
+ * @stable ICU 2.0
+ */
+typedef void* UMessageFormat;
+
+
+/**
+ * Open a message formatter with given pattern and for the given locale.
+ * @param pattern       A pattern specifying the format to use.
+ * @param patternLength Length of the pattern to use
+ * @param locale        The locale for which the messages are formatted.
+ * @param parseError    A pointer to UParseError struct to receive any errors 
+ *                      occured during parsing. Can be NULL.
+ * @param status        A pointer to an UErrorCode to receive any errors.
+ * @return              A pointer to a UMessageFormat to use for formatting 
+ *                      messages, or 0 if an error occurred. 
+ * @stable ICU 2.0
+ */
+U_STABLE UMessageFormat* U_EXPORT2 
+umsg_open(  const UChar     *pattern,
+            int32_t         patternLength,
+            const  char     *locale,
+            UParseError     *parseError,
+            UErrorCode      *status);
+
+/**
+ * Close a UMessageFormat.
+ * Once closed, a UMessageFormat may no longer be used.
+ * @param format The formatter to close.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+umsg_close(UMessageFormat* format);
+
+/**
+ * Open a copy of a UMessageFormat.
+ * This function performs a deep copy.
+ * @param fmt The formatter to copy
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UDateFormat identical to fmt.
+ * @stable ICU 2.0
+ */
+U_STABLE UMessageFormat U_EXPORT2 
+umsg_clone(const UMessageFormat *fmt,
+           UErrorCode *status);
+
+/**
+ * Sets the locale. This locale is used for fetching default number or date
+ * format information.
+ * @param fmt The formatter to set
+ * @param locale The locale the formatter should use.
+ * @stable ICU 2.0
+ */
+U_STABLE void  U_EXPORT2 
+umsg_setLocale(UMessageFormat *fmt,
+               const char* locale);
+
+/**
+ * Gets the locale. This locale is used for fetching default number or date
+ * format information.
+ * @param fmt The formatter to querry
+ * @return the locale.
+ * @stable ICU 2.0
+ */
+U_STABLE const char*  U_EXPORT2 
+umsg_getLocale(const UMessageFormat *fmt);
+
+/**
+ * Sets the pattern.
+ * @param fmt           The formatter to use
+ * @param pattern       The pattern to be applied.
+ * @param patternLength Length of the pattern to use
+ * @param parseError    Struct to receive information on position 
+ *                      of error if an error is encountered.Can be NULL.
+ * @param status        Output param set to success/failure code on
+ *                      exit. If the pattern is invalid, this will be
+ *                      set to a failure result.
+ * @stable ICU 2.0
+ */
+U_STABLE void  U_EXPORT2 
+umsg_applyPattern( UMessageFormat *fmt,
+                   const UChar* pattern,
+                   int32_t patternLength,
+                   UParseError* parseError,
+                   UErrorCode* status);
+
+/**
+ * Gets the pattern.
+ * @param fmt          The formatter to use
+ * @param result       A pointer to a buffer to receive the pattern.
+ * @param resultLength The maximum size of result.
+ * @param status       Output param set to success/failure code on
+ *                     exit. If the pattern is invalid, this will be
+ *                     set to a failure result.  
+ * @return the pattern of the format
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t  U_EXPORT2 
+umsg_toPattern(const UMessageFormat *fmt,
+               UChar* result, 
+               int32_t resultLength,
+               UErrorCode* status);
+
+/**
+ * Format a message for a locale.
+ * This function may perform re-ordering of the arguments depending on the
+ * locale. For all numeric arguments, double is assumed unless the type is
+ * explicitly integer.  All choice format arguments must be of type double.
+ * @param fmt           The formatter to use
+ * @param result        A pointer to a buffer to receive the formatted message.
+ * @param resultLength  The maximum size of result.
+ * @param status        A pointer to an UErrorCode to receive any errors
+ * @param ...           A variable-length argument list containing the arguments 
+ *                      specified in pattern.
+ * @return              The total buffer size needed; if greater than resultLength, 
+ *                      the output was truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+umsg_format(    const UMessageFormat *fmt,
+                UChar          *result,
+                int32_t        resultLength,
+                UErrorCode     *status,
+                ...);
+
+/**
+ * Format a message for a locale.
+ * This function may perform re-ordering of the arguments depending on the
+ * locale. For all numeric arguments, double is assumed unless the type is
+ * explicitly integer.  All choice format arguments must be of type double.
+ * @param fmt          The formatter to use 
+ * @param result       A pointer to a buffer to receive the formatted message.
+ * @param resultLength The maximum size of result.
+ * @param ap           A variable-length argument list containing the arguments 
+ * @param status       A pointer to an UErrorCode to receive any errors
+ *                     specified in pattern.
+ * @return             The total buffer size needed; if greater than resultLength, 
+ *                     the output was truncated.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+umsg_vformat(   const UMessageFormat *fmt,
+                UChar          *result,
+                int32_t        resultLength,
+                va_list        ap,
+                UErrorCode     *status);
+
+/**
+ * Parse a message.
+ * For numeric arguments, this function will always use doubles.  Integer types
+ * should not be passed.
+ * This function is not able to parse all output from {@link #umsg_format }.
+ * @param fmt           The formatter to use 
+ * @param source        The text to parse.
+ * @param sourceLength  The length of source, or -1 if null-terminated.
+ * @param count         Output param to receive number of elements returned.
+ * @param status        A pointer to an UErrorCode to receive any errors
+ * @param ...           A variable-length argument list containing the arguments
+ *                      specified in pattern.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+umsg_parse( const UMessageFormat *fmt,
+            const UChar    *source,
+            int32_t        sourceLength,
+            int32_t        *count,
+            UErrorCode     *status,
+            ...);
+
+/**
+ * Parse a message.
+ * For numeric arguments, this function will always use doubles.  Integer types
+ * should not be passed.
+ * This function is not able to parse all output from {@link #umsg_format }.
+ * @param fmt           The formatter to use 
+ * @param source        The text to parse.
+ * @param sourceLength  The length of source, or -1 if null-terminated.
+ * @param count         Output param to receive number of elements returned.
+ * @param ap            A variable-length argument list containing the arguments
+ * @param status        A pointer to an UErrorCode to receive any errors
+ *                      specified in pattern.
+ * @see u_formatMessage
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+umsg_vparse(const UMessageFormat *fmt,
+            const UChar    *source,
+            int32_t        sourceLength,
+            int32_t        *count,
+            va_list        ap,
+            UErrorCode     *status);
+
+
+/**
+ * Convert an 'apostrophe-friendly' pattern into a standard
+ * pattern.  Standard patterns treat all apostrophes as
+ * quotes, which is problematic in some languages, e.g. 
+ * French, where apostrophe is commonly used.  This utility
+ * assumes that only an unpaired apostrophe immediately before
+ * a brace is a true quote.  Other unpaired apostrophes are paired,
+ * and the resulting standard pattern string is returned.
+ *
+ * <p><b>Note</b> it is not guaranteed that the returned pattern
+ * is indeed a valid pattern.  The only effect is to convert
+ * between patterns having different quoting semantics.
+ *
+ * @param pattern the 'apostrophe-friendly' patttern to convert
+ * @param patternLength the length of pattern, or -1 if unknown and pattern is null-terminated
+ * @param dest the buffer for the result, or NULL if preflight only
+ * @param destCapacity the length of the buffer, or 0 if preflighting
+ * @param ec the error code
+ * @return the length of the resulting text, not including trailing null
+ *        if buffer has room for the trailing null, it is provided, otherwise
+ *        not
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2 
+umsg_autoQuoteApostrophe(const UChar* pattern, 
+                         int32_t patternLength,
+                         UChar* dest,
+                         int32_t destCapacity,
+                         UErrorCode* ec);
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/unifilt.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/unifilt.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/unifilt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,127 +0,0 @@
-/*
-**********************************************************************
-* Copyright (C) 1999-2006, International Business Machines Corporation and others.
-* All Rights Reserved.
-**********************************************************************
-*   Date        Name        Description
-*   11/17/99    aliu        Creation.
-**********************************************************************
-*/
-#ifndef UNIFILT_H
-#define UNIFILT_H
-
-#include "unicode/unifunct.h"
-#include "unicode/unimatch.h"
-
-/**
- * \file 
- * \brief C++ API: Unicode Filter
- */
-
-U_NAMESPACE_BEGIN
-
-/**
- * U_ETHER is used to represent character values for positions outside
- * a range.  For example, transliterator uses this to represent
- * characters outside the range contextStart..contextLimit-1.  This
- * allows explicit matching by rules and UnicodeSets of text outside a
- * defined range.
- * @stable ICU 3.0
- */
-#define U_ETHER ((UChar)0xFFFF)
-
-/**
- *
- * <code>UnicodeFilter</code> defines a protocol for selecting a
- * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
- * Currently, filters are used in conjunction with classes like {@link
- * Transliterator} to only process selected characters through a
- * transformation.
- *
- * <p>Note: UnicodeFilter currently stubs out two pure virtual methods
- * of its base class, UnicodeMatcher.  These methods are toPattern()
- * and matchesIndexValue().  This is done so that filter classes that
- * are not actually used as matchers -- specifically, those in the
- * UnicodeFilterLogic component, and those in tests -- can continue to
- * work without defining these methods.  As long as a filter is not
- * used in an RBT during real transliteration, these methods will not
- * be called.  However, this breaks the UnicodeMatcher base class
- * protocol, and it is not a correct solution.
- *
- * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
- * hierarchy and either redesign it, or simply remove the stubs in
- * UnicodeFilter and force subclasses to implement the full
- * UnicodeMatcher protocol.
- *
- * @see UnicodeFilterLogic
- * @stable ICU 2.0
- */
-class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
-
-public:
-    /**
-     * Destructor
-     * @stable ICU 2.0
-     */
-    virtual ~UnicodeFilter();
-
-    /**
-     * Returns <tt>true</tt> for characters that are in the selected
-     * subset.  In other words, if a character is <b>to be
-     * filtered</b>, then <tt>contains()</tt> returns
-     * <b><tt>false</tt></b>.
-     * @stable ICU 2.0
-     */
-    virtual UBool contains(UChar32 c) const = 0;
-
-    /**
-     * UnicodeFunctor API.  Cast 'this' to a UnicodeMatcher* pointer
-     * and return the pointer.
-     * @stable ICU 2.4
-     */
-    virtual UnicodeMatcher* toMatcher() const;
-
-    /**
-     * Implement UnicodeMatcher API.
-     * @stable ICU 2.4
-     */
-    virtual UMatchDegree matches(const Replaceable& text,
-                                 int32_t& offset,
-                                 int32_t limit,
-                                 UBool incremental);
-
-    /**
-     * UnicodeFunctor API.  Nothing to do.
-     * @stable ICU 2.4
-     */
-    virtual void setData(const TransliterationRuleData*);
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.2
-     */
-    virtual UClassID getDynamicClassID() const = 0;
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.2
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-protected:
-
-    /*
-     * Since this class has pure virtual functions,
-     * a constructor can't be used.
-     * @stable ICU 2.0
-     */
-/*    UnicodeFilter();*/
-};
-
-/*inline UnicodeFilter::UnicodeFilter() {}*/
-
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/unifilt.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/unifilt.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/unifilt.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/unifilt.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,127 @@
+/*
+**********************************************************************
+* Copyright (C) 1999-2006, International Business Machines Corporation and others.
+* All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef UNIFILT_H
+#define UNIFILT_H
+
+#include "unicode/unifunct.h"
+#include "unicode/unimatch.h"
+
+/**
+ * \file 
+ * \brief C++ API: Unicode Filter
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * U_ETHER is used to represent character values for positions outside
+ * a range.  For example, transliterator uses this to represent
+ * characters outside the range contextStart..contextLimit-1.  This
+ * allows explicit matching by rules and UnicodeSets of text outside a
+ * defined range.
+ * @stable ICU 3.0
+ */
+#define U_ETHER ((UChar)0xFFFF)
+
+/**
+ *
+ * <code>UnicodeFilter</code> defines a protocol for selecting a
+ * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
+ * Currently, filters are used in conjunction with classes like {@link
+ * Transliterator} to only process selected characters through a
+ * transformation.
+ *
+ * <p>Note: UnicodeFilter currently stubs out two pure virtual methods
+ * of its base class, UnicodeMatcher.  These methods are toPattern()
+ * and matchesIndexValue().  This is done so that filter classes that
+ * are not actually used as matchers -- specifically, those in the
+ * UnicodeFilterLogic component, and those in tests -- can continue to
+ * work without defining these methods.  As long as a filter is not
+ * used in an RBT during real transliteration, these methods will not
+ * be called.  However, this breaks the UnicodeMatcher base class
+ * protocol, and it is not a correct solution.
+ *
+ * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
+ * hierarchy and either redesign it, or simply remove the stubs in
+ * UnicodeFilter and force subclasses to implement the full
+ * UnicodeMatcher protocol.
+ *
+ * @see UnicodeFilterLogic
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
+
+public:
+    /**
+     * Destructor
+     * @stable ICU 2.0
+     */
+    virtual ~UnicodeFilter();
+
+    /**
+     * Returns <tt>true</tt> for characters that are in the selected
+     * subset.  In other words, if a character is <b>to be
+     * filtered</b>, then <tt>contains()</tt> returns
+     * <b><tt>false</tt></b>.
+     * @stable ICU 2.0
+     */
+    virtual UBool contains(UChar32 c) const = 0;
+
+    /**
+     * UnicodeFunctor API.  Cast 'this' to a UnicodeMatcher* pointer
+     * and return the pointer.
+     * @stable ICU 2.4
+     */
+    virtual UnicodeMatcher* toMatcher() const;
+
+    /**
+     * Implement UnicodeMatcher API.
+     * @stable ICU 2.4
+     */
+    virtual UMatchDegree matches(const Replaceable& text,
+                                 int32_t& offset,
+                                 int32_t limit,
+                                 UBool incremental);
+
+    /**
+     * UnicodeFunctor API.  Nothing to do.
+     * @stable ICU 2.4
+     */
+    virtual void setData(const TransliterationRuleData*);
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const = 0;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+protected:
+
+    /*
+     * Since this class has pure virtual functions,
+     * a constructor can't be used.
+     * @stable ICU 2.0
+     */
+/*    UnicodeFilter();*/
+};
+
+/*inline UnicodeFilter::UnicodeFilter() {}*/
+
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/unifunct.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/unifunct.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/unifunct.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,125 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (c) 2002-2005, International Business Machines Corporation
-*   and others.  All Rights Reserved.
-**********************************************************************
-*   Date        Name        Description
-*   01/14/2002  aliu        Creation.
-**********************************************************************
-*/
-#ifndef UNIFUNCT_H
-#define UNIFUNCT_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-
-/**
- * \file 
- * \brief C++ API: Unicode Functor
- */
- 
-U_NAMESPACE_BEGIN
-
-class UnicodeMatcher;
-class UnicodeReplacer;
-class TransliterationRuleData;
-
-/**
- * <code>UnicodeFunctor</code> is an abstract base class for objects
- * that perform match and/or replace operations on Unicode strings.
- * @author Alan Liu
- * @stable ICU 2.4
- */
-class U_COMMON_API UnicodeFunctor : public UObject {
-
-public:
-
-    /**
-     * Destructor
-     * @stable ICU 2.4
-     */
-    virtual ~UnicodeFunctor();
-
-    /**
-     * Return a copy of this object.  All UnicodeFunctor objects
-     * have to support cloning in order to allow classes using
-     * UnicodeFunctor to implement cloning.
-     * @stable ICU 2.4
-     */
-    virtual UnicodeFunctor* clone() const = 0;
-
-    /**
-     * Cast 'this' to a UnicodeMatcher* pointer and return the
-     * pointer, or null if this is not a UnicodeMatcher*.  Subclasses
-     * that mix in UnicodeMatcher as a base class must override this.
-     * This protocol is required because a pointer to a UnicodeFunctor
-     * cannot be cast to a pointer to a UnicodeMatcher, since
-     * UnicodeMatcher is a mixin that does not derive from
-     * UnicodeFunctor.
-     * @stable ICU 2.4
-     */
-    virtual UnicodeMatcher* toMatcher() const;
-
-    /**
-     * Cast 'this' to a UnicodeReplacer* pointer and return the
-     * pointer, or null if this is not a UnicodeReplacer*.  Subclasses
-     * that mix in UnicodeReplacer as a base class must override this.
-     * This protocol is required because a pointer to a UnicodeFunctor
-     * cannot be cast to a pointer to a UnicodeReplacer, since
-     * UnicodeReplacer is a mixin that does not derive from
-     * UnicodeFunctor.
-     * @stable ICU 2.4
-     */
-    virtual UnicodeReplacer* toReplacer() const;
-
-    /**
-     * Return the class ID for this class.  This is useful only for
-     * comparing to a return value from getDynamicClassID().
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID <b>polymorphically</b>.  This method
-     * is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI.  Polymorphic operator==() and
-     * clone() methods call this method.
-     *
-     * <p>Concrete subclasses of UnicodeFunctor should use the macro
-     *    UOBJECT_DEFINE_RTTI_IMPLEMENTATION from uobject.h to
-     *    provide definitios getStaticClassID and getDynamicClassID.
-     *
-     * @return The class ID for this object. All objects of a given
-     * class have the same class ID.  Objects of other classes have
-     * different class IDs.
-     * @stable ICU 2.4
-     */
-    virtual UClassID getDynamicClassID(void) const = 0;
-
-    /**
-     * Set the data object associated with this functor.  The data
-     * object provides context for functor-to-standin mapping.  This
-     * method is required when assigning a functor to a different data
-     * object.  This function MAY GO AWAY later if the architecture is
-     * changed to pass data object pointers through the API.
-     * @internal ICU 2.1
-     */
-    virtual void setData(const TransliterationRuleData*) = 0;
-
-protected:
-
-    /**
-     * Since this class has pure virtual functions,
-     * a constructor can't be used.
-     * @stable ICU 2.0
-     */
-    /*UnicodeFunctor();*/
-
-};
-
-/*inline UnicodeFunctor::UnicodeFunctor() {}*/
-
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/unifunct.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/unifunct.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/unifunct.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/unifunct.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,125 @@
+/*
+**********************************************************************
+*   Copyright (c) 2002-2005, International Business Machines Corporation
+*   and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   01/14/2002  aliu        Creation.
+**********************************************************************
+*/
+#ifndef UNIFUNCT_H
+#define UNIFUNCT_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file 
+ * \brief C++ API: Unicode Functor
+ */
+ 
+U_NAMESPACE_BEGIN
+
+class UnicodeMatcher;
+class UnicodeReplacer;
+class TransliterationRuleData;
+
+/**
+ * <code>UnicodeFunctor</code> is an abstract base class for objects
+ * that perform match and/or replace operations on Unicode strings.
+ * @author Alan Liu
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UnicodeFunctor : public UObject {
+
+public:
+
+    /**
+     * Destructor
+     * @stable ICU 2.4
+     */
+    virtual ~UnicodeFunctor();
+
+    /**
+     * Return a copy of this object.  All UnicodeFunctor objects
+     * have to support cloning in order to allow classes using
+     * UnicodeFunctor to implement cloning.
+     * @stable ICU 2.4
+     */
+    virtual UnicodeFunctor* clone() const = 0;
+
+    /**
+     * Cast 'this' to a UnicodeMatcher* pointer and return the
+     * pointer, or null if this is not a UnicodeMatcher*.  Subclasses
+     * that mix in UnicodeMatcher as a base class must override this.
+     * This protocol is required because a pointer to a UnicodeFunctor
+     * cannot be cast to a pointer to a UnicodeMatcher, since
+     * UnicodeMatcher is a mixin that does not derive from
+     * UnicodeFunctor.
+     * @stable ICU 2.4
+     */
+    virtual UnicodeMatcher* toMatcher() const;
+
+    /**
+     * Cast 'this' to a UnicodeReplacer* pointer and return the
+     * pointer, or null if this is not a UnicodeReplacer*.  Subclasses
+     * that mix in UnicodeReplacer as a base class must override this.
+     * This protocol is required because a pointer to a UnicodeFunctor
+     * cannot be cast to a pointer to a UnicodeReplacer, since
+     * UnicodeReplacer is a mixin that does not derive from
+     * UnicodeFunctor.
+     * @stable ICU 2.4
+     */
+    virtual UnicodeReplacer* toReplacer() const;
+
+    /**
+     * Return the class ID for this class.  This is useful only for
+     * comparing to a return value from getDynamicClassID().
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID <b>polymorphically</b>.  This method
+     * is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI.  Polymorphic operator==() and
+     * clone() methods call this method.
+     *
+     * <p>Concrete subclasses of UnicodeFunctor should use the macro
+     *    UOBJECT_DEFINE_RTTI_IMPLEMENTATION from uobject.h to
+     *    provide definitios getStaticClassID and getDynamicClassID.
+     *
+     * @return The class ID for this object. All objects of a given
+     * class have the same class ID.  Objects of other classes have
+     * different class IDs.
+     * @stable ICU 2.4
+     */
+    virtual UClassID getDynamicClassID(void) const = 0;
+
+    /**
+     * Set the data object associated with this functor.  The data
+     * object provides context for functor-to-standin mapping.  This
+     * method is required when assigning a functor to a different data
+     * object.  This function MAY GO AWAY later if the architecture is
+     * changed to pass data object pointers through the API.
+     * @internal ICU 2.1
+     */
+    virtual void setData(const TransliterationRuleData*) = 0;
+
+protected:
+
+    /**
+     * Since this class has pure virtual functions,
+     * a constructor can't be used.
+     * @stable ICU 2.0
+     */
+    /*UnicodeFunctor();*/
+
+};
+
+/*inline UnicodeFunctor::UnicodeFunctor() {}*/
+
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/unimatch.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/unimatch.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/unimatch.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,163 +0,0 @@
-/*
-* Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
-**********************************************************************
-*   Date        Name        Description
-*   07/18/01    aliu        Creation.
-**********************************************************************
-*/
-#ifndef UNIMATCH_H
-#define UNIMATCH_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: Unicode Matcher
- */
-
-
-U_NAMESPACE_BEGIN
-
-class Replaceable;
-class UnicodeString;
-class UnicodeSet;
-
-/**
- * Constants returned by <code>UnicodeMatcher::matches()</code>
- * indicating the degree of match.
- * @stable ICU 2.4
- */
-enum UMatchDegree {
-    /**
-     * Constant returned by <code>matches()</code> indicating a
-     * mismatch between the text and this matcher.  The text contains
-     * a character which does not match, or the text does not contain
-     * all desired characters for a non-incremental match.
-     * @stable ICU 2.4
-     */
-    U_MISMATCH,
-    
-    /**
-     * Constant returned by <code>matches()</code> indicating a
-     * partial match between the text and this matcher.  This value is
-     * only returned for incremental match operations.  All characters
-     * of the text match, but more characters are required for a
-     * complete match.  Alternatively, for variable-length matchers,
-     * all characters of the text match, and if more characters were
-     * supplied at limit, they might also match.
-     * @stable ICU 2.4
-     */
-    U_PARTIAL_MATCH,
-    
-    /**
-     * Constant returned by <code>matches()</code> indicating a
-     * complete match between the text and this matcher.  For an
-     * incremental variable-length match, this value is returned if
-     * the given text matches, and it is known that additional
-     * characters would not alter the extent of the match.
-     * @stable ICU 2.4
-     */
-    U_MATCH
-};
-
-/**
- * <code>UnicodeMatcher</code> defines a protocol for objects that can
- * match a range of characters in a Replaceable string.
- * @stable ICU 2.4
- */
-class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
-
-public:
-    /**
-     * Destructor.
-     * @stable ICU 2.4
-     */
-    virtual ~UnicodeMatcher();
-
-    /**
-     * Return a UMatchDegree value indicating the degree of match for
-     * the given text at the given offset.  Zero, one, or more
-     * characters may be matched.
-     *
-     * Matching in the forward direction is indicated by limit >
-     * offset.  Characters from offset forwards to limit-1 will be
-     * considered for matching.
-     * 
-     * Matching in the reverse direction is indicated by limit <
-     * offset.  Characters from offset backwards to limit+1 will be
-     * considered for matching.
-     *
-     * If limit == offset then the only match possible is a zero
-     * character match (which subclasses may implement if desired).
-     *
-     * As a side effect, advance the offset parameter to the limit of
-     * the matched substring.  In the forward direction, this will be
-     * the index of the last matched character plus one.  In the
-     * reverse direction, this will be the index of the last matched
-     * character minus one.
-     *
-     * <p>Note:  This method is not const because some classes may
-     * modify their state as the result of a match.
-     *
-     * @param text the text to be matched
-     * @param offset on input, the index into text at which to begin
-     * matching.  On output, the limit of the matched text.  The
-     * number of matched characters is the output value of offset
-     * minus the input value.  Offset should always point to the
-     * HIGH SURROGATE (leading code unit) of a pair of surrogates,
-     * both on entry and upon return.
-     * @param limit the limit index of text to be matched.  Greater
-     * than offset for a forward direction match, less than offset for
-     * a backward direction match.  The last character to be
-     * considered for matching will be text.charAt(limit-1) in the
-     * forward direction or text.charAt(limit+1) in the backward
-     * direction.
-     * @param incremental if TRUE, then assume further characters may
-     * be inserted at limit and check for partial matching.  Otherwise
-     * assume the text as given is complete.
-     * @return a match degree value indicating a full match, a partial
-     * match, or a mismatch.  If incremental is FALSE then
-     * U_PARTIAL_MATCH should never be returned.
-     * @stable ICU 2.4
-     */
-    virtual UMatchDegree matches(const Replaceable& text,
-                                 int32_t& offset,
-                                 int32_t limit,
-                                 UBool incremental) = 0;
-
-    /**
-     * Returns a string representation of this matcher.  If the result of
-     * calling this function is passed to the appropriate parser, it
-     * will produce another matcher that is equal to this one.
-     * @param result the string to receive the pattern.  Previous
-     * contents will be deleted.
-     * @param escapeUnprintable if TRUE then convert unprintable
-     * character to their hex escape representations, \\uxxxx or
-     * \\Uxxxxxxxx.  Unprintable characters are those other than
-     * U+000A, U+0020..U+007E.
-     * @stable ICU 2.4
-     */
-    virtual UnicodeString& toPattern(UnicodeString& result,
-                                     UBool escapeUnprintable = FALSE) const = 0;
-
-    /**
-     * Returns TRUE if this matcher will match a character c, where c
-     * & 0xFF == v, at offset, in the forward direction (with limit >
-     * offset).  This is used by <tt>RuleBasedTransliterator</tt> for
-     * indexing.
-     * @stable ICU 2.4
-     */
-    virtual UBool matchesIndexValue(uint8_t v) const = 0;
-
-    /**
-     * Union the set of all characters that may be matched by this object
-     * into the given set.
-     * @param toUnionTo the set into which to union the source characters
-     * @stable ICU 2.4
-     */
-    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
-};
-
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/unimatch.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/unimatch.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/unimatch.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/unimatch.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,163 @@
+/*
+* Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   07/18/01    aliu        Creation.
+**********************************************************************
+*/
+#ifndef UNIMATCH_H
+#define UNIMATCH_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Unicode Matcher
+ */
+
+
+U_NAMESPACE_BEGIN
+
+class Replaceable;
+class UnicodeString;
+class UnicodeSet;
+
+/**
+ * Constants returned by <code>UnicodeMatcher::matches()</code>
+ * indicating the degree of match.
+ * @stable ICU 2.4
+ */
+enum UMatchDegree {
+    /**
+     * Constant returned by <code>matches()</code> indicating a
+     * mismatch between the text and this matcher.  The text contains
+     * a character which does not match, or the text does not contain
+     * all desired characters for a non-incremental match.
+     * @stable ICU 2.4
+     */
+    U_MISMATCH,
+    
+    /**
+     * Constant returned by <code>matches()</code> indicating a
+     * partial match between the text and this matcher.  This value is
+     * only returned for incremental match operations.  All characters
+     * of the text match, but more characters are required for a
+     * complete match.  Alternatively, for variable-length matchers,
+     * all characters of the text match, and if more characters were
+     * supplied at limit, they might also match.
+     * @stable ICU 2.4
+     */
+    U_PARTIAL_MATCH,
+    
+    /**
+     * Constant returned by <code>matches()</code> indicating a
+     * complete match between the text and this matcher.  For an
+     * incremental variable-length match, this value is returned if
+     * the given text matches, and it is known that additional
+     * characters would not alter the extent of the match.
+     * @stable ICU 2.4
+     */
+    U_MATCH
+};
+
+/**
+ * <code>UnicodeMatcher</code> defines a protocol for objects that can
+ * match a range of characters in a Replaceable string.
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
+
+public:
+    /**
+     * Destructor.
+     * @stable ICU 2.4
+     */
+    virtual ~UnicodeMatcher();
+
+    /**
+     * Return a UMatchDegree value indicating the degree of match for
+     * the given text at the given offset.  Zero, one, or more
+     * characters may be matched.
+     *
+     * Matching in the forward direction is indicated by limit >
+     * offset.  Characters from offset forwards to limit-1 will be
+     * considered for matching.
+     * 
+     * Matching in the reverse direction is indicated by limit <
+     * offset.  Characters from offset backwards to limit+1 will be
+     * considered for matching.
+     *
+     * If limit == offset then the only match possible is a zero
+     * character match (which subclasses may implement if desired).
+     *
+     * As a side effect, advance the offset parameter to the limit of
+     * the matched substring.  In the forward direction, this will be
+     * the index of the last matched character plus one.  In the
+     * reverse direction, this will be the index of the last matched
+     * character minus one.
+     *
+     * <p>Note:  This method is not const because some classes may
+     * modify their state as the result of a match.
+     *
+     * @param text the text to be matched
+     * @param offset on input, the index into text at which to begin
+     * matching.  On output, the limit of the matched text.  The
+     * number of matched characters is the output value of offset
+     * minus the input value.  Offset should always point to the
+     * HIGH SURROGATE (leading code unit) of a pair of surrogates,
+     * both on entry and upon return.
+     * @param limit the limit index of text to be matched.  Greater
+     * than offset for a forward direction match, less than offset for
+     * a backward direction match.  The last character to be
+     * considered for matching will be text.charAt(limit-1) in the
+     * forward direction or text.charAt(limit+1) in the backward
+     * direction.
+     * @param incremental if TRUE, then assume further characters may
+     * be inserted at limit and check for partial matching.  Otherwise
+     * assume the text as given is complete.
+     * @return a match degree value indicating a full match, a partial
+     * match, or a mismatch.  If incremental is FALSE then
+     * U_PARTIAL_MATCH should never be returned.
+     * @stable ICU 2.4
+     */
+    virtual UMatchDegree matches(const Replaceable& text,
+                                 int32_t& offset,
+                                 int32_t limit,
+                                 UBool incremental) = 0;
+
+    /**
+     * Returns a string representation of this matcher.  If the result of
+     * calling this function is passed to the appropriate parser, it
+     * will produce another matcher that is equal to this one.
+     * @param result the string to receive the pattern.  Previous
+     * contents will be deleted.
+     * @param escapeUnprintable if TRUE then convert unprintable
+     * character to their hex escape representations, \\uxxxx or
+     * \\Uxxxxxxxx.  Unprintable characters are those other than
+     * U+000A, U+0020..U+007E.
+     * @stable ICU 2.4
+     */
+    virtual UnicodeString& toPattern(UnicodeString& result,
+                                     UBool escapeUnprintable = FALSE) const = 0;
+
+    /**
+     * Returns TRUE if this matcher will match a character c, where c
+     * & 0xFF == v, at offset, in the forward direction (with limit >
+     * offset).  This is used by <tt>RuleBasedTransliterator</tt> for
+     * indexing.
+     * @stable ICU 2.4
+     */
+    virtual UBool matchesIndexValue(uint8_t v) const = 0;
+
+    /**
+     * Union the set of all characters that may be matched by this object
+     * into the given set.
+     * @param toUnionTo the set into which to union the source characters
+     * @stable ICU 2.4
+     */
+    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
+};
+
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/unirepl.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/unirepl.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/unirepl.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,97 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (c) 2002-2005, International Business Machines Corporation
-*   and others.  All Rights Reserved.
-**********************************************************************
-*   Date        Name        Description
-*   01/14/2002  aliu        Creation.
-**********************************************************************
-*/
-#ifndef UNIREPL_H
-#define UNIREPL_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: UnicodeReplacer
- */
-
-U_NAMESPACE_BEGIN
-
-class Replaceable;
-class UnicodeString;
-class UnicodeSet;
-
-/**
- * <code>UnicodeReplacer</code> defines a protocol for objects that
- * replace a range of characters in a Replaceable string with output
- * text.  The replacement is done via the Replaceable API so as to
- * preserve out-of-band data.
- *
- * <p>This is a mixin class.
- * @author Alan Liu
- * @stable ICU 2.4
- */
-class U_I18N_API UnicodeReplacer /* not : public UObject because this is an interface/mixin class */ {
-
- public:
-
-    /**
-     * Destructor.
-     * @stable ICU 2.4
-     */
-    virtual ~UnicodeReplacer();
-
-    /**
-     * Replace characters in 'text' from 'start' to 'limit' with the
-     * output text of this object.  Update the 'cursor' parameter to
-     * give the cursor position and return the length of the
-     * replacement text.
-     *
-     * @param text the text to be matched
-     * @param start inclusive start index of text to be replaced
-     * @param limit exclusive end index of text to be replaced;
-     * must be greater than or equal to start
-     * @param cursor output parameter for the cursor position.
-     * Not all replacer objects will update this, but in a complete
-     * tree of replacer objects, representing the entire output side
-     * of a transliteration rule, at least one must update it.
-     * @return the number of 16-bit code units in the text replacing
-     * the characters at offsets start..(limit-1) in text
-     * @stable ICU 2.4
-     */
-    virtual int32_t replace(Replaceable& text,
-                            int32_t start,
-                            int32_t limit,
-                            int32_t& cursor) = 0;
-
-    /**
-     * Returns a string representation of this replacer.  If the
-     * result of calling this function is passed to the appropriate
-     * parser, typically TransliteratorParser, it will produce another
-     * replacer that is equal to this one.
-     * @param result the string to receive the pattern.  Previous
-     * contents will be deleted.
-     * @param escapeUnprintable if TRUE then convert unprintable
-     * character to their hex escape representations, \\uxxxx or
-     * \\Uxxxxxxxx.  Unprintable characters are defined by
-     * Utility.isUnprintable().
-     * @return a reference to 'result'.
-     * @stable ICU 2.4
-     */
-    virtual UnicodeString& toReplacerPattern(UnicodeString& result,
-                                             UBool escapeUnprintable) const = 0;
-
-    /**
-     * Union the set of all characters that may output by this object
-     * into the given set.
-     * @param toUnionTo the set into which to union the output characters
-     * @stable ICU 2.4
-     */
-    virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const = 0;
-};
-
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/unirepl.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/unirepl.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/unirepl.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/unirepl.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,97 @@
+/*
+**********************************************************************
+*   Copyright (c) 2002-2005, International Business Machines Corporation
+*   and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   01/14/2002  aliu        Creation.
+**********************************************************************
+*/
+#ifndef UNIREPL_H
+#define UNIREPL_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: UnicodeReplacer
+ */
+
+U_NAMESPACE_BEGIN
+
+class Replaceable;
+class UnicodeString;
+class UnicodeSet;
+
+/**
+ * <code>UnicodeReplacer</code> defines a protocol for objects that
+ * replace a range of characters in a Replaceable string with output
+ * text.  The replacement is done via the Replaceable API so as to
+ * preserve out-of-band data.
+ *
+ * <p>This is a mixin class.
+ * @author Alan Liu
+ * @stable ICU 2.4
+ */
+class U_I18N_API UnicodeReplacer /* not : public UObject because this is an interface/mixin class */ {
+
+ public:
+
+    /**
+     * Destructor.
+     * @stable ICU 2.4
+     */
+    virtual ~UnicodeReplacer();
+
+    /**
+     * Replace characters in 'text' from 'start' to 'limit' with the
+     * output text of this object.  Update the 'cursor' parameter to
+     * give the cursor position and return the length of the
+     * replacement text.
+     *
+     * @param text the text to be matched
+     * @param start inclusive start index of text to be replaced
+     * @param limit exclusive end index of text to be replaced;
+     * must be greater than or equal to start
+     * @param cursor output parameter for the cursor position.
+     * Not all replacer objects will update this, but in a complete
+     * tree of replacer objects, representing the entire output side
+     * of a transliteration rule, at least one must update it.
+     * @return the number of 16-bit code units in the text replacing
+     * the characters at offsets start..(limit-1) in text
+     * @stable ICU 2.4
+     */
+    virtual int32_t replace(Replaceable& text,
+                            int32_t start,
+                            int32_t limit,
+                            int32_t& cursor) = 0;
+
+    /**
+     * Returns a string representation of this replacer.  If the
+     * result of calling this function is passed to the appropriate
+     * parser, typically TransliteratorParser, it will produce another
+     * replacer that is equal to this one.
+     * @param result the string to receive the pattern.  Previous
+     * contents will be deleted.
+     * @param escapeUnprintable if TRUE then convert unprintable
+     * character to their hex escape representations, \\uxxxx or
+     * \\Uxxxxxxxx.  Unprintable characters are defined by
+     * Utility.isUnprintable().
+     * @return a reference to 'result'.
+     * @stable ICU 2.4
+     */
+    virtual UnicodeString& toReplacerPattern(UnicodeString& result,
+                                             UBool escapeUnprintable) const = 0;
+
+    /**
+     * Union the set of all characters that may output by this object
+     * into the given set.
+     * @param toUnionTo the set into which to union the output characters
+     * @stable ICU 2.4
+     */
+    virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const = 0;
+};
+
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/uniset.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uniset.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uniset.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1566 +0,0 @@
-/*
-***************************************************************************
-* Copyright (C) 1999-2008, International Business Machines Corporation
-* and others. All Rights Reserved.
-***************************************************************************
-*   Date        Name        Description
-*   10/20/99    alan        Creation.
-***************************************************************************
-*/
-
-#ifndef UNICODESET_H
-#define UNICODESET_H
-
-#include "unicode/unifilt.h"
-#include "unicode/unistr.h"
-#include "unicode/uset.h"
-
-/**
- * \file
- * \brief C++ API: Unicode Set
- */
-
-U_NAMESPACE_BEGIN
-
-class BMPSet;
-class ParsePosition;
-class SymbolTable;
-class UnicodeSetStringSpan;
-class UVector;
-class RuleCharacterIterator;
-
-/**
- * A mutable set of Unicode characters and multicharacter strings.  Objects of this class
- * represent <em>character classes</em> used in regular expressions.
- * A character specifies a subset of Unicode code points.  Legal
- * code points are U+0000 to U+10FFFF, inclusive.
- *
- * <p>The UnicodeSet class is not designed to be subclassed.
- *
- * <p><code>UnicodeSet</code> supports two APIs. The first is the
- * <em>operand</em> API that allows the caller to modify the value of
- * a <code>UnicodeSet</code> object. It conforms to Java 2's
- * <code>java.util.Set</code> interface, although
- * <code>UnicodeSet</code> does not actually implement that
- * interface. All methods of <code>Set</code> are supported, with the
- * modification that they take a character range or single character
- * instead of an <code>Object</code>, and they take a
- * <code>UnicodeSet</code> instead of a <code>Collection</code>.  The
- * operand API may be thought of in terms of boolean logic: a boolean
- * OR is implemented by <code>add</code>, a boolean AND is implemented
- * by <code>retain</code>, a boolean XOR is implemented by
- * <code>complement</code> taking an argument, and a boolean NOT is
- * implemented by <code>complement</code> with no argument.  In terms
- * of traditional set theory function names, <code>add</code> is a
- * union, <code>retain</code> is an intersection, <code>remove</code>
- * is an asymmetric difference, and <code>complement</code> with no
- * argument is a set complement with respect to the superset range
- * <code>MIN_VALUE-MAX_VALUE</code>
- *
- * <p>The second API is the
- * <code>applyPattern()</code>/<code>toPattern()</code> API from the
- * <code>java.text.Format</code>-derived classes.  Unlike the
- * methods that add characters, add categories, and control the logic
- * of the set, the method <code>applyPattern()</code> sets all
- * attributes of a <code>UnicodeSet</code> at once, based on a
- * string pattern.
- *
- * <p><b>Pattern syntax</b></p>
- *
- * Patterns are accepted by the constructors and the
- * <code>applyPattern()</code> methods and returned by the
- * <code>toPattern()</code> method.  These patterns follow a syntax
- * similar to that employed by version 8 regular expression character
- * classes.  Here are some simple examples:
- *
- * \htmlonly<blockquote>\endhtmlonly
- *   <table>
- *     <tr align="top">
- *       <td nowrap valign="top" align="left"><code>[]</code></td>
- *       <td valign="top">No characters</td>
- *     </tr><tr align="top">
- *       <td nowrap valign="top" align="left"><code>[a]</code></td>
- *       <td valign="top">The character 'a'</td>
- *     </tr><tr align="top">
- *       <td nowrap valign="top" align="left"><code>[ae]</code></td>
- *       <td valign="top">The characters 'a' and 'e'</td>
- *     </tr>
- *     <tr>
- *       <td nowrap valign="top" align="left"><code>[a-e]</code></td>
- *       <td valign="top">The characters 'a' through 'e' inclusive, in Unicode code
- *       point order</td>
- *     </tr>
- *     <tr>
- *       <td nowrap valign="top" align="left"><code>[\\u4E01]</code></td>
- *       <td valign="top">The character U+4E01</td>
- *     </tr>
- *     <tr>
- *       <td nowrap valign="top" align="left"><code>[a{ab}{ac}]</code></td>
- *       <td valign="top">The character 'a' and the multicharacter strings &quot;ab&quot; and
- *       &quot;ac&quot;</td>
- *     </tr>
- *     <tr>
- *       <td nowrap valign="top" align="left"><code>[\\p{Lu}]</code></td>
- *       <td valign="top">All characters in the general category Uppercase Letter</td>
- *     </tr>
- *   </table>
- * \htmlonly</blockquote>\endhtmlonly
- *
- * Any character may be preceded by a backslash in order to remove any special
- * meaning.  White space characters, as defined by UCharacter.isWhitespace(), are
- * ignored, unless they are escaped.
- *
- * <p>Property patterns specify a set of characters having a certain
- * property as defined by the Unicode standard.  Both the POSIX-like
- * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized.  For a
- * complete list of supported property patterns, see the User's Guide
- * for UnicodeSet at
- * <a href="http://icu-project.org/userguide/unicodeSet.html">
- * http://icu-project.org/userguide/unicodeSet.html</a>.
- * Actual determination of property data is defined by the underlying
- * Unicode database as implemented by UCharacter.
- *
- * <p>Patterns specify individual characters, ranges of characters, and
- * Unicode property sets.  When elements are concatenated, they
- * specify their union.  To complement a set, place a '^' immediately
- * after the opening '['.  Property patterns are inverted by modifying
- * their delimiters; "[:^foo]" and "\\P{foo}".  In any other location,
- * '^' has no special meaning.
- *
- * <p>Ranges are indicated by placing two a '-' between two
- * characters, as in "a-z".  This specifies the range of all
- * characters from the left to the right, in Unicode order.  If the
- * left character is greater than or equal to the
- * right character it is a syntax error.  If a '-' occurs as the first
- * character after the opening '[' or '[^', or if it occurs as the
- * last character before the closing ']', then it is taken as a
- * literal.  Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same
- * set of three characters, 'a', 'b', and '-'.
- *
- * <p>Sets may be intersected using the '&' operator or the asymmetric
- * set difference may be taken using the '-' operator, for example,
- * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters
- * with values less than 4096.  Operators ('&' and '|') have equal
- * precedence and bind left-to-right.  Thus
- * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to
- * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]".  This only really matters for
- * difference; intersection is commutative.
- *
- * <table>
- * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
- * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
- * through 'z' and all letters in between, in Unicode order
- * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
- * all characters but 'a' through 'z',
- * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
- * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
- * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
- * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
- * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
- * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
- * <td>The asymmetric difference of sets specified by <em>pat1</em> and
- * <em>pat2</em>
- * <tr valign=top><td nowrap><code>[:Lu:] or \\p{Lu}</code>
- * <td>The set of characters having the specified
- * Unicode property; in
- * this case, Unicode uppercase letters
- * <tr valign=top><td nowrap><code>[:^Lu:] or \\P{Lu}</code>
- * <td>The set of characters <em>not</em> having the given
- * Unicode property
- * </table>
- *
- * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
- *
- * <p><b>Formal syntax</b></p>
- *
- * \htmlonly<blockquote>\endhtmlonly
- *   <table>
- *     <tr align="top">
- *       <td nowrap valign="top" align="right"><code>pattern :=&nbsp; </code></td>
- *       <td valign="top"><code>('[' '^'? item* ']') |
- *       property</code></td>
- *     </tr>
- *     <tr align="top">
- *       <td nowrap valign="top" align="right"><code>item :=&nbsp; </code></td>
- *       <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
- *       </code></td>
- *     </tr>
- *     <tr align="top">
- *       <td nowrap valign="top" align="right"><code>pattern-expr :=&nbsp; </code></td>
- *       <td valign="top"><code>pattern | pattern-expr pattern |
- *       pattern-expr op pattern<br>
- *       </code></td>
- *     </tr>
- *     <tr align="top">
- *       <td nowrap valign="top" align="right"><code>op :=&nbsp; </code></td>
- *       <td valign="top"><code>'&amp;' | '-'<br>
- *       </code></td>
- *     </tr>
- *     <tr align="top">
- *       <td nowrap valign="top" align="right"><code>special :=&nbsp; </code></td>
- *       <td valign="top"><code>'[' | ']' | '-'<br>
- *       </code></td>
- *     </tr>
- *     <tr align="top">
- *       <td nowrap valign="top" align="right"><code>char :=&nbsp; </code></td>
- *       <td valign="top"><em>any character that is not</em><code> special<br>
- *       | ('\' </code><em>any character</em><code>)<br>
- *       | ('\\u' hex hex hex hex)<br>
- *       </code></td>
- *     </tr>
- *     <tr align="top">
- *       <td nowrap valign="top" align="right"><code>hex :=&nbsp; </code></td>
- *       <td valign="top"><em>any character for which
- *       </em><code>Character.digit(c, 16)</code><em>
- *       returns a non-negative result</em></td>
- *     </tr>
- *     <tr>
- *       <td nowrap valign="top" align="right"><code>property :=&nbsp; </code></td>
- *       <td valign="top"><em>a Unicode property set pattern</em></td>
- *     </tr>
- *   </table>
- *   <br>
- *   <table border="1">
- *     <tr>
- *       <td>Legend: <table>
- *         <tr>
- *           <td nowrap valign="top"><code>a := b</code></td>
- *           <td width="20" valign="top">&nbsp; </td>
- *           <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
- *         </tr>
- *         <tr>
- *           <td nowrap valign="top"><code>a?</code></td>
- *           <td valign="top"></td>
- *           <td valign="top">zero or one instance of <code>a</code><br>
- *           </td>
- *         </tr>
- *         <tr>
- *           <td nowrap valign="top"><code>a*</code></td>
- *           <td valign="top"></td>
- *           <td valign="top">one or more instances of <code>a</code><br>
- *           </td>
- *         </tr>
- *         <tr>
- *           <td nowrap valign="top"><code>a | b</code></td>
- *           <td valign="top"></td>
- *           <td valign="top">either <code>a</code> or <code>b</code><br>
- *           </td>
- *         </tr>
- *         <tr>
- *           <td nowrap valign="top"><code>'a'</code></td>
- *           <td valign="top"></td>
- *           <td valign="top">the literal string between the quotes </td>
- *         </tr>
- *       </table>
- *       </td>
- *     </tr>
- *   </table>
- * \htmlonly</blockquote>\endhtmlonly
- * 
- * <p>Note:
- *  - Most UnicodeSet methods do not take a UErrorCode parameter because
- *   there are usually very few opportunities for failure other than a shortage
- *   of memory, error codes in low-level C++ string methods would be inconvenient,
- *   and the error code as the last parameter (ICU convention) would prevent
- *   the use of default parameter values.
- *   Instead, such methods set the UnicodeSet into a "bogus" state
- *   (see isBogus()) if an error occurs.
- *
- * @author Alan Liu
- * @stable ICU 2.0
- */
-class U_COMMON_API UnicodeSet : public UnicodeFilter {
-
-    int32_t len; // length of list used; 0 <= len <= capacity
-    int32_t capacity; // capacity of list
-    UChar32* list; // MUST be terminated with HIGH
-    BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
-    UChar32* buffer; // internal buffer, may be NULL
-    int32_t bufferCapacity; // capacity of buffer
-    int32_t patLen;
-
-    /**
-     * The pattern representation of this set.  This may not be the
-     * most economical pattern.  It is the pattern supplied to
-     * applyPattern(), with variables substituted and whitespace
-     * removed.  For sets constructed without applyPattern(), or
-     * modified using the non-pattern API, this string will be empty,
-     * indicating that toPattern() must generate a pattern
-     * representation from the inversion list.
-     */
-    UChar *pat;
-    UVector* strings; // maintained in sorted order
-    UnicodeSetStringSpan *stringSpan;
-
-private:
-    enum { // constants
-        kIsBogus = 1       // This set is bogus (i.e. not valid)
-    };
-    uint8_t fFlags;         // Bit flag (see constants above)
-public:
-    /**
-     * Determine if this object contains a valid set.
-     * A bogus set has no value. It is different from an empty set.
-     * It can be used to indicate that no set value is available.
-     *
-     * @return TRUE if the set is valid, FALSE otherwise
-     * @see setToBogus()
-     * @draft ICU 4.0
-     */
-    inline UBool isBogus(void) const;
-    
-    /**
-     * Make this UnicodeSet object invalid.
-     * The string will test TRUE with isBogus().
-     *
-     * A bogus set has no value. It is different from an empty set.
-     * It can be used to indicate that no set value is available.
-     *
-     * This utility function is used throughout the UnicodeSet
-     * implementation to indicate that a UnicodeSet operation failed,
-     * and may be used in other functions,
-     * especially but not exclusively when such functions do not
-     * take a UErrorCode for simplicity.
-     *
-     * @see isBogus()
-     * @draft ICU 4.0
-     */
-    void setToBogus();
-
-public:
-
-    enum {
-        /**
-         * Minimum value that can be stored in a UnicodeSet.
-         * @stable ICU 2.4
-         */
-        MIN_VALUE = 0,
-
-        /**
-         * Maximum value that can be stored in a UnicodeSet.
-         * @stable ICU 2.4
-         */
-        MAX_VALUE = 0x10ffff
-    };
-
-    //----------------------------------------------------------------
-    // Constructors &c
-    //----------------------------------------------------------------
-
-public:
-
-    /**
-     * Constructs an empty set.
-     * @stable ICU 2.0
-     */
-    UnicodeSet();
-
-    /**
-     * Constructs a set containing the given range. If <code>end >
-     * start</code> then an empty set is created.
-     *
-     * @param start first character, inclusive, of range
-     * @param end last character, inclusive, of range
-     * @stable ICU 2.4
-     */
-    UnicodeSet(UChar32 start, UChar32 end);
-
-    /**
-     * Constructs a set from the given pattern.  See the class
-     * description for the syntax of the pattern language.
-     * @param pattern a string specifying what characters are in the set
-     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
-     * contains a syntax error.
-     * @stable ICU 2.0
-     */
-    UnicodeSet(const UnicodeString& pattern,
-               UErrorCode& status);
-
-    /**
-     * Constructs a set from the given pattern.  See the class
-     * description for the syntax of the pattern language.
-     * @param pattern a string specifying what characters are in the set
-     * @param options bitmask for options to apply to the pattern.
-     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
-     * @param symbols a symbol table mapping variable names to values
-     * and stand-in characters to UnicodeSets; may be NULL
-     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
-     * contains a syntax error.
-     * @internal
-     */
-    UnicodeSet(const UnicodeString& pattern,
-               uint32_t options,
-               const SymbolTable* symbols,
-               UErrorCode& status);
-
-    /**
-     * Constructs a set from the given pattern.  See the class description
-     * for the syntax of the pattern language.
-     * @param pattern a string specifying what characters are in the set
-     * @param pos on input, the position in pattern at which to start parsing.
-     * On output, the position after the last character parsed.
-     * @param options bitmask for options to apply to the pattern.
-     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
-     * @param symbols a symbol table mapping variable names to values
-     * and stand-in characters to UnicodeSets; may be NULL
-     * @param status input-output error code
-     * @stable ICU 2.8
-     */
-    UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
-               uint32_t options,
-               const SymbolTable* symbols,
-               UErrorCode& status);
-
-    /**
-     * Constructs a set that is identical to the given UnicodeSet.
-     * @stable ICU 2.0
-     */
-    UnicodeSet(const UnicodeSet& o);
-
-    /**
-     * Destructs the set.
-     * @stable ICU 2.0
-     */
-    virtual ~UnicodeSet();
-
-    /**
-     * Assigns this object to be a copy of another.
-     * A frozen set will not be modified.
-     * @stable ICU 2.0
-     */
-    UnicodeSet& operator=(const UnicodeSet& o);
-
-    /**
-     * Compares the specified object with this set for equality.  Returns
-     * <tt>true</tt> if the two sets
-     * have the same size, and every member of the specified set is
-     * contained in this set (or equivalently, every member of this set is
-     * contained in the specified set).
-     *
-     * @param o set to be compared for equality with this set.
-     * @return <tt>true</tt> if the specified set is equal to this set.
-     * @stable ICU 2.0
-     */
-    virtual UBool operator==(const UnicodeSet& o) const;
-
-    /**
-     * Compares the specified object with this set for equality.  Returns
-     * <tt>true</tt> if the specified set is not equal to this set.
-     * @stable ICU 2.0
-     */
-    UBool operator!=(const UnicodeSet& o) const;
-
-    /**
-     * Returns a copy of this object.  All UnicodeFunctor objects have
-     * to support cloning in order to allow classes using
-     * UnicodeFunctors, such as Transliterator, to implement cloning.
-     * If this set is frozen, then the clone will be frozen as well.
-     * Use cloneAsThawed() for a mutable clone of a frozen set.
-     * @see cloneAsThawed
-     * @stable ICU 2.0
-     */
-    virtual UnicodeFunctor* clone() const;
-
-    /**
-     * Returns the hash code value for this set.
-     *
-     * @return the hash code value for this set.
-     * @see Object#hashCode()
-     * @stable ICU 2.0
-     */
-    virtual int32_t hashCode(void) const;
-
-    //----------------------------------------------------------------
-    // Freezable API
-    //----------------------------------------------------------------
-
-    /**
-     * Determines whether the set has been frozen (made immutable) or not.
-     * See the ICU4J Freezable interface for details.
-     * @return TRUE/FALSE for whether the set has been frozen
-     * @see freeze
-     * @see cloneAsThawed
-     * @stable ICU 4.0
-     */
-    inline UBool isFrozen() const;
-
-    /**
-     * Freeze the set (make it immutable).
-     * Once frozen, it cannot be unfrozen and is therefore thread-safe
-     * until it is deleted.
-     * See the ICU4J Freezable interface for details.
-     * Freezing the set may also make some operations faster, for example
-     * contains() and span().
-     * A frozen set will not be modified. (It remains frozen.)
-     * @return this set.
-     * @see isFrozen
-     * @see cloneAsThawed
-     * @stable ICU 4.0
-     */
-    UnicodeFunctor *freeze();
-
-    /**
-     * Clone the set and make the clone mutable.
-     * See the ICU4J Freezable interface for details.
-     * @return the mutable clone
-     * @see freeze
-     * @see isFrozen
-     * @stable ICU 4.0
-     */
-    UnicodeFunctor *cloneAsThawed() const;
-
-    //----------------------------------------------------------------
-    // Public API
-    //----------------------------------------------------------------
-
-    /**
-     * Make this object represent the range <code>start - end</code>.
-     * If <code>end > start</code> then this object is set to an
-     * an empty range.
-     * A frozen set will not be modified.
-     *
-     * @param start first character in the set, inclusive
-     * @param end last character in the set, inclusive
-     * @stable ICU 2.4
-     */
-    UnicodeSet& set(UChar32 start, UChar32 end);
-
-    /**
-     * Return true if the given position, in the given pattern, appears
-     * to be the start of a UnicodeSet pattern.
-     * @stable ICU 2.4
-     */
-    static UBool resemblesPattern(const UnicodeString& pattern,
-                                  int32_t pos);
-
-    /**
-     * Modifies this set to represent the set specified by the given
-     * pattern, optionally ignoring white space.  See the class
-     * description for the syntax of the pattern language.
-     * A frozen set will not be modified.
-     * @param pattern a string specifying what characters are in the set
-     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
-     * contains a syntax error.
-     * <em> Empties the set passed before applying the pattern.</em>
-     * @return a reference to this
-     * @stable ICU 2.0
-     */
-    UnicodeSet& applyPattern(const UnicodeString& pattern,
-                             UErrorCode& status);
-
-    /**
-     * Modifies this set to represent the set specified by the given
-     * pattern, optionally ignoring white space.  See the class
-     * description for the syntax of the pattern language.
-     * A frozen set will not be modified.
-     * @param pattern a string specifying what characters are in the set
-     * @param options bitmask for options to apply to the pattern.
-     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
-     * @param symbols a symbol table mapping variable names to
-     * values and stand-ins to UnicodeSets; may be NULL
-     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
-     * contains a syntax error.
-     *<em> Empties the set passed before applying the pattern.</em>
-     * @return a reference to this
-     * @internal
-     */
-    UnicodeSet& applyPattern(const UnicodeString& pattern,
-                             uint32_t options,
-                             const SymbolTable* symbols,
-                             UErrorCode& status);
-
-    /**
-     * Parses the given pattern, starting at the given position.  The
-     * character at pattern.charAt(pos.getIndex()) must be '[', or the
-     * parse fails.  Parsing continues until the corresponding closing
-     * ']'.  If a syntax error is encountered between the opening and
-     * closing brace, the parse fails.  Upon return from a successful
-     * parse, the ParsePosition is updated to point to the character
-     * following the closing ']', and a StringBuffer containing a
-     * pairs list for the parsed pattern is returned.  This method calls
-     * itself recursively to parse embedded subpatterns.
-     *<em> Empties the set passed before applying the pattern.</em>
-     * A frozen set will not be modified.
-     *
-     * @param pattern the string containing the pattern to be parsed.
-     * The portion of the string from pos.getIndex(), which must be a
-     * '[', to the corresponding closing ']', is parsed.
-     * @param pos upon entry, the position at which to being parsing.
-     * The character at pattern.charAt(pos.getIndex()) must be a '['.
-     * Upon return from a successful parse, pos.getIndex() is either
-     * the character after the closing ']' of the parsed pattern, or
-     * pattern.length() if the closing ']' is the last character of
-     * the pattern string.
-     * @param options bitmask for options to apply to the pattern.
-     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
-     * @param symbols a symbol table mapping variable names to
-     * values and stand-ins to UnicodeSets; may be NULL
-     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
-     * contains a syntax error.
-     * @return a reference to this
-     * @stable ICU 2.8
-     */
-    UnicodeSet& applyPattern(const UnicodeString& pattern,
-                             ParsePosition& pos,
-                             uint32_t options,
-                             const SymbolTable* symbols,
-                             UErrorCode& status);
-
-    /**
-     * Returns a string representation of this set.  If the result of
-     * calling this function is passed to a UnicodeSet constructor, it
-     * will produce another set that is equal to this one.
-     * A frozen set will not be modified.
-     * @param result the string to receive the rules.  Previous
-     * contents will be deleted.
-     * @param escapeUnprintable if TRUE then convert unprintable
-     * character to their hex escape representations, \\uxxxx or
-     * \\Uxxxxxxxx.  Unprintable characters are those other than
-     * U+000A, U+0020..U+007E.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeString& toPattern(UnicodeString& result,
-                             UBool escapeUnprintable = FALSE) const;
-
-    /**
-     * Modifies this set to contain those code points which have the given value
-     * for the given binary or enumerated property, as returned by
-     * u_getIntPropertyValue.  Prior contents of this set are lost.
-     * A frozen set will not be modified.
-     *
-     * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
-     * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
-     * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
-     *
-     * @param value a value in the range u_getIntPropertyMinValue(prop)..
-     * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
-     * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
-     * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
-     * categories such as [:L:] to be represented.
-     *
-     * @param ec error code input/output parameter
-     *
-     * @return a reference to this set
-     *
-     * @stable ICU 2.4
-     */
-    UnicodeSet& applyIntPropertyValue(UProperty prop,
-                                      int32_t value,
-                                      UErrorCode& ec);
-
-    /**
-     * Modifies this set to contain those code points which have the
-     * given value for the given property.  Prior contents of this
-     * set are lost.
-     * A frozen set will not be modified.
-     *
-     * @param prop a property alias, either short or long.  The name is matched
-     * loosely.  See PropertyAliases.txt for names and a description of loose
-     * matching.  If the value string is empty, then this string is interpreted
-     * as either a General_Category value alias, a Script value alias, a binary
-     * property alias, or a special ID.  Special IDs are matched loosely and
-     * correspond to the following sets:
-     *
-     * "ANY" = [\\u0000-\\U0010FFFF],
-     * "ASCII" = [\\u0000-\\u007F],
-     * "Assigned" = [:^Cn:].
-     *
-     * @param value a value alias, either short or long.  The name is matched
-     * loosely.  See PropertyValueAliases.txt for names and a description of
-     * loose matching.  In addition to aliases listed, numeric values and
-     * canonical combining classes may be expressed numerically, e.g., ("nv",
-     * "0.5") or ("ccc", "220").  The value string may also be empty.
-     *
-     * @param ec error code input/output parameter
-     *
-     * @return a reference to this set
-     *
-     * @stable ICU 2.4
-     */
-    UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
-                                   const UnicodeString& value,
-                                   UErrorCode& ec);
-
-    /**
-     * Returns the number of elements in this set (its cardinality).
-     * Note than the elements of a set may include both individual
-     * codepoints and strings.
-     *
-     * @return the number of elements in this set (its cardinality).
-     * @stable ICU 2.0
-     */
-    virtual int32_t size(void) const;
-
-    /**
-     * Returns <tt>true</tt> if this set contains no elements.
-     *
-     * @return <tt>true</tt> if this set contains no elements.
-     * @stable ICU 2.0
-     */
-    virtual UBool isEmpty(void) const;
-
-    /**
-     * Returns true if this set contains the given character.
-     * This function works faster with a frozen set.
-     * @param c character to be checked for containment
-     * @return true if the test condition is met
-     * @stable ICU 2.0
-     */
-    virtual UBool contains(UChar32 c) const;
-
-    /**
-     * Returns true if this set contains every character
-     * of the given range.
-     * @param start first character, inclusive, of the range
-     * @param end last character, inclusive, of the range
-     * @return true if the test condition is met
-     * @stable ICU 2.0
-     */
-    virtual UBool contains(UChar32 start, UChar32 end) const;
-
-    /**
-     * Returns <tt>true</tt> if this set contains the given
-     * multicharacter string.
-     * @param s string to be checked for containment
-     * @return <tt>true</tt> if this set contains the specified string
-     * @stable ICU 2.4
-     */
-    UBool contains(const UnicodeString& s) const;
-
-    /**
-     * Returns true if this set contains all the characters and strings
-     * of the given set.
-     * @param c set to be checked for containment
-     * @return true if the test condition is met
-     * @stable ICU 2.4
-     */
-    virtual UBool containsAll(const UnicodeSet& c) const;
-
-    /**
-     * Returns true if this set contains all the characters
-     * of the given string.
-     * @param s string containing characters to be checked for containment
-     * @return true if the test condition is met
-     * @stable ICU 2.4
-     */
-    UBool containsAll(const UnicodeString& s) const;
-
-    /**
-     * Returns true if this set contains none of the characters
-     * of the given range.
-     * @param start first character, inclusive, of the range
-     * @param end last character, inclusive, of the range
-     * @return true if the test condition is met
-     * @stable ICU 2.4
-     */
-    UBool containsNone(UChar32 start, UChar32 end) const;
-
-    /**
-     * Returns true if this set contains none of the characters and strings
-     * of the given set.
-     * @param c set to be checked for containment
-     * @return true if the test condition is met
-     * @stable ICU 2.4
-     */
-    UBool containsNone(const UnicodeSet& c) const;
-
-    /**
-     * Returns true if this set contains none of the characters
-     * of the given string.
-     * @param s string containing characters to be checked for containment
-     * @return true if the test condition is met
-     * @stable ICU 2.4
-     */
-    UBool containsNone(const UnicodeString& s) const;
-
-    /**
-     * Returns true if this set contains one or more of the characters
-     * in the given range.
-     * @param start first character, inclusive, of the range
-     * @param end last character, inclusive, of the range
-     * @return true if the condition is met
-     * @stable ICU 2.4
-     */
-    inline UBool containsSome(UChar32 start, UChar32 end) const;
-
-    /**
-     * Returns true if this set contains one or more of the characters
-     * and strings of the given set.
-     * @param s The set to be checked for containment
-     * @return true if the condition is met
-     * @stable ICU 2.4
-     */
-    inline UBool containsSome(const UnicodeSet& s) const;
-
-    /**
-     * Returns true if this set contains one or more of the characters
-     * of the given string.
-     * @param s string containing characters to be checked for containment
-     * @return true if the condition is met
-     * @stable ICU 2.4
-     */
-    inline UBool containsSome(const UnicodeString& s) const;
-
-    /**
-     * Returns the length of the initial substring of the input string which
-     * consists only of characters and strings that are contained in this set
-     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
-     * or only of characters and strings that are not contained
-     * in this set (USET_SPAN_NOT_CONTAINED).
-     * See USetSpanCondition for details.
-     * Similar to the strspn() C library function.
-     * Unpaired surrogates are treated according to contains() of their surrogate code points.
-     * This function works faster with a frozen set and with a non-negative string length argument.
-     * @param s start of the string
-     * @param length of the string; can be -1 for NUL-terminated
-     * @param spanCondition specifies the containment condition
-     * @return the length of the initial substring according to the spanCondition;
-     *         0 if the start of the string does not fit the spanCondition
-     * @stable ICU 4.0
-     * @see USetSpanCondition
-     */
-    int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
-
-    /**
-     * Returns the start of the trailing substring of the input string which
-     * consists only of characters and strings that are contained in this set
-     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
-     * or only of characters and strings that are not contained
-     * in this set (USET_SPAN_NOT_CONTAINED).
-     * See USetSpanCondition for details.
-     * Unpaired surrogates are treated according to contains() of their surrogate code points.
-     * This function works faster with a frozen set and with a non-negative string length argument.
-     * @param s start of the string
-     * @param length of the string; can be -1 for NUL-terminated
-     * @param spanCondition specifies the containment condition
-     * @return the start of the trailing substring according to the spanCondition;
-     *         the string length if the end of the string does not fit the spanCondition
-     * @stable ICU 4.0
-     * @see USetSpanCondition
-     */
-    int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
-
-    /**
-     * Returns the length of the initial substring of the input string which
-     * consists only of characters and strings that are contained in this set
-     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
-     * or only of characters and strings that are not contained
-     * in this set (USET_SPAN_NOT_CONTAINED).
-     * See USetSpanCondition for details.
-     * Similar to the strspn() C library function.
-     * Malformed byte sequences are treated according to contains(0xfffd).
-     * This function works faster with a frozen set and with a non-negative string length argument.
-     * @param s start of the string (UTF-8)
-     * @param length of the string; can be -1 for NUL-terminated
-     * @param spanCondition specifies the containment condition
-     * @return the length of the initial substring according to the spanCondition;
-     *         0 if the start of the string does not fit the spanCondition
-     * @stable ICU 4.0
-     * @see USetSpanCondition
-     */
-    int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
-
-    /**
-     * Returns the start of the trailing substring of the input string which
-     * consists only of characters and strings that are contained in this set
-     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
-     * or only of characters and strings that are not contained
-     * in this set (USET_SPAN_NOT_CONTAINED).
-     * See USetSpanCondition for details.
-     * Malformed byte sequences are treated according to contains(0xfffd).
-     * This function works faster with a frozen set and with a non-negative string length argument.
-     * @param s start of the string (UTF-8)
-     * @param length of the string; can be -1 for NUL-terminated
-     * @param spanCondition specifies the containment condition
-     * @return the start of the trailing substring according to the spanCondition;
-     *         the string length if the end of the string does not fit the spanCondition
-     * @stable ICU 4.0
-     * @see USetSpanCondition
-     */
-    int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
-
-    /**
-     * Implement UnicodeMatcher::matches()
-     * @stable ICU 2.4
-     */
-    virtual UMatchDegree matches(const Replaceable& text,
-                         int32_t& offset,
-                         int32_t limit,
-                         UBool incremental);
-
-private:
-    /**
-     * Returns the longest match for s in text at the given position.
-     * If limit > start then match forward from start+1 to limit
-     * matching all characters except s.charAt(0).  If limit < start,
-     * go backward starting from start-1 matching all characters
-     * except s.charAt(s.length()-1).  This method assumes that the
-     * first character, text.charAt(start), matches s, so it does not
-     * check it.
-     * @param text the text to match
-     * @param start the first character to match.  In the forward
-     * direction, text.charAt(start) is matched against s.charAt(0).
-     * In the reverse direction, it is matched against
-     * s.charAt(s.length()-1).
-     * @param limit the limit offset for matching, either last+1 in
-     * the forward direction, or last-1 in the reverse direction,
-     * where last is the index of the last character to match.
-     * @return If part of s matches up to the limit, return |limit -
-     * start|.  If all of s matches before reaching the limit, return
-     * s.length().  If there is a mismatch between s and text, return
-     * 0
-     */
-    static int32_t matchRest(const Replaceable& text,
-                             int32_t start, int32_t limit,
-                             const UnicodeString& s);
-
-    /**
-     * Returns the smallest value i such that c < list[i].  Caller
-     * must ensure that c is a legal value or this method will enter
-     * an infinite loop.  This method performs a binary search.
-     * @param c a character in the range MIN_VALUE..MAX_VALUE
-     * inclusive
-     * @return the smallest integer i in the range 0..len-1,
-     * inclusive, such that c < list[i]
-     */
-    int32_t findCodePoint(UChar32 c) const;
-
-public:
-
-    /**
-     * Implementation of UnicodeMatcher API.  Union the set of all
-     * characters that may be matched by this object into the given
-     * set.
-     * @param toUnionTo the set into which to union the source characters
-     * @stable ICU 2.4
-     */
-    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
-
-    /**
-     * Returns the index of the given character within this set, where
-     * the set is ordered by ascending code point.  If the character
-     * is not in this set, return -1.  The inverse of this method is
-     * <code>charAt()</code>.
-     * @return an index from 0..size()-1, or -1
-     * @stable ICU 2.4
-     */
-    int32_t indexOf(UChar32 c) const;
-
-    /**
-     * Returns the character at the given index within this set, where
-     * the set is ordered by ascending code point.  If the index is
-     * out of range, return (UChar32)-1.  The inverse of this method is
-     * <code>indexOf()</code>.
-     * @param index an index from 0..size()-1
-     * @return the character at the given index, or (UChar32)-1.
-     * @stable ICU 2.4
-     */
-    UChar32 charAt(int32_t index) const;
-
-    /**
-     * Adds the specified range to this set if it is not already
-     * present.  If this set already contains the specified range,
-     * the call leaves this set unchanged.  If <code>end > start</code>
-     * then an empty range is added, leaving the set unchanged.
-     * This is equivalent to a boolean logic OR, or a set UNION.
-     * A frozen set will not be modified.
-     *
-     * @param start first character, inclusive, of range to be added
-     * to this set.
-     * @param end last character, inclusive, of range to be added
-     * to this set.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeSet& add(UChar32 start, UChar32 end);
-
-    /**
-     * Adds the specified character to this set if it is not already
-     * present.  If this set already contains the specified character,
-     * the call leaves this set unchanged.
-     * A frozen set will not be modified.
-     * @stable ICU 2.0
-     */
-    UnicodeSet& add(UChar32 c);
-
-    /**
-     * Adds the specified multicharacter to this set if it is not already
-     * present.  If this set already contains the multicharacter,
-     * the call leaves this set unchanged.
-     * Thus "ch" => {"ch"}
-     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
-     * A frozen set will not be modified.
-     * @param s the source string
-     * @return this object, for chaining
-     * @stable ICU 2.4
-     */
-    UnicodeSet& add(const UnicodeString& s);
-
- private:
-    /**
-     * @return a code point IF the string consists of a single one.
-     * otherwise returns -1.
-     * @param string to test
-     */
-    static int32_t getSingleCP(const UnicodeString& s);
-
-    void _add(const UnicodeString& s);
-
- public:
-    /**
-     * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
-     * If this set already any particular character, it has no effect on that character.
-     * A frozen set will not be modified.
-     * @param s the source string
-     * @return this object, for chaining
-     * @stable ICU 2.4
-     */
-    UnicodeSet& addAll(const UnicodeString& s);
-
-    /**
-     * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
-     * If this set already any particular character, it has no effect on that character.
-     * A frozen set will not be modified.
-     * @param s the source string
-     * @return this object, for chaining
-     * @stable ICU 2.4
-     */
-    UnicodeSet& retainAll(const UnicodeString& s);
-
-    /**
-     * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
-     * If this set already any particular character, it has no effect on that character.
-     * A frozen set will not be modified.
-     * @param s the source string
-     * @return this object, for chaining
-     * @stable ICU 2.4
-     */
-    UnicodeSet& complementAll(const UnicodeString& s);
-
-    /**
-     * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
-     * If this set already any particular character, it has no effect on that character.
-     * A frozen set will not be modified.
-     * @param s the source string
-     * @return this object, for chaining
-     * @stable ICU 2.4
-     */
-    UnicodeSet& removeAll(const UnicodeString& s);
-
-    /**
-     * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
-     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
-     * @param s the source string
-     * @return a newly created set containing the given string.
-     * The caller owns the return object and is responsible for deleting it.
-     * @stable ICU 2.4
-     */
-    static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
-
-
-    /**
-     * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
-     * @param s the source string
-     * @return a newly created set containing the given characters
-     * The caller owns the return object and is responsible for deleting it.
-     * @stable ICU 2.4
-     */
-    static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
-
-    /**
-     * Retain only the elements in this set that are contained in the
-     * specified range.  If <code>end > start</code> then an empty range is
-     * retained, leaving the set empty.  This is equivalent to
-     * a boolean logic AND, or a set INTERSECTION.
-     * A frozen set will not be modified.
-     *
-     * @param start first character, inclusive, of range to be retained
-     * to this set.
-     * @param end last character, inclusive, of range to be retained
-     * to this set.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeSet& retain(UChar32 start, UChar32 end);
-
-
-    /**
-     * Retain the specified character from this set if it is present.
-     * A frozen set will not be modified.
-     * @stable ICU 2.0
-     */
-    UnicodeSet& retain(UChar32 c);
-
-    /**
-     * Removes the specified range from this set if it is present.
-     * The set will not contain the specified range once the call
-     * returns.  If <code>end > start</code> then an empty range is
-     * removed, leaving the set unchanged.
-     * A frozen set will not be modified.
-     *
-     * @param start first character, inclusive, of range to be removed
-     * from this set.
-     * @param end last character, inclusive, of range to be removed
-     * from this set.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeSet& remove(UChar32 start, UChar32 end);
-
-    /**
-     * Removes the specified character from this set if it is present.
-     * The set will not contain the specified range once the call
-     * returns.
-     * A frozen set will not be modified.
-     * @stable ICU 2.0
-     */
-    UnicodeSet& remove(UChar32 c);
-
-    /**
-     * Removes the specified string from this set if it is present.
-     * The set will not contain the specified character once the call
-     * returns.
-     * A frozen set will not be modified.
-     * @param s the source string
-     * @return this object, for chaining
-     * @stable ICU 2.4
-     */
-    UnicodeSet& remove(const UnicodeString& s);
-
-    /**
-     * Inverts this set.  This operation modifies this set so that
-     * its value is its complement.  This is equivalent to
-     * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
-     * A frozen set will not be modified.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeSet& complement(void);
-
-    /**
-     * Complements the specified range in this set.  Any character in
-     * the range will be removed if it is in this set, or will be
-     * added if it is not in this set.  If <code>end > start</code>
-     * then an empty range is complemented, leaving the set unchanged.
-     * This is equivalent to a boolean logic XOR.
-     * A frozen set will not be modified.
-     *
-     * @param start first character, inclusive, of range to be removed
-     * from this set.
-     * @param end last character, inclusive, of range to be removed
-     * from this set.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeSet& complement(UChar32 start, UChar32 end);
-
-    /**
-     * Complements the specified character in this set.  The character
-     * will be removed if it is in this set, or will be added if it is
-     * not in this set.
-     * A frozen set will not be modified.
-     * @stable ICU 2.0
-     */
-    UnicodeSet& complement(UChar32 c);
-
-    /**
-     * Complement the specified string in this set.
-     * The set will not contain the specified string once the call
-     * returns.
-     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
-     * A frozen set will not be modified.
-     * @param s the string to complement
-     * @return this object, for chaining
-     * @stable ICU 2.4
-     */
-    UnicodeSet& complement(const UnicodeString& s);
-
-    /**
-     * Adds all of the elements in the specified set to this set if
-     * they're not already present.  This operation effectively
-     * modifies this set so that its value is the <i>union</i> of the two
-     * sets.  The behavior of this operation is unspecified if the specified
-     * collection is modified while the operation is in progress.
-     * A frozen set will not be modified.
-     *
-     * @param c set whose elements are to be added to this set.
-     * @see #add(UChar32, UChar32)
-     * @stable ICU 2.0
-     */
-    virtual UnicodeSet& addAll(const UnicodeSet& c);
-
-    /**
-     * Retains only the elements in this set that are contained in the
-     * specified set.  In other words, removes from this set all of
-     * its elements that are not contained in the specified set.  This
-     * operation effectively modifies this set so that its value is
-     * the <i>intersection</i> of the two sets.
-     * A frozen set will not be modified.
-     *
-     * @param c set that defines which elements this set will retain.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeSet& retainAll(const UnicodeSet& c);
-
-    /**
-     * Removes from this set all of its elements that are contained in the
-     * specified set.  This operation effectively modifies this
-     * set so that its value is the <i>asymmetric set difference</i> of
-     * the two sets.
-     * A frozen set will not be modified.
-     *
-     * @param c set that defines which elements will be removed from
-     *          this set.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeSet& removeAll(const UnicodeSet& c);
-
-    /**
-     * Complements in this set all elements contained in the specified
-     * set.  Any character in the other set will be removed if it is
-     * in this set, or will be added if it is not in this set.
-     * A frozen set will not be modified.
-     *
-     * @param c set that defines which elements will be xor'ed from
-     *          this set.
-     * @stable ICU 2.4
-     */
-    virtual UnicodeSet& complementAll(const UnicodeSet& c);
-
-    /**
-     * Removes all of the elements from this set.  This set will be
-     * empty after this call returns.
-     * A frozen set will not be modified.
-     * @stable ICU 2.0
-     */
-    virtual UnicodeSet& clear(void);
-
-    /**
-     * Close this set over the given attribute.  For the attribute
-     * USET_CASE, the result is to modify this set so that:
-     *
-     * 1. For each character or string 'a' in this set, all strings or
-     * characters 'b' such that foldCase(a) == foldCase(b) are added
-     * to this set.
-     *
-     * 2. For each string 'e' in the resulting set, if e !=
-     * foldCase(e), 'e' will be removed.
-     *
-     * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
-     *
-     * (Here foldCase(x) refers to the operation u_strFoldCase, and a
-     * == b denotes that the contents are the same, not pointer
-     * comparison.)
-     *
-     * A frozen set will not be modified.
-     *
-     * @param attribute bitmask for attributes to close over.
-     * Currently only the USET_CASE bit is supported.  Any undefined bits
-     * are ignored.
-     * @return a reference to this set.
-     * @internal
-     */
-    UnicodeSet& closeOver(int32_t attribute);
-
-    /**
-     * Remove all strings from this set.
-     *
-     * @return a reference to this set.
-     * @internal
-     */
-    virtual UnicodeSet &removeAllStrings();
-
-    /**
-     * Iteration method that returns the number of ranges contained in
-     * this set.
-     * @see #getRangeStart
-     * @see #getRangeEnd
-     * @stable ICU 2.4
-     */
-    virtual int32_t getRangeCount(void) const;
-
-    /**
-     * Iteration method that returns the first character in the
-     * specified range of this set.
-     * @see #getRangeCount
-     * @see #getRangeEnd
-     * @stable ICU 2.4
-     */
-    virtual UChar32 getRangeStart(int32_t index) const;
-
-    /**
-     * Iteration method that returns the last character in the
-     * specified range of this set.
-     * @see #getRangeStart
-     * @see #getRangeEnd
-     * @stable ICU 2.4
-     */
-    virtual UChar32 getRangeEnd(int32_t index) const;
-
-    /**
-     * Serializes this set into an array of 16-bit integers.  Serialization
-     * (currently) only records the characters in the set; multicharacter
-     * strings are ignored.
-     *
-     * The array has following format (each line is one 16-bit
-     * integer):
-     *
-     *  length     = (n+2*m) | (m!=0?0x8000:0)
-     *  bmpLength  = n; present if m!=0
-     *  bmp[0]
-     *  bmp[1]
-     *  ...
-     *  bmp[n-1]
-     *  supp-high[0]
-     *  supp-low[0]
-     *  supp-high[1]
-     *  supp-low[1]
-     *  ...
-     *  supp-high[m-1]
-     *  supp-low[m-1]
-     *
-     * The array starts with a header.  After the header are n bmp
-     * code points, then m supplementary code points.  Either n or m
-     * or both may be zero.  n+2*m is always <= 0x7FFF.
-     *
-     * If there are no supplementary characters (if m==0) then the
-     * header is one 16-bit integer, 'length', with value n.
-     *
-     * If there are supplementary characters (if m!=0) then the header
-     * is two 16-bit integers.  The first, 'length', has value
-     * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
-     *
-     * After the header the code points are stored in ascending order.
-     * Supplementary code points are stored as most significant 16
-     * bits followed by least significant 16 bits.
-     *
-     * @param dest pointer to buffer of destCapacity 16-bit integers.
-     * May be NULL only if destCapacity is zero.
-     * @param destCapacity size of dest, or zero.  Must not be negative.
-     * @param ec error code.  Will be set to U_INDEX_OUTOFBOUNDS_ERROR
-     * if n+2*m > 0x7FFF.  Will be set to U_BUFFER_OVERFLOW_ERROR if
-     * n+2*m+(m!=0?2:1) > destCapacity.
-     * @return the total length of the serialized format, including
-     * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
-     * than U_BUFFER_OVERFLOW_ERROR.
-     * @stable ICU 2.4
-     */
-    int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
-
-    /**
-     * Reallocate this objects internal structures to take up the least
-     * possible space, without changing this object's value.
-     * A frozen set will not be modified.
-     * @stable ICU 2.4
-     */
-    virtual UnicodeSet& compact();
-
-    /**
-     * Return the class ID for this class.  This is useful only for
-     * comparing to a return value from getDynamicClassID().  For example:
-     * <pre>
-     * .      Base* polymorphic_pointer = createPolymorphicObject();
-     * .      if (polymorphic_pointer->getDynamicClassID() ==
-     * .          Derived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Implement UnicodeFunctor API.
-     *
-     * @return The class ID for this object. All objects of a given
-     * class have the same class ID.  Objects of other classes have
-     * different class IDs.
-     * @stable ICU 2.4
-     */
-    virtual UClassID getDynamicClassID(void) const;
-
-private:
-
-    // Private API for the USet API
-
-    friend class USetAccess;
-
-    int32_t getStringCount() const;
-
-    const UnicodeString* getString(int32_t index) const;
-
-    //----------------------------------------------------------------
-    // RuleBasedTransliterator support
-    //----------------------------------------------------------------
-
-private:
-
-    /**
-     * Returns <tt>true</tt> if this set contains any character whose low byte
-     * is the given value.  This is used by <tt>RuleBasedTransliterator</tt> for
-     * indexing.
-     */
-    virtual UBool matchesIndexValue(uint8_t v) const;
-
-private:
-
-    //----------------------------------------------------------------
-    // Implementation: Clone as thawed (see ICU4J Freezable)
-    //----------------------------------------------------------------
-
-    UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
-
-    //----------------------------------------------------------------
-    // Implementation: Pattern parsing
-    //----------------------------------------------------------------
-
-    void applyPattern(RuleCharacterIterator& chars,
-                      const SymbolTable* symbols,
-                      UnicodeString& rebuiltPat,
-                      uint32_t options,
-                      UErrorCode& ec);
-
-    //----------------------------------------------------------------
-    // Implementation: Utility methods
-    //----------------------------------------------------------------
-
-    void ensureCapacity(int32_t newLen, UErrorCode& ec);
-
-    void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
-
-    void swapBuffers(void);
-
-    UBool allocateStrings(UErrorCode &status);
-
-    UnicodeString& _toPattern(UnicodeString& result,
-                              UBool escapeUnprintable) const;
-
-    UnicodeString& _generatePattern(UnicodeString& result,
-                                    UBool escapeUnprintable) const;
-
-    static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
-
-    static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
-
-    //----------------------------------------------------------------
-    // Implementation: Fundamental operators
-    //----------------------------------------------------------------
-
-    void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
-
-    void add(const UChar32* other, int32_t otherLen, int8_t polarity);
-
-    void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
-
-    /**
-     * Return true if the given position, in the given pattern, appears
-     * to be the start of a property set pattern [:foo:], \\p{foo}, or
-     * \\P{foo}, or \\N{name}.
-     */
-    static UBool resemblesPropertyPattern(const UnicodeString& pattern,
-                                          int32_t pos);
-
-    static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
-                                          int32_t iterOpts);
-
-    /**
-     * Parse the given property pattern at the given parse position
-     * and set this UnicodeSet to the result.
-     *
-     * The original design document is out of date, but still useful.
-     * Ignore the property and value names:
-     * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/unicodeset_properties.html
-     *
-     * Recognized syntax:
-     *
-     * [:foo:] [:^foo:] - white space not allowed within "[:" or ":]"
-     * \\p{foo} \\P{foo}  - white space not allowed within "\\p" or "\\P"
-     * \\N{name}         - white space not allowed within "\\N"
-     *
-     * Other than the above restrictions, white space is ignored.  Case
-     * is ignored except in "\\p" and "\\P" and "\\N".  In 'name' leading
-     * and trailing space is deleted, and internal runs of whitespace
-     * are collapsed to a single space.
-     *
-     * We support binary properties, enumerated properties, and the
-     * following non-enumerated properties:
-     *
-     *  Numeric_Value
-     *  Name
-     *  Unicode_1_Name
-     *
-     * @param pattern the pattern string
-     * @param ppos on entry, the position at which to begin parsing.
-     * This should be one of the locations marked '^':
-     *
-     *   [:blah:]     \\p{blah}     \\P{blah}     \\N{name}
-     *   ^       %    ^       %    ^       %    ^       %
-     *
-     * On return, the position after the last character parsed, that is,
-     * the locations marked '%'.  If the parse fails, ppos is returned
-     * unchanged.
-     * @return a reference to this.
-     */
-    UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
-                                     ParsePosition& ppos,
-                                     UErrorCode &ec);
-
-    void applyPropertyPattern(RuleCharacterIterator& chars,
-                              UnicodeString& rebuiltPat,
-                              UErrorCode& ec);
-
-    static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
-
-    /**
-     * A filter that returns TRUE if the given code point should be
-     * included in the UnicodeSet being constructed.
-     */
-    typedef UBool (*Filter)(UChar32 codePoint, void* context);
-
-    /**
-     * Given a filter, set this UnicodeSet to the code points
-     * contained by that filter.  The filter MUST be
-     * property-conformant.  That is, if it returns value v for one
-     * code point, then it must return v for all affiliated code
-     * points, as defined by the inclusions list.  See
-     * getInclusions().
-     * src is a UPropertySource value.
-     */
-    void applyFilter(Filter filter,
-                     void* context,
-                     int32_t src,
-                     UErrorCode &status);
-
-    /**
-     * Set the new pattern to cache.
-     */
-    void setPattern(const UnicodeString& newPat);
-    /**
-     * Release existing cached pattern.
-     */
-    void releasePattern();
-
-    friend class UnicodeSetIterator;
-};
-
-inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
-    return !operator==(o);
-}
-
-inline UBool UnicodeSet::isFrozen() const {
-    return (UBool)(bmpSet!=NULL || stringSpan!=NULL);
-}
-
-inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
-    return !containsNone(start, end);
-}
-
-inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
-    return !containsNone(s);
-}
-
-inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
-    return !containsNone(s);
-}
-
-inline UBool UnicodeSet::isBogus() const {
-    return (UBool)(fFlags & kIsBogus);
-}
-
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/uniset.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uniset.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uniset.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uniset.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1566 @@
+/*
+***************************************************************************
+* Copyright (C) 1999-2008, International Business Machines Corporation
+* and others. All Rights Reserved.
+***************************************************************************
+*   Date        Name        Description
+*   10/20/99    alan        Creation.
+***************************************************************************
+*/
+
+#ifndef UNICODESET_H
+#define UNICODESET_H
+
+#include "unicode/unifilt.h"
+#include "unicode/unistr.h"
+#include "unicode/uset.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode Set
+ */
+
+U_NAMESPACE_BEGIN
+
+class BMPSet;
+class ParsePosition;
+class SymbolTable;
+class UnicodeSetStringSpan;
+class UVector;
+class RuleCharacterIterator;
+
+/**
+ * A mutable set of Unicode characters and multicharacter strings.  Objects of this class
+ * represent <em>character classes</em> used in regular expressions.
+ * A character specifies a subset of Unicode code points.  Legal
+ * code points are U+0000 to U+10FFFF, inclusive.
+ *
+ * <p>The UnicodeSet class is not designed to be subclassed.
+ *
+ * <p><code>UnicodeSet</code> supports two APIs. The first is the
+ * <em>operand</em> API that allows the caller to modify the value of
+ * a <code>UnicodeSet</code> object. It conforms to Java 2's
+ * <code>java.util.Set</code> interface, although
+ * <code>UnicodeSet</code> does not actually implement that
+ * interface. All methods of <code>Set</code> are supported, with the
+ * modification that they take a character range or single character
+ * instead of an <code>Object</code>, and they take a
+ * <code>UnicodeSet</code> instead of a <code>Collection</code>.  The
+ * operand API may be thought of in terms of boolean logic: a boolean
+ * OR is implemented by <code>add</code>, a boolean AND is implemented
+ * by <code>retain</code>, a boolean XOR is implemented by
+ * <code>complement</code> taking an argument, and a boolean NOT is
+ * implemented by <code>complement</code> with no argument.  In terms
+ * of traditional set theory function names, <code>add</code> is a
+ * union, <code>retain</code> is an intersection, <code>remove</code>
+ * is an asymmetric difference, and <code>complement</code> with no
+ * argument is a set complement with respect to the superset range
+ * <code>MIN_VALUE-MAX_VALUE</code>
+ *
+ * <p>The second API is the
+ * <code>applyPattern()</code>/<code>toPattern()</code> API from the
+ * <code>java.text.Format</code>-derived classes.  Unlike the
+ * methods that add characters, add categories, and control the logic
+ * of the set, the method <code>applyPattern()</code> sets all
+ * attributes of a <code>UnicodeSet</code> at once, based on a
+ * string pattern.
+ *
+ * <p><b>Pattern syntax</b></p>
+ *
+ * Patterns are accepted by the constructors and the
+ * <code>applyPattern()</code> methods and returned by the
+ * <code>toPattern()</code> method.  These patterns follow a syntax
+ * similar to that employed by version 8 regular expression character
+ * classes.  Here are some simple examples:
+ *
+ * \htmlonly<blockquote>\endhtmlonly
+ *   <table>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="left"><code>[]</code></td>
+ *       <td valign="top">No characters</td>
+ *     </tr><tr align="top">
+ *       <td nowrap valign="top" align="left"><code>[a]</code></td>
+ *       <td valign="top">The character 'a'</td>
+ *     </tr><tr align="top">
+ *       <td nowrap valign="top" align="left"><code>[ae]</code></td>
+ *       <td valign="top">The characters 'a' and 'e'</td>
+ *     </tr>
+ *     <tr>
+ *       <td nowrap valign="top" align="left"><code>[a-e]</code></td>
+ *       <td valign="top">The characters 'a' through 'e' inclusive, in Unicode code
+ *       point order</td>
+ *     </tr>
+ *     <tr>
+ *       <td nowrap valign="top" align="left"><code>[\\u4E01]</code></td>
+ *       <td valign="top">The character U+4E01</td>
+ *     </tr>
+ *     <tr>
+ *       <td nowrap valign="top" align="left"><code>[a{ab}{ac}]</code></td>
+ *       <td valign="top">The character 'a' and the multicharacter strings &quot;ab&quot; and
+ *       &quot;ac&quot;</td>
+ *     </tr>
+ *     <tr>
+ *       <td nowrap valign="top" align="left"><code>[\\p{Lu}]</code></td>
+ *       <td valign="top">All characters in the general category Uppercase Letter</td>
+ *     </tr>
+ *   </table>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * Any character may be preceded by a backslash in order to remove any special
+ * meaning.  White space characters, as defined by UCharacter.isWhitespace(), are
+ * ignored, unless they are escaped.
+ *
+ * <p>Property patterns specify a set of characters having a certain
+ * property as defined by the Unicode standard.  Both the POSIX-like
+ * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized.  For a
+ * complete list of supported property patterns, see the User's Guide
+ * for UnicodeSet at
+ * <a href="http://icu-project.org/userguide/unicodeSet.html">
+ * http://icu-project.org/userguide/unicodeSet.html</a>.
+ * Actual determination of property data is defined by the underlying
+ * Unicode database as implemented by UCharacter.
+ *
+ * <p>Patterns specify individual characters, ranges of characters, and
+ * Unicode property sets.  When elements are concatenated, they
+ * specify their union.  To complement a set, place a '^' immediately
+ * after the opening '['.  Property patterns are inverted by modifying
+ * their delimiters; "[:^foo]" and "\\P{foo}".  In any other location,
+ * '^' has no special meaning.
+ *
+ * <p>Ranges are indicated by placing two a '-' between two
+ * characters, as in "a-z".  This specifies the range of all
+ * characters from the left to the right, in Unicode order.  If the
+ * left character is greater than or equal to the
+ * right character it is a syntax error.  If a '-' occurs as the first
+ * character after the opening '[' or '[^', or if it occurs as the
+ * last character before the closing ']', then it is taken as a
+ * literal.  Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same
+ * set of three characters, 'a', 'b', and '-'.
+ *
+ * <p>Sets may be intersected using the '&' operator or the asymmetric
+ * set difference may be taken using the '-' operator, for example,
+ * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters
+ * with values less than 4096.  Operators ('&' and '|') have equal
+ * precedence and bind left-to-right.  Thus
+ * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to
+ * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]".  This only really matters for
+ * difference; intersection is commutative.
+ *
+ * <table>
+ * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
+ * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
+ * through 'z' and all letters in between, in Unicode order
+ * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
+ * all characters but 'a' through 'z',
+ * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
+ * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
+ * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
+ * <td>The asymmetric difference of sets specified by <em>pat1</em> and
+ * <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[:Lu:] or \\p{Lu}</code>
+ * <td>The set of characters having the specified
+ * Unicode property; in
+ * this case, Unicode uppercase letters
+ * <tr valign=top><td nowrap><code>[:^Lu:] or \\P{Lu}</code>
+ * <td>The set of characters <em>not</em> having the given
+ * Unicode property
+ * </table>
+ *
+ * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
+ *
+ * <p><b>Formal syntax</b></p>
+ *
+ * \htmlonly<blockquote>\endhtmlonly
+ *   <table>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>pattern :=&nbsp; </code></td>
+ *       <td valign="top"><code>('[' '^'? item* ']') |
+ *       property</code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>item :=&nbsp; </code></td>
+ *       <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>pattern-expr :=&nbsp; </code></td>
+ *       <td valign="top"><code>pattern | pattern-expr pattern |
+ *       pattern-expr op pattern<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>op :=&nbsp; </code></td>
+ *       <td valign="top"><code>'&amp;' | '-'<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>special :=&nbsp; </code></td>
+ *       <td valign="top"><code>'[' | ']' | '-'<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>char :=&nbsp; </code></td>
+ *       <td valign="top"><em>any character that is not</em><code> special<br>
+ *       | ('\' </code><em>any character</em><code>)<br>
+ *       | ('\\u' hex hex hex hex)<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>hex :=&nbsp; </code></td>
+ *       <td valign="top"><em>any character for which
+ *       </em><code>Character.digit(c, 16)</code><em>
+ *       returns a non-negative result</em></td>
+ *     </tr>
+ *     <tr>
+ *       <td nowrap valign="top" align="right"><code>property :=&nbsp; </code></td>
+ *       <td valign="top"><em>a Unicode property set pattern</em></td>
+ *     </tr>
+ *   </table>
+ *   <br>
+ *   <table border="1">
+ *     <tr>
+ *       <td>Legend: <table>
+ *         <tr>
+ *           <td nowrap valign="top"><code>a := b</code></td>
+ *           <td width="20" valign="top">&nbsp; </td>
+ *           <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
+ *         </tr>
+ *         <tr>
+ *           <td nowrap valign="top"><code>a?</code></td>
+ *           <td valign="top"></td>
+ *           <td valign="top">zero or one instance of <code>a</code><br>
+ *           </td>
+ *         </tr>
+ *         <tr>
+ *           <td nowrap valign="top"><code>a*</code></td>
+ *           <td valign="top"></td>
+ *           <td valign="top">one or more instances of <code>a</code><br>
+ *           </td>
+ *         </tr>
+ *         <tr>
+ *           <td nowrap valign="top"><code>a | b</code></td>
+ *           <td valign="top"></td>
+ *           <td valign="top">either <code>a</code> or <code>b</code><br>
+ *           </td>
+ *         </tr>
+ *         <tr>
+ *           <td nowrap valign="top"><code>'a'</code></td>
+ *           <td valign="top"></td>
+ *           <td valign="top">the literal string between the quotes </td>
+ *         </tr>
+ *       </table>
+ *       </td>
+ *     </tr>
+ *   </table>
+ * \htmlonly</blockquote>\endhtmlonly
+ * 
+ * <p>Note:
+ *  - Most UnicodeSet methods do not take a UErrorCode parameter because
+ *   there are usually very few opportunities for failure other than a shortage
+ *   of memory, error codes in low-level C++ string methods would be inconvenient,
+ *   and the error code as the last parameter (ICU convention) would prevent
+ *   the use of default parameter values.
+ *   Instead, such methods set the UnicodeSet into a "bogus" state
+ *   (see isBogus()) if an error occurs.
+ *
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeSet : public UnicodeFilter {
+
+    int32_t len; // length of list used; 0 <= len <= capacity
+    int32_t capacity; // capacity of list
+    UChar32* list; // MUST be terminated with HIGH
+    BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
+    UChar32* buffer; // internal buffer, may be NULL
+    int32_t bufferCapacity; // capacity of buffer
+    int32_t patLen;
+
+    /**
+     * The pattern representation of this set.  This may not be the
+     * most economical pattern.  It is the pattern supplied to
+     * applyPattern(), with variables substituted and whitespace
+     * removed.  For sets constructed without applyPattern(), or
+     * modified using the non-pattern API, this string will be empty,
+     * indicating that toPattern() must generate a pattern
+     * representation from the inversion list.
+     */
+    UChar *pat;
+    UVector* strings; // maintained in sorted order
+    UnicodeSetStringSpan *stringSpan;
+
+private:
+    enum { // constants
+        kIsBogus = 1       // This set is bogus (i.e. not valid)
+    };
+    uint8_t fFlags;         // Bit flag (see constants above)
+public:
+    /**
+     * Determine if this object contains a valid set.
+     * A bogus set has no value. It is different from an empty set.
+     * It can be used to indicate that no set value is available.
+     *
+     * @return TRUE if the set is valid, FALSE otherwise
+     * @see setToBogus()
+     * @draft ICU 4.0
+     */
+    inline UBool isBogus(void) const;
+    
+    /**
+     * Make this UnicodeSet object invalid.
+     * The string will test TRUE with isBogus().
+     *
+     * A bogus set has no value. It is different from an empty set.
+     * It can be used to indicate that no set value is available.
+     *
+     * This utility function is used throughout the UnicodeSet
+     * implementation to indicate that a UnicodeSet operation failed,
+     * and may be used in other functions,
+     * especially but not exclusively when such functions do not
+     * take a UErrorCode for simplicity.
+     *
+     * @see isBogus()
+     * @draft ICU 4.0
+     */
+    void setToBogus();
+
+public:
+
+    enum {
+        /**
+         * Minimum value that can be stored in a UnicodeSet.
+         * @stable ICU 2.4
+         */
+        MIN_VALUE = 0,
+
+        /**
+         * Maximum value that can be stored in a UnicodeSet.
+         * @stable ICU 2.4
+         */
+        MAX_VALUE = 0x10ffff
+    };
+
+    //----------------------------------------------------------------
+    // Constructors &c
+    //----------------------------------------------------------------
+
+public:
+
+    /**
+     * Constructs an empty set.
+     * @stable ICU 2.0
+     */
+    UnicodeSet();
+
+    /**
+     * Constructs a set containing the given range. If <code>end >
+     * start</code> then an empty set is created.
+     *
+     * @param start first character, inclusive, of range
+     * @param end last character, inclusive, of range
+     * @stable ICU 2.4
+     */
+    UnicodeSet(UChar32 start, UChar32 end);
+
+    /**
+     * Constructs a set from the given pattern.  See the class
+     * description for the syntax of the pattern language.
+     * @param pattern a string specifying what characters are in the set
+     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+     * contains a syntax error.
+     * @stable ICU 2.0
+     */
+    UnicodeSet(const UnicodeString& pattern,
+               UErrorCode& status);
+
+    /**
+     * Constructs a set from the given pattern.  See the class
+     * description for the syntax of the pattern language.
+     * @param pattern a string specifying what characters are in the set
+     * @param options bitmask for options to apply to the pattern.
+     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+     * @param symbols a symbol table mapping variable names to values
+     * and stand-in characters to UnicodeSets; may be NULL
+     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+     * contains a syntax error.
+     * @internal
+     */
+    UnicodeSet(const UnicodeString& pattern,
+               uint32_t options,
+               const SymbolTable* symbols,
+               UErrorCode& status);
+
+    /**
+     * Constructs a set from the given pattern.  See the class description
+     * for the syntax of the pattern language.
+     * @param pattern a string specifying what characters are in the set
+     * @param pos on input, the position in pattern at which to start parsing.
+     * On output, the position after the last character parsed.
+     * @param options bitmask for options to apply to the pattern.
+     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+     * @param symbols a symbol table mapping variable names to values
+     * and stand-in characters to UnicodeSets; may be NULL
+     * @param status input-output error code
+     * @stable ICU 2.8
+     */
+    UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
+               uint32_t options,
+               const SymbolTable* symbols,
+               UErrorCode& status);
+
+    /**
+     * Constructs a set that is identical to the given UnicodeSet.
+     * @stable ICU 2.0
+     */
+    UnicodeSet(const UnicodeSet& o);
+
+    /**
+     * Destructs the set.
+     * @stable ICU 2.0
+     */
+    virtual ~UnicodeSet();
+
+    /**
+     * Assigns this object to be a copy of another.
+     * A frozen set will not be modified.
+     * @stable ICU 2.0
+     */
+    UnicodeSet& operator=(const UnicodeSet& o);
+
+    /**
+     * Compares the specified object with this set for equality.  Returns
+     * <tt>true</tt> if the two sets
+     * have the same size, and every member of the specified set is
+     * contained in this set (or equivalently, every member of this set is
+     * contained in the specified set).
+     *
+     * @param o set to be compared for equality with this set.
+     * @return <tt>true</tt> if the specified set is equal to this set.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const UnicodeSet& o) const;
+
+    /**
+     * Compares the specified object with this set for equality.  Returns
+     * <tt>true</tt> if the specified set is not equal to this set.
+     * @stable ICU 2.0
+     */
+    UBool operator!=(const UnicodeSet& o) const;
+
+    /**
+     * Returns a copy of this object.  All UnicodeFunctor objects have
+     * to support cloning in order to allow classes using
+     * UnicodeFunctors, such as Transliterator, to implement cloning.
+     * If this set is frozen, then the clone will be frozen as well.
+     * Use cloneAsThawed() for a mutable clone of a frozen set.
+     * @see cloneAsThawed
+     * @stable ICU 2.0
+     */
+    virtual UnicodeFunctor* clone() const;
+
+    /**
+     * Returns the hash code value for this set.
+     *
+     * @return the hash code value for this set.
+     * @see Object#hashCode()
+     * @stable ICU 2.0
+     */
+    virtual int32_t hashCode(void) const;
+
+    //----------------------------------------------------------------
+    // Freezable API
+    //----------------------------------------------------------------
+
+    /**
+     * Determines whether the set has been frozen (made immutable) or not.
+     * See the ICU4J Freezable interface for details.
+     * @return TRUE/FALSE for whether the set has been frozen
+     * @see freeze
+     * @see cloneAsThawed
+     * @stable ICU 4.0
+     */
+    inline UBool isFrozen() const;
+
+    /**
+     * Freeze the set (make it immutable).
+     * Once frozen, it cannot be unfrozen and is therefore thread-safe
+     * until it is deleted.
+     * See the ICU4J Freezable interface for details.
+     * Freezing the set may also make some operations faster, for example
+     * contains() and span().
+     * A frozen set will not be modified. (It remains frozen.)
+     * @return this set.
+     * @see isFrozen
+     * @see cloneAsThawed
+     * @stable ICU 4.0
+     */
+    UnicodeFunctor *freeze();
+
+    /**
+     * Clone the set and make the clone mutable.
+     * See the ICU4J Freezable interface for details.
+     * @return the mutable clone
+     * @see freeze
+     * @see isFrozen
+     * @stable ICU 4.0
+     */
+    UnicodeFunctor *cloneAsThawed() const;
+
+    //----------------------------------------------------------------
+    // Public API
+    //----------------------------------------------------------------
+
+    /**
+     * Make this object represent the range <code>start - end</code>.
+     * If <code>end > start</code> then this object is set to an
+     * an empty range.
+     * A frozen set will not be modified.
+     *
+     * @param start first character in the set, inclusive
+     * @param end last character in the set, inclusive
+     * @stable ICU 2.4
+     */
+    UnicodeSet& set(UChar32 start, UChar32 end);
+
+    /**
+     * Return true if the given position, in the given pattern, appears
+     * to be the start of a UnicodeSet pattern.
+     * @stable ICU 2.4
+     */
+    static UBool resemblesPattern(const UnicodeString& pattern,
+                                  int32_t pos);
+
+    /**
+     * Modifies this set to represent the set specified by the given
+     * pattern, optionally ignoring white space.  See the class
+     * description for the syntax of the pattern language.
+     * A frozen set will not be modified.
+     * @param pattern a string specifying what characters are in the set
+     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+     * contains a syntax error.
+     * <em> Empties the set passed before applying the pattern.</em>
+     * @return a reference to this
+     * @stable ICU 2.0
+     */
+    UnicodeSet& applyPattern(const UnicodeString& pattern,
+                             UErrorCode& status);
+
+    /**
+     * Modifies this set to represent the set specified by the given
+     * pattern, optionally ignoring white space.  See the class
+     * description for the syntax of the pattern language.
+     * A frozen set will not be modified.
+     * @param pattern a string specifying what characters are in the set
+     * @param options bitmask for options to apply to the pattern.
+     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+     * @param symbols a symbol table mapping variable names to
+     * values and stand-ins to UnicodeSets; may be NULL
+     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+     * contains a syntax error.
+     *<em> Empties the set passed before applying the pattern.</em>
+     * @return a reference to this
+     * @internal
+     */
+    UnicodeSet& applyPattern(const UnicodeString& pattern,
+                             uint32_t options,
+                             const SymbolTable* symbols,
+                             UErrorCode& status);
+
+    /**
+     * Parses the given pattern, starting at the given position.  The
+     * character at pattern.charAt(pos.getIndex()) must be '[', or the
+     * parse fails.  Parsing continues until the corresponding closing
+     * ']'.  If a syntax error is encountered between the opening and
+     * closing brace, the parse fails.  Upon return from a successful
+     * parse, the ParsePosition is updated to point to the character
+     * following the closing ']', and a StringBuffer containing a
+     * pairs list for the parsed pattern is returned.  This method calls
+     * itself recursively to parse embedded subpatterns.
+     *<em> Empties the set passed before applying the pattern.</em>
+     * A frozen set will not be modified.
+     *
+     * @param pattern the string containing the pattern to be parsed.
+     * The portion of the string from pos.getIndex(), which must be a
+     * '[', to the corresponding closing ']', is parsed.
+     * @param pos upon entry, the position at which to being parsing.
+     * The character at pattern.charAt(pos.getIndex()) must be a '['.
+     * Upon return from a successful parse, pos.getIndex() is either
+     * the character after the closing ']' of the parsed pattern, or
+     * pattern.length() if the closing ']' is the last character of
+     * the pattern string.
+     * @param options bitmask for options to apply to the pattern.
+     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+     * @param symbols a symbol table mapping variable names to
+     * values and stand-ins to UnicodeSets; may be NULL
+     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+     * contains a syntax error.
+     * @return a reference to this
+     * @stable ICU 2.8
+     */
+    UnicodeSet& applyPattern(const UnicodeString& pattern,
+                             ParsePosition& pos,
+                             uint32_t options,
+                             const SymbolTable* symbols,
+                             UErrorCode& status);
+
+    /**
+     * Returns a string representation of this set.  If the result of
+     * calling this function is passed to a UnicodeSet constructor, it
+     * will produce another set that is equal to this one.
+     * A frozen set will not be modified.
+     * @param result the string to receive the rules.  Previous
+     * contents will be deleted.
+     * @param escapeUnprintable if TRUE then convert unprintable
+     * character to their hex escape representations, \\uxxxx or
+     * \\Uxxxxxxxx.  Unprintable characters are those other than
+     * U+000A, U+0020..U+007E.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeString& toPattern(UnicodeString& result,
+                             UBool escapeUnprintable = FALSE) const;
+
+    /**
+     * Modifies this set to contain those code points which have the given value
+     * for the given binary or enumerated property, as returned by
+     * u_getIntPropertyValue.  Prior contents of this set are lost.
+     * A frozen set will not be modified.
+     *
+     * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
+     * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
+     * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
+     *
+     * @param value a value in the range u_getIntPropertyMinValue(prop)..
+     * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
+     * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
+     * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
+     * categories such as [:L:] to be represented.
+     *
+     * @param ec error code input/output parameter
+     *
+     * @return a reference to this set
+     *
+     * @stable ICU 2.4
+     */
+    UnicodeSet& applyIntPropertyValue(UProperty prop,
+                                      int32_t value,
+                                      UErrorCode& ec);
+
+    /**
+     * Modifies this set to contain those code points which have the
+     * given value for the given property.  Prior contents of this
+     * set are lost.
+     * A frozen set will not be modified.
+     *
+     * @param prop a property alias, either short or long.  The name is matched
+     * loosely.  See PropertyAliases.txt for names and a description of loose
+     * matching.  If the value string is empty, then this string is interpreted
+     * as either a General_Category value alias, a Script value alias, a binary
+     * property alias, or a special ID.  Special IDs are matched loosely and
+     * correspond to the following sets:
+     *
+     * "ANY" = [\\u0000-\\U0010FFFF],
+     * "ASCII" = [\\u0000-\\u007F],
+     * "Assigned" = [:^Cn:].
+     *
+     * @param value a value alias, either short or long.  The name is matched
+     * loosely.  See PropertyValueAliases.txt for names and a description of
+     * loose matching.  In addition to aliases listed, numeric values and
+     * canonical combining classes may be expressed numerically, e.g., ("nv",
+     * "0.5") or ("ccc", "220").  The value string may also be empty.
+     *
+     * @param ec error code input/output parameter
+     *
+     * @return a reference to this set
+     *
+     * @stable ICU 2.4
+     */
+    UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
+                                   const UnicodeString& value,
+                                   UErrorCode& ec);
+
+    /**
+     * Returns the number of elements in this set (its cardinality).
+     * Note than the elements of a set may include both individual
+     * codepoints and strings.
+     *
+     * @return the number of elements in this set (its cardinality).
+     * @stable ICU 2.0
+     */
+    virtual int32_t size(void) const;
+
+    /**
+     * Returns <tt>true</tt> if this set contains no elements.
+     *
+     * @return <tt>true</tt> if this set contains no elements.
+     * @stable ICU 2.0
+     */
+    virtual UBool isEmpty(void) const;
+
+    /**
+     * Returns true if this set contains the given character.
+     * This function works faster with a frozen set.
+     * @param c character to be checked for containment
+     * @return true if the test condition is met
+     * @stable ICU 2.0
+     */
+    virtual UBool contains(UChar32 c) const;
+
+    /**
+     * Returns true if this set contains every character
+     * of the given range.
+     * @param start first character, inclusive, of the range
+     * @param end last character, inclusive, of the range
+     * @return true if the test condition is met
+     * @stable ICU 2.0
+     */
+    virtual UBool contains(UChar32 start, UChar32 end) const;
+
+    /**
+     * Returns <tt>true</tt> if this set contains the given
+     * multicharacter string.
+     * @param s string to be checked for containment
+     * @return <tt>true</tt> if this set contains the specified string
+     * @stable ICU 2.4
+     */
+    UBool contains(const UnicodeString& s) const;
+
+    /**
+     * Returns true if this set contains all the characters and strings
+     * of the given set.
+     * @param c set to be checked for containment
+     * @return true if the test condition is met
+     * @stable ICU 2.4
+     */
+    virtual UBool containsAll(const UnicodeSet& c) const;
+
+    /**
+     * Returns true if this set contains all the characters
+     * of the given string.
+     * @param s string containing characters to be checked for containment
+     * @return true if the test condition is met
+     * @stable ICU 2.4
+     */
+    UBool containsAll(const UnicodeString& s) const;
+
+    /**
+     * Returns true if this set contains none of the characters
+     * of the given range.
+     * @param start first character, inclusive, of the range
+     * @param end last character, inclusive, of the range
+     * @return true if the test condition is met
+     * @stable ICU 2.4
+     */
+    UBool containsNone(UChar32 start, UChar32 end) const;
+
+    /**
+     * Returns true if this set contains none of the characters and strings
+     * of the given set.
+     * @param c set to be checked for containment
+     * @return true if the test condition is met
+     * @stable ICU 2.4
+     */
+    UBool containsNone(const UnicodeSet& c) const;
+
+    /**
+     * Returns true if this set contains none of the characters
+     * of the given string.
+     * @param s string containing characters to be checked for containment
+     * @return true if the test condition is met
+     * @stable ICU 2.4
+     */
+    UBool containsNone(const UnicodeString& s) const;
+
+    /**
+     * Returns true if this set contains one or more of the characters
+     * in the given range.
+     * @param start first character, inclusive, of the range
+     * @param end last character, inclusive, of the range
+     * @return true if the condition is met
+     * @stable ICU 2.4
+     */
+    inline UBool containsSome(UChar32 start, UChar32 end) const;
+
+    /**
+     * Returns true if this set contains one or more of the characters
+     * and strings of the given set.
+     * @param s The set to be checked for containment
+     * @return true if the condition is met
+     * @stable ICU 2.4
+     */
+    inline UBool containsSome(const UnicodeSet& s) const;
+
+    /**
+     * Returns true if this set contains one or more of the characters
+     * of the given string.
+     * @param s string containing characters to be checked for containment
+     * @return true if the condition is met
+     * @stable ICU 2.4
+     */
+    inline UBool containsSome(const UnicodeString& s) const;
+
+    /**
+     * Returns the length of the initial substring of the input string which
+     * consists only of characters and strings that are contained in this set
+     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+     * or only of characters and strings that are not contained
+     * in this set (USET_SPAN_NOT_CONTAINED).
+     * See USetSpanCondition for details.
+     * Similar to the strspn() C library function.
+     * Unpaired surrogates are treated according to contains() of their surrogate code points.
+     * This function works faster with a frozen set and with a non-negative string length argument.
+     * @param s start of the string
+     * @param length of the string; can be -1 for NUL-terminated
+     * @param spanCondition specifies the containment condition
+     * @return the length of the initial substring according to the spanCondition;
+     *         0 if the start of the string does not fit the spanCondition
+     * @stable ICU 4.0
+     * @see USetSpanCondition
+     */
+    int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
+
+    /**
+     * Returns the start of the trailing substring of the input string which
+     * consists only of characters and strings that are contained in this set
+     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+     * or only of characters and strings that are not contained
+     * in this set (USET_SPAN_NOT_CONTAINED).
+     * See USetSpanCondition for details.
+     * Unpaired surrogates are treated according to contains() of their surrogate code points.
+     * This function works faster with a frozen set and with a non-negative string length argument.
+     * @param s start of the string
+     * @param length of the string; can be -1 for NUL-terminated
+     * @param spanCondition specifies the containment condition
+     * @return the start of the trailing substring according to the spanCondition;
+     *         the string length if the end of the string does not fit the spanCondition
+     * @stable ICU 4.0
+     * @see USetSpanCondition
+     */
+    int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
+
+    /**
+     * Returns the length of the initial substring of the input string which
+     * consists only of characters and strings that are contained in this set
+     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+     * or only of characters and strings that are not contained
+     * in this set (USET_SPAN_NOT_CONTAINED).
+     * See USetSpanCondition for details.
+     * Similar to the strspn() C library function.
+     * Malformed byte sequences are treated according to contains(0xfffd).
+     * This function works faster with a frozen set and with a non-negative string length argument.
+     * @param s start of the string (UTF-8)
+     * @param length of the string; can be -1 for NUL-terminated
+     * @param spanCondition specifies the containment condition
+     * @return the length of the initial substring according to the spanCondition;
+     *         0 if the start of the string does not fit the spanCondition
+     * @stable ICU 4.0
+     * @see USetSpanCondition
+     */
+    int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
+
+    /**
+     * Returns the start of the trailing substring of the input string which
+     * consists only of characters and strings that are contained in this set
+     * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+     * or only of characters and strings that are not contained
+     * in this set (USET_SPAN_NOT_CONTAINED).
+     * See USetSpanCondition for details.
+     * Malformed byte sequences are treated according to contains(0xfffd).
+     * This function works faster with a frozen set and with a non-negative string length argument.
+     * @param s start of the string (UTF-8)
+     * @param length of the string; can be -1 for NUL-terminated
+     * @param spanCondition specifies the containment condition
+     * @return the start of the trailing substring according to the spanCondition;
+     *         the string length if the end of the string does not fit the spanCondition
+     * @stable ICU 4.0
+     * @see USetSpanCondition
+     */
+    int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
+
+    /**
+     * Implement UnicodeMatcher::matches()
+     * @stable ICU 2.4
+     */
+    virtual UMatchDegree matches(const Replaceable& text,
+                         int32_t& offset,
+                         int32_t limit,
+                         UBool incremental);
+
+private:
+    /**
+     * Returns the longest match for s in text at the given position.
+     * If limit > start then match forward from start+1 to limit
+     * matching all characters except s.charAt(0).  If limit < start,
+     * go backward starting from start-1 matching all characters
+     * except s.charAt(s.length()-1).  This method assumes that the
+     * first character, text.charAt(start), matches s, so it does not
+     * check it.
+     * @param text the text to match
+     * @param start the first character to match.  In the forward
+     * direction, text.charAt(start) is matched against s.charAt(0).
+     * In the reverse direction, it is matched against
+     * s.charAt(s.length()-1).
+     * @param limit the limit offset for matching, either last+1 in
+     * the forward direction, or last-1 in the reverse direction,
+     * where last is the index of the last character to match.
+     * @return If part of s matches up to the limit, return |limit -
+     * start|.  If all of s matches before reaching the limit, return
+     * s.length().  If there is a mismatch between s and text, return
+     * 0
+     */
+    static int32_t matchRest(const Replaceable& text,
+                             int32_t start, int32_t limit,
+                             const UnicodeString& s);
+
+    /**
+     * Returns the smallest value i such that c < list[i].  Caller
+     * must ensure that c is a legal value or this method will enter
+     * an infinite loop.  This method performs a binary search.
+     * @param c a character in the range MIN_VALUE..MAX_VALUE
+     * inclusive
+     * @return the smallest integer i in the range 0..len-1,
+     * inclusive, such that c < list[i]
+     */
+    int32_t findCodePoint(UChar32 c) const;
+
+public:
+
+    /**
+     * Implementation of UnicodeMatcher API.  Union the set of all
+     * characters that may be matched by this object into the given
+     * set.
+     * @param toUnionTo the set into which to union the source characters
+     * @stable ICU 2.4
+     */
+    virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
+
+    /**
+     * Returns the index of the given character within this set, where
+     * the set is ordered by ascending code point.  If the character
+     * is not in this set, return -1.  The inverse of this method is
+     * <code>charAt()</code>.
+     * @return an index from 0..size()-1, or -1
+     * @stable ICU 2.4
+     */
+    int32_t indexOf(UChar32 c) const;
+
+    /**
+     * Returns the character at the given index within this set, where
+     * the set is ordered by ascending code point.  If the index is
+     * out of range, return (UChar32)-1.  The inverse of this method is
+     * <code>indexOf()</code>.
+     * @param index an index from 0..size()-1
+     * @return the character at the given index, or (UChar32)-1.
+     * @stable ICU 2.4
+     */
+    UChar32 charAt(int32_t index) const;
+
+    /**
+     * Adds the specified range to this set if it is not already
+     * present.  If this set already contains the specified range,
+     * the call leaves this set unchanged.  If <code>end > start</code>
+     * then an empty range is added, leaving the set unchanged.
+     * This is equivalent to a boolean logic OR, or a set UNION.
+     * A frozen set will not be modified.
+     *
+     * @param start first character, inclusive, of range to be added
+     * to this set.
+     * @param end last character, inclusive, of range to be added
+     * to this set.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeSet& add(UChar32 start, UChar32 end);
+
+    /**
+     * Adds the specified character to this set if it is not already
+     * present.  If this set already contains the specified character,
+     * the call leaves this set unchanged.
+     * A frozen set will not be modified.
+     * @stable ICU 2.0
+     */
+    UnicodeSet& add(UChar32 c);
+
+    /**
+     * Adds the specified multicharacter to this set if it is not already
+     * present.  If this set already contains the multicharacter,
+     * the call leaves this set unchanged.
+     * Thus "ch" => {"ch"}
+     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+     * A frozen set will not be modified.
+     * @param s the source string
+     * @return this object, for chaining
+     * @stable ICU 2.4
+     */
+    UnicodeSet& add(const UnicodeString& s);
+
+ private:
+    /**
+     * @return a code point IF the string consists of a single one.
+     * otherwise returns -1.
+     * @param string to test
+     */
+    static int32_t getSingleCP(const UnicodeString& s);
+
+    void _add(const UnicodeString& s);
+
+ public:
+    /**
+     * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
+     * If this set already any particular character, it has no effect on that character.
+     * A frozen set will not be modified.
+     * @param s the source string
+     * @return this object, for chaining
+     * @stable ICU 2.4
+     */
+    UnicodeSet& addAll(const UnicodeString& s);
+
+    /**
+     * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
+     * If this set already any particular character, it has no effect on that character.
+     * A frozen set will not be modified.
+     * @param s the source string
+     * @return this object, for chaining
+     * @stable ICU 2.4
+     */
+    UnicodeSet& retainAll(const UnicodeString& s);
+
+    /**
+     * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
+     * If this set already any particular character, it has no effect on that character.
+     * A frozen set will not be modified.
+     * @param s the source string
+     * @return this object, for chaining
+     * @stable ICU 2.4
+     */
+    UnicodeSet& complementAll(const UnicodeString& s);
+
+    /**
+     * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
+     * If this set already any particular character, it has no effect on that character.
+     * A frozen set will not be modified.
+     * @param s the source string
+     * @return this object, for chaining
+     * @stable ICU 2.4
+     */
+    UnicodeSet& removeAll(const UnicodeString& s);
+
+    /**
+     * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
+     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+     * @param s the source string
+     * @return a newly created set containing the given string.
+     * The caller owns the return object and is responsible for deleting it.
+     * @stable ICU 2.4
+     */
+    static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
+
+
+    /**
+     * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
+     * @param s the source string
+     * @return a newly created set containing the given characters
+     * The caller owns the return object and is responsible for deleting it.
+     * @stable ICU 2.4
+     */
+    static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
+
+    /**
+     * Retain only the elements in this set that are contained in the
+     * specified range.  If <code>end > start</code> then an empty range is
+     * retained, leaving the set empty.  This is equivalent to
+     * a boolean logic AND, or a set INTERSECTION.
+     * A frozen set will not be modified.
+     *
+     * @param start first character, inclusive, of range to be retained
+     * to this set.
+     * @param end last character, inclusive, of range to be retained
+     * to this set.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeSet& retain(UChar32 start, UChar32 end);
+
+
+    /**
+     * Retain the specified character from this set if it is present.
+     * A frozen set will not be modified.
+     * @stable ICU 2.0
+     */
+    UnicodeSet& retain(UChar32 c);
+
+    /**
+     * Removes the specified range from this set if it is present.
+     * The set will not contain the specified range once the call
+     * returns.  If <code>end > start</code> then an empty range is
+     * removed, leaving the set unchanged.
+     * A frozen set will not be modified.
+     *
+     * @param start first character, inclusive, of range to be removed
+     * from this set.
+     * @param end last character, inclusive, of range to be removed
+     * from this set.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeSet& remove(UChar32 start, UChar32 end);
+
+    /**
+     * Removes the specified character from this set if it is present.
+     * The set will not contain the specified range once the call
+     * returns.
+     * A frozen set will not be modified.
+     * @stable ICU 2.0
+     */
+    UnicodeSet& remove(UChar32 c);
+
+    /**
+     * Removes the specified string from this set if it is present.
+     * The set will not contain the specified character once the call
+     * returns.
+     * A frozen set will not be modified.
+     * @param s the source string
+     * @return this object, for chaining
+     * @stable ICU 2.4
+     */
+    UnicodeSet& remove(const UnicodeString& s);
+
+    /**
+     * Inverts this set.  This operation modifies this set so that
+     * its value is its complement.  This is equivalent to
+     * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
+     * A frozen set will not be modified.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeSet& complement(void);
+
+    /**
+     * Complements the specified range in this set.  Any character in
+     * the range will be removed if it is in this set, or will be
+     * added if it is not in this set.  If <code>end > start</code>
+     * then an empty range is complemented, leaving the set unchanged.
+     * This is equivalent to a boolean logic XOR.
+     * A frozen set will not be modified.
+     *
+     * @param start first character, inclusive, of range to be removed
+     * from this set.
+     * @param end last character, inclusive, of range to be removed
+     * from this set.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeSet& complement(UChar32 start, UChar32 end);
+
+    /**
+     * Complements the specified character in this set.  The character
+     * will be removed if it is in this set, or will be added if it is
+     * not in this set.
+     * A frozen set will not be modified.
+     * @stable ICU 2.0
+     */
+    UnicodeSet& complement(UChar32 c);
+
+    /**
+     * Complement the specified string in this set.
+     * The set will not contain the specified string once the call
+     * returns.
+     * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+     * A frozen set will not be modified.
+     * @param s the string to complement
+     * @return this object, for chaining
+     * @stable ICU 2.4
+     */
+    UnicodeSet& complement(const UnicodeString& s);
+
+    /**
+     * Adds all of the elements in the specified set to this set if
+     * they're not already present.  This operation effectively
+     * modifies this set so that its value is the <i>union</i> of the two
+     * sets.  The behavior of this operation is unspecified if the specified
+     * collection is modified while the operation is in progress.
+     * A frozen set will not be modified.
+     *
+     * @param c set whose elements are to be added to this set.
+     * @see #add(UChar32, UChar32)
+     * @stable ICU 2.0
+     */
+    virtual UnicodeSet& addAll(const UnicodeSet& c);
+
+    /**
+     * Retains only the elements in this set that are contained in the
+     * specified set.  In other words, removes from this set all of
+     * its elements that are not contained in the specified set.  This
+     * operation effectively modifies this set so that its value is
+     * the <i>intersection</i> of the two sets.
+     * A frozen set will not be modified.
+     *
+     * @param c set that defines which elements this set will retain.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeSet& retainAll(const UnicodeSet& c);
+
+    /**
+     * Removes from this set all of its elements that are contained in the
+     * specified set.  This operation effectively modifies this
+     * set so that its value is the <i>asymmetric set difference</i> of
+     * the two sets.
+     * A frozen set will not be modified.
+     *
+     * @param c set that defines which elements will be removed from
+     *          this set.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeSet& removeAll(const UnicodeSet& c);
+
+    /**
+     * Complements in this set all elements contained in the specified
+     * set.  Any character in the other set will be removed if it is
+     * in this set, or will be added if it is not in this set.
+     * A frozen set will not be modified.
+     *
+     * @param c set that defines which elements will be xor'ed from
+     *          this set.
+     * @stable ICU 2.4
+     */
+    virtual UnicodeSet& complementAll(const UnicodeSet& c);
+
+    /**
+     * Removes all of the elements from this set.  This set will be
+     * empty after this call returns.
+     * A frozen set will not be modified.
+     * @stable ICU 2.0
+     */
+    virtual UnicodeSet& clear(void);
+
+    /**
+     * Close this set over the given attribute.  For the attribute
+     * USET_CASE, the result is to modify this set so that:
+     *
+     * 1. For each character or string 'a' in this set, all strings or
+     * characters 'b' such that foldCase(a) == foldCase(b) are added
+     * to this set.
+     *
+     * 2. For each string 'e' in the resulting set, if e !=
+     * foldCase(e), 'e' will be removed.
+     *
+     * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
+     *
+     * (Here foldCase(x) refers to the operation u_strFoldCase, and a
+     * == b denotes that the contents are the same, not pointer
+     * comparison.)
+     *
+     * A frozen set will not be modified.
+     *
+     * @param attribute bitmask for attributes to close over.
+     * Currently only the USET_CASE bit is supported.  Any undefined bits
+     * are ignored.
+     * @return a reference to this set.
+     * @internal
+     */
+    UnicodeSet& closeOver(int32_t attribute);
+
+    /**
+     * Remove all strings from this set.
+     *
+     * @return a reference to this set.
+     * @internal
+     */
+    virtual UnicodeSet &removeAllStrings();
+
+    /**
+     * Iteration method that returns the number of ranges contained in
+     * this set.
+     * @see #getRangeStart
+     * @see #getRangeEnd
+     * @stable ICU 2.4
+     */
+    virtual int32_t getRangeCount(void) const;
+
+    /**
+     * Iteration method that returns the first character in the
+     * specified range of this set.
+     * @see #getRangeCount
+     * @see #getRangeEnd
+     * @stable ICU 2.4
+     */
+    virtual UChar32 getRangeStart(int32_t index) const;
+
+    /**
+     * Iteration method that returns the last character in the
+     * specified range of this set.
+     * @see #getRangeStart
+     * @see #getRangeEnd
+     * @stable ICU 2.4
+     */
+    virtual UChar32 getRangeEnd(int32_t index) const;
+
+    /**
+     * Serializes this set into an array of 16-bit integers.  Serialization
+     * (currently) only records the characters in the set; multicharacter
+     * strings are ignored.
+     *
+     * The array has following format (each line is one 16-bit
+     * integer):
+     *
+     *  length     = (n+2*m) | (m!=0?0x8000:0)
+     *  bmpLength  = n; present if m!=0
+     *  bmp[0]
+     *  bmp[1]
+     *  ...
+     *  bmp[n-1]
+     *  supp-high[0]
+     *  supp-low[0]
+     *  supp-high[1]
+     *  supp-low[1]
+     *  ...
+     *  supp-high[m-1]
+     *  supp-low[m-1]
+     *
+     * The array starts with a header.  After the header are n bmp
+     * code points, then m supplementary code points.  Either n or m
+     * or both may be zero.  n+2*m is always <= 0x7FFF.
+     *
+     * If there are no supplementary characters (if m==0) then the
+     * header is one 16-bit integer, 'length', with value n.
+     *
+     * If there are supplementary characters (if m!=0) then the header
+     * is two 16-bit integers.  The first, 'length', has value
+     * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
+     *
+     * After the header the code points are stored in ascending order.
+     * Supplementary code points are stored as most significant 16
+     * bits followed by least significant 16 bits.
+     *
+     * @param dest pointer to buffer of destCapacity 16-bit integers.
+     * May be NULL only if destCapacity is zero.
+     * @param destCapacity size of dest, or zero.  Must not be negative.
+     * @param ec error code.  Will be set to U_INDEX_OUTOFBOUNDS_ERROR
+     * if n+2*m > 0x7FFF.  Will be set to U_BUFFER_OVERFLOW_ERROR if
+     * n+2*m+(m!=0?2:1) > destCapacity.
+     * @return the total length of the serialized format, including
+     * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
+     * than U_BUFFER_OVERFLOW_ERROR.
+     * @stable ICU 2.4
+     */
+    int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
+
+    /**
+     * Reallocate this objects internal structures to take up the least
+     * possible space, without changing this object's value.
+     * A frozen set will not be modified.
+     * @stable ICU 2.4
+     */
+    virtual UnicodeSet& compact();
+
+    /**
+     * Return the class ID for this class.  This is useful only for
+     * comparing to a return value from getDynamicClassID().  For example:
+     * <pre>
+     * .      Base* polymorphic_pointer = createPolymorphicObject();
+     * .      if (polymorphic_pointer->getDynamicClassID() ==
+     * .          Derived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Implement UnicodeFunctor API.
+     *
+     * @return The class ID for this object. All objects of a given
+     * class have the same class ID.  Objects of other classes have
+     * different class IDs.
+     * @stable ICU 2.4
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+private:
+
+    // Private API for the USet API
+
+    friend class USetAccess;
+
+    int32_t getStringCount() const;
+
+    const UnicodeString* getString(int32_t index) const;
+
+    //----------------------------------------------------------------
+    // RuleBasedTransliterator support
+    //----------------------------------------------------------------
+
+private:
+
+    /**
+     * Returns <tt>true</tt> if this set contains any character whose low byte
+     * is the given value.  This is used by <tt>RuleBasedTransliterator</tt> for
+     * indexing.
+     */
+    virtual UBool matchesIndexValue(uint8_t v) const;
+
+private:
+
+    //----------------------------------------------------------------
+    // Implementation: Clone as thawed (see ICU4J Freezable)
+    //----------------------------------------------------------------
+
+    UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
+
+    //----------------------------------------------------------------
+    // Implementation: Pattern parsing
+    //----------------------------------------------------------------
+
+    void applyPattern(RuleCharacterIterator& chars,
+                      const SymbolTable* symbols,
+                      UnicodeString& rebuiltPat,
+                      uint32_t options,
+                      UErrorCode& ec);
+
+    //----------------------------------------------------------------
+    // Implementation: Utility methods
+    //----------------------------------------------------------------
+
+    void ensureCapacity(int32_t newLen, UErrorCode& ec);
+
+    void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
+
+    void swapBuffers(void);
+
+    UBool allocateStrings(UErrorCode &status);
+
+    UnicodeString& _toPattern(UnicodeString& result,
+                              UBool escapeUnprintable) const;
+
+    UnicodeString& _generatePattern(UnicodeString& result,
+                                    UBool escapeUnprintable) const;
+
+    static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
+
+    static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
+
+    //----------------------------------------------------------------
+    // Implementation: Fundamental operators
+    //----------------------------------------------------------------
+
+    void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
+
+    void add(const UChar32* other, int32_t otherLen, int8_t polarity);
+
+    void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
+
+    /**
+     * Return true if the given position, in the given pattern, appears
+     * to be the start of a property set pattern [:foo:], \\p{foo}, or
+     * \\P{foo}, or \\N{name}.
+     */
+    static UBool resemblesPropertyPattern(const UnicodeString& pattern,
+                                          int32_t pos);
+
+    static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
+                                          int32_t iterOpts);
+
+    /**
+     * Parse the given property pattern at the given parse position
+     * and set this UnicodeSet to the result.
+     *
+     * The original design document is out of date, but still useful.
+     * Ignore the property and value names:
+     * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/unicodeset_properties.html
+     *
+     * Recognized syntax:
+     *
+     * [:foo:] [:^foo:] - white space not allowed within "[:" or ":]"
+     * \\p{foo} \\P{foo}  - white space not allowed within "\\p" or "\\P"
+     * \\N{name}         - white space not allowed within "\\N"
+     *
+     * Other than the above restrictions, white space is ignored.  Case
+     * is ignored except in "\\p" and "\\P" and "\\N".  In 'name' leading
+     * and trailing space is deleted, and internal runs of whitespace
+     * are collapsed to a single space.
+     *
+     * We support binary properties, enumerated properties, and the
+     * following non-enumerated properties:
+     *
+     *  Numeric_Value
+     *  Name
+     *  Unicode_1_Name
+     *
+     * @param pattern the pattern string
+     * @param ppos on entry, the position at which to begin parsing.
+     * This should be one of the locations marked '^':
+     *
+     *   [:blah:]     \\p{blah}     \\P{blah}     \\N{name}
+     *   ^       %    ^       %    ^       %    ^       %
+     *
+     * On return, the position after the last character parsed, that is,
+     * the locations marked '%'.  If the parse fails, ppos is returned
+     * unchanged.
+     * @return a reference to this.
+     */
+    UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
+                                     ParsePosition& ppos,
+                                     UErrorCode &ec);
+
+    void applyPropertyPattern(RuleCharacterIterator& chars,
+                              UnicodeString& rebuiltPat,
+                              UErrorCode& ec);
+
+    static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
+
+    /**
+     * A filter that returns TRUE if the given code point should be
+     * included in the UnicodeSet being constructed.
+     */
+    typedef UBool (*Filter)(UChar32 codePoint, void* context);
+
+    /**
+     * Given a filter, set this UnicodeSet to the code points
+     * contained by that filter.  The filter MUST be
+     * property-conformant.  That is, if it returns value v for one
+     * code point, then it must return v for all affiliated code
+     * points, as defined by the inclusions list.  See
+     * getInclusions().
+     * src is a UPropertySource value.
+     */
+    void applyFilter(Filter filter,
+                     void* context,
+                     int32_t src,
+                     UErrorCode &status);
+
+    /**
+     * Set the new pattern to cache.
+     */
+    void setPattern(const UnicodeString& newPat);
+    /**
+     * Release existing cached pattern.
+     */
+    void releasePattern();
+
+    friend class UnicodeSetIterator;
+};
+
+inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
+    return !operator==(o);
+}
+
+inline UBool UnicodeSet::isFrozen() const {
+    return (UBool)(bmpSet!=NULL || stringSpan!=NULL);
+}
+
+inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
+    return !containsNone(start, end);
+}
+
+inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
+    return !containsNone(s);
+}
+
+inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
+    return !containsNone(s);
+}
+
+inline UBool UnicodeSet::isBogus() const {
+    return (UBool)(fFlags & kIsBogus);
+}
+
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/unistr.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/unistr.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/unistr.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,4230 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1998-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*
-* File unistr.h
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   09/25/98    stephen     Creation.
-*   11/11/98    stephen     Changed per 11/9 code review.
-*   04/20/99    stephen     Overhauled per 4/16 code review.
-*   11/18/99    aliu        Made to inherit from Replaceable.  Added method
-*                           handleReplaceBetween(); other methods unchanged.
-*   06/25/01    grhoten     Remove dependency on iostream.
-******************************************************************************
-*/
-
-#ifndef UNISTR_H
-#define UNISTR_H
-
-/**
- * \file 
- * \brief C++ API: Unicode String 
- */
-
-#include "unicode/rep.h"
-
-struct UConverter;          // unicode/ucnv.h
-class  StringThreadTest;
-
-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also ustring.h and unorm.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER  0x8000
-#endif
-
-#ifndef USTRING_H
-/**
- * \ingroup ustring_ustrlen
- */
-U_STABLE int32_t U_EXPORT2
-u_strlen(const UChar *s);
-#endif
-
-U_NAMESPACE_BEGIN
-
-class Locale;               // unicode/locid.h
-class StringCharacterIterator;
-class BreakIterator;        // unicode/brkiter.h
-
-/* The <iostream> include has been moved to unicode/ustream.h */
-
-/**
- * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
- * which constructs a Unicode string from an invariant-character char * string.
- * About invariant characters see utypes.h.
- * This constructor has no runtime dependency on conversion code and is
- * therefore recommended over ones taking a charset name string
- * (where the empty string "" indicates invariant-character conversion).
- *
- * @stable ICU 3.2
- */
-#define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
-
-/**
- * Unicode String literals in C++.
- * Dependent on the platform properties, different UnicodeString
- * constructors should be used to create a UnicodeString object from
- * a string literal.
- * The macros are defined for maximum performance.
- * They work only for strings that contain "invariant characters", i.e.,
- * only latin letters, digits, and some punctuation.
- * See utypes.h for details.
- *
- * The string parameter must be a C string literal.
- * The length of the string, not including the terminating
- * <code>NUL</code>, must be specified as a constant.
- * The U_STRING_DECL macro should be invoked exactly once for one
- * such string variable before it is used.
- * @stable ICU 2.0
- */
-#if defined(U_DECLARE_UTF16)
-#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
-#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
-#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
-#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
-#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
-#else
-#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
-#endif
-
-/**
- * Unicode String literals in C++.
- * Dependent on the platform properties, different UnicodeString
- * constructors should be used to create a UnicodeString object from
- * a string literal.
- * The macros are defined for improved performance.
- * They work only for strings that contain "invariant characters", i.e.,
- * only latin letters, digits, and some punctuation.
- * See utypes.h for details.
- *
- * The string parameter must be a C string literal.
- * @stable ICU 2.0
- */
-#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
-
-/**
- * UnicodeString is a string class that stores Unicode characters directly and provides
- * similar functionality as the Java String and StringBuffer classes.
- * It is a concrete implementation of the abstract class Replaceable (for transliteration).
- *
- * The UnicodeString class is not suitable for subclassing.
- *
- * <p>For an overview of Unicode strings in C and C++ see the
- * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
- *
- * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
- * A Unicode character may be stored with either one code unit
- * (the most common case) or with a matched pair of special code units
- * ("surrogates"). The data type for code units is UChar. 
- * For single-character handling, a Unicode character code <em>point</em> is a value
- * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
- *
- * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
- * This is the same as with multi-byte char* strings in traditional string handling.
- * Operations on partial strings typically do not test for code point boundaries.
- * If necessary, the user needs to take care of such boundaries by testing for the code unit
- * values or by using functions like
- * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
- * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
- *
- * UnicodeString methods are more lenient with regard to input parameter values
- * than other ICU APIs. In particular:
- * - If indexes are out of bounds for a UnicodeString object
- *   (<0 or >length()) then they are "pinned" to the nearest boundary.
- * - If primitive string pointer values (e.g., const UChar * or char *)
- *   for input strings are NULL, then those input string parameters are treated
- *   as if they pointed to an empty string.
- *   However, this is <em>not</em> the case for char * parameters for charset names
- *   or other IDs.
- * - Most UnicodeString methods do not take a UErrorCode parameter because
- *   there are usually very few opportunities for failure other than a shortage
- *   of memory, error codes in low-level C++ string methods would be inconvenient,
- *   and the error code as the last parameter (ICU convention) would prevent
- *   the use of default parameter values.
- *   Instead, such methods set the UnicodeString into a "bogus" state
- *   (see isBogus()) if an error occurs.
- *
- * In string comparisons, two UnicodeString objects that are both "bogus"
- * compare equal (to be transitive and prevent endless loops in sorting),
- * and a "bogus" string compares less than any non-"bogus" one.
- *
- * Const UnicodeString methods are thread-safe. Multiple threads can use
- * const methods on the same UnicodeString object simultaneously,
- * but non-const methods must not be called concurrently (in multiple threads)
- * with any other (const or non-const) methods.
- *
- * Similarly, const UnicodeString & parameters are thread-safe.
- * One object may be passed in as such a parameter concurrently in multiple threads.
- * This includes the const UnicodeString & parameters for
- * copy construction, assignment, and cloning.
- *
- * <p>UnicodeString uses several storage methods.
- * String contents can be stored inside the UnicodeString object itself,
- * in an allocated and shared buffer, or in an outside buffer that is "aliased".
- * Most of this is done transparently, but careful aliasing in particular provides
- * significant performance improvements.
- * Also, the internal buffer is accessible via special functions.
- * For details see the
- * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
- *
- * @see utf.h
- * @see CharacterIterator
- * @stable ICU 2.0
- */
-class U_COMMON_API UnicodeString : public Replaceable
-{
-public:
-
-  /**
-   * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
-   * which constructs a Unicode string from an invariant-character char * string.
-   * Use the macro US_INV instead of the full qualification for this value.
-   *
-   * @see US_INV
-   * @stable ICU 3.2
-   */
-  enum EInvariant {
-    /**
-     * @see EInvariant
-     * @stable ICU 3.2
-     */
-    kInvariant
-  };
-
-  //========================================
-  // Read-only operations
-  //========================================
-
-  /* Comparison - bitwise only - for international comparison use collation */
-
-  /**
-   * Equality operator. Performs only bitwise comparison.
-   * @param text The UnicodeString to compare to this one.
-   * @return TRUE if <TT>text</TT> contains the same characters as this one,
-   * FALSE otherwise.
-   * @stable ICU 2.0
-   */
-  inline UBool operator== (const UnicodeString& text) const;
-
-  /**
-   * Inequality operator. Performs only bitwise comparison.
-   * @param text The UnicodeString to compare to this one.
-   * @return FALSE if <TT>text</TT> contains the same characters as this one,
-   * TRUE otherwise.
-   * @stable ICU 2.0
-   */
-  inline UBool operator!= (const UnicodeString& text) const;
-
-  /**
-   * Greater than operator. Performs only bitwise comparison.
-   * @param text The UnicodeString to compare to this one.
-   * @return TRUE if the characters in this are bitwise
-   * greater than the characters in <code>text</code>, FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool operator> (const UnicodeString& text) const;
-
-  /**
-   * Less than operator. Performs only bitwise comparison.
-   * @param text The UnicodeString to compare to this one.
-   * @return TRUE if the characters in this are bitwise
-   * less than the characters in <code>text</code>, FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool operator< (const UnicodeString& text) const;
-
-  /**
-   * Greater than or equal operator. Performs only bitwise comparison.
-   * @param text The UnicodeString to compare to this one.
-   * @return TRUE if the characters in this are bitwise
-   * greater than or equal to the characters in <code>text</code>, FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool operator>= (const UnicodeString& text) const;
-
-  /**
-   * Less than or equal operator. Performs only bitwise comparison.
-   * @param text The UnicodeString to compare to this one.
-   * @return TRUE if the characters in this are bitwise
-   * less than or equal to the characters in <code>text</code>, FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool operator<= (const UnicodeString& text) const;
-
-  /**
-   * Compare the characters bitwise in this UnicodeString to
-   * the characters in <code>text</code>.
-   * @param text The UnicodeString to compare to this one.
-   * @return The result of bitwise character comparison: 0 if this
-   * contains the same characters as <code>text</code>, -1 if the characters in
-   * this are bitwise less than the characters in <code>text</code>, +1 if the
-   * characters in this are bitwise greater than the characters
-   * in <code>text</code>.
-   * @stable ICU 2.0
-   */
-  inline int8_t compare(const UnicodeString& text) const;
-
-  /**
-   * Compare the characters bitwise in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
-   * in <TT>text</TT>
-   * @param start the offset at which the compare operation begins
-   * @param length the number of characters of text to compare.
-   * @param text the other text to be compared against this string.
-   * @return The result of bitwise character comparison: 0 if this
-   * contains the same characters as <code>text</code>, -1 if the characters in
-   * this are bitwise less than the characters in <code>text</code>, +1 if the
-   * characters in this are bitwise greater than the characters
-   * in <code>text</code>.
-   * @stable ICU 2.0
-   */
-  inline int8_t compare(int32_t start,
-         int32_t length,
-         const UnicodeString& text) const;
-
-  /**
-   * Compare the characters bitwise in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
-   * in <TT>srcText</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
-   * @param start the offset at which the compare operation begins
-   * @param length the number of characters in this to compare.
-   * @param srcText the text to be compared
-   * @param srcStart the offset into <TT>srcText</TT> to start comparison
-   * @param srcLength the number of characters in <TT>src</TT> to compare
-   * @return The result of bitwise character comparison: 0 if this
-   * contains the same characters as <code>srcText</code>, -1 if the characters in
-   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
-   * characters in this are bitwise greater than the characters
-   * in <code>srcText</code>.
-   * @stable ICU 2.0
-   */
-   inline int8_t compare(int32_t start,
-         int32_t length,
-         const UnicodeString& srcText,
-         int32_t srcStart,
-         int32_t srcLength) const;
-
-  /**
-   * Compare the characters bitwise in this UnicodeString with the first
-   * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
-   * @param srcChars The characters to compare to this UnicodeString.
-   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
-   * @return The result of bitwise character comparison: 0 if this
-   * contains the same characters as <code>srcChars</code>, -1 if the characters in
-   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
-   * characters in this are bitwise greater than the characters
-   * in <code>srcChars</code>.
-   * @stable ICU 2.0
-   */
-  inline int8_t compare(const UChar *srcChars,
-         int32_t srcLength) const;
-
-  /**
-   * Compare the characters bitwise in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) with the first
-   * <TT>length</TT> characters in <TT>srcChars</TT>
-   * @param start the offset at which the compare operation begins
-   * @param length the number of characters to compare.
-   * @param srcChars the characters to be compared
-   * @return The result of bitwise character comparison: 0 if this
-   * contains the same characters as <code>srcChars</code>, -1 if the characters in
-   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
-   * characters in this are bitwise greater than the characters
-   * in <code>srcChars</code>.
-   * @stable ICU 2.0
-   */
-  inline int8_t compare(int32_t start,
-         int32_t length,
-         const UChar *srcChars) const;
-
-  /**
-   * Compare the characters bitwise in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
-   * in <TT>srcChars</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
-   * @param start the offset at which the compare operation begins
-   * @param length the number of characters in this to compare
-   * @param srcChars the characters to be compared
-   * @param srcStart the offset into <TT>srcChars</TT> to start comparison
-   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
-   * @return The result of bitwise character comparison: 0 if this
-   * contains the same characters as <code>srcChars</code>, -1 if the characters in
-   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
-   * characters in this are bitwise greater than the characters
-   * in <code>srcChars</code>.
-   * @stable ICU 2.0
-   */
-  inline int8_t compare(int32_t start,
-         int32_t length,
-         const UChar *srcChars,
-         int32_t srcStart,
-         int32_t srcLength) const;
-
-  /**
-   * Compare the characters bitwise in the range
-   * [<TT>start</TT>, <TT>limit</TT>) with the characters
-   * in <TT>srcText</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
-   * @param start the offset at which the compare operation begins
-   * @param limit the offset immediately following the compare operation
-   * @param srcText the text to be compared
-   * @param srcStart the offset into <TT>srcText</TT> to start comparison
-   * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
-   * @return The result of bitwise character comparison: 0 if this
-   * contains the same characters as <code>srcText</code>, -1 if the characters in
-   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
-   * characters in this are bitwise greater than the characters
-   * in <code>srcText</code>.
-   * @stable ICU 2.0
-   */
-  inline int8_t compareBetween(int32_t start,
-            int32_t limit,
-            const UnicodeString& srcText,
-            int32_t srcStart,
-            int32_t srcLimit) const;
-
-  /**
-   * Compare two Unicode strings in code point order.
-   * The result may be different from the results of compare(), operator<, etc.
-   * if supplementary characters are present:
-   *
-   * In UTF-16, supplementary characters (with code points U+10000 and above) are
-   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
-   * which means that they compare as less than some other BMP characters like U+feff.
-   * This function compares Unicode strings in code point order.
-   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
-   *
-   * @param text Another string to compare this one to.
-   * @return a negative/zero/positive integer corresponding to whether
-   * this string is less than/equal to/greater than the second one
-   * in code point order
-   * @stable ICU 2.0
-   */
-  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
-
-  /**
-   * Compare two Unicode strings in code point order.
-   * The result may be different from the results of compare(), operator<, etc.
-   * if supplementary characters are present:
-   *
-   * In UTF-16, supplementary characters (with code points U+10000 and above) are
-   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
-   * which means that they compare as less than some other BMP characters like U+feff.
-   * This function compares Unicode strings in code point order.
-   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
-   *
-   * @param start The start offset in this string at which the compare operation begins.
-   * @param length The number of code units from this string to compare.
-   * @param srcText Another string to compare this one to.
-   * @return a negative/zero/positive integer corresponding to whether
-   * this string is less than/equal to/greater than the second one
-   * in code point order
-   * @stable ICU 2.0
-   */
-  inline int8_t compareCodePointOrder(int32_t start,
-                                      int32_t length,
-                                      const UnicodeString& srcText) const;
-
-  /**
-   * Compare two Unicode strings in code point order.
-   * The result may be different from the results of compare(), operator<, etc.
-   * if supplementary characters are present:
-   *
-   * In UTF-16, supplementary characters (with code points U+10000 and above) are
-   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
-   * which means that they compare as less than some other BMP characters like U+feff.
-   * This function compares Unicode strings in code point order.
-   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
-   *
-   * @param start The start offset in this string at which the compare operation begins.
-   * @param length The number of code units from this string to compare.
-   * @param srcText Another string to compare this one to.
-   * @param srcStart The start offset in that string at which the compare operation begins.
-   * @param srcLength The number of code units from that string to compare.
-   * @return a negative/zero/positive integer corresponding to whether
-   * this string is less than/equal to/greater than the second one
-   * in code point order
-   * @stable ICU 2.0
-   */
-   inline int8_t compareCodePointOrder(int32_t start,
-                                       int32_t length,
-                                       const UnicodeString& srcText,
-                                       int32_t srcStart,
-                                       int32_t srcLength) const;
-
-  /**
-   * Compare two Unicode strings in code point order.
-   * The result may be different from the results of compare(), operator<, etc.
-   * if supplementary characters are present:
-   *
-   * In UTF-16, supplementary characters (with code points U+10000 and above) are
-   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
-   * which means that they compare as less than some other BMP characters like U+feff.
-   * This function compares Unicode strings in code point order.
-   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
-   *
-   * @param srcChars A pointer to another string to compare this one to.
-   * @param srcLength The number of code units from that string to compare.
-   * @return a negative/zero/positive integer corresponding to whether
-   * this string is less than/equal to/greater than the second one
-   * in code point order
-   * @stable ICU 2.0
-   */
-  inline int8_t compareCodePointOrder(const UChar *srcChars,
-                                      int32_t srcLength) const;
-
-  /**
-   * Compare two Unicode strings in code point order.
-   * The result may be different from the results of compare(), operator<, etc.
-   * if supplementary characters are present:
-   *
-   * In UTF-16, supplementary characters (with code points U+10000 and above) are
-   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
-   * which means that they compare as less than some other BMP characters like U+feff.
-   * This function compares Unicode strings in code point order.
-   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
-   *
-   * @param start The start offset in this string at which the compare operation begins.
-   * @param length The number of code units from this string to compare.
-   * @param srcChars A pointer to another string to compare this one to.
-   * @return a negative/zero/positive integer corresponding to whether
-   * this string is less than/equal to/greater than the second one
-   * in code point order
-   * @stable ICU 2.0
-   */
-  inline int8_t compareCodePointOrder(int32_t start,
-                                      int32_t length,
-                                      const UChar *srcChars) const;
-
-  /**
-   * Compare two Unicode strings in code point order.
-   * The result may be different from the results of compare(), operator<, etc.
-   * if supplementary characters are present:
-   *
-   * In UTF-16, supplementary characters (with code points U+10000 and above) are
-   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
-   * which means that they compare as less than some other BMP characters like U+feff.
-   * This function compares Unicode strings in code point order.
-   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
-   *
-   * @param start The start offset in this string at which the compare operation begins.
-   * @param length The number of code units from this string to compare.
-   * @param srcChars A pointer to another string to compare this one to.
-   * @param srcStart The start offset in that string at which the compare operation begins.
-   * @param srcLength The number of code units from that string to compare.
-   * @return a negative/zero/positive integer corresponding to whether
-   * this string is less than/equal to/greater than the second one
-   * in code point order
-   * @stable ICU 2.0
-   */
-  inline int8_t compareCodePointOrder(int32_t start,
-                                      int32_t length,
-                                      const UChar *srcChars,
-                                      int32_t srcStart,
-                                      int32_t srcLength) const;
-
-  /**
-   * Compare two Unicode strings in code point order.
-   * The result may be different from the results of compare(), operator<, etc.
-   * if supplementary characters are present:
-   *
-   * In UTF-16, supplementary characters (with code points U+10000 and above) are
-   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
-   * which means that they compare as less than some other BMP characters like U+feff.
-   * This function compares Unicode strings in code point order.
-   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
-   *
-   * @param start The start offset in this string at which the compare operation begins.
-   * @param limit The offset after the last code unit from this string to compare.
-   * @param srcText Another string to compare this one to.
-   * @param srcStart The start offset in that string at which the compare operation begins.
-   * @param srcLimit The offset after the last code unit from that string to compare.
-   * @return a negative/zero/positive integer corresponding to whether
-   * this string is less than/equal to/greater than the second one
-   * in code point order
-   * @stable ICU 2.0
-   */
-  inline int8_t compareCodePointOrderBetween(int32_t start,
-                                             int32_t limit,
-                                             const UnicodeString& srcText,
-                                             int32_t srcStart,
-                                             int32_t srcLimit) const;
-
-  /**
-   * Compare two strings case-insensitively using full case folding.
-   * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
-   *
-   * @param text Another string to compare this one to.
-   * @param options A bit set of options:
-   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
-   *     Comparison in code unit order with default case folding.
-   *
-   *   - U_COMPARE_CODE_POINT_ORDER
-   *     Set to choose code point order instead of code unit order
-   *     (see u_strCompare for details).
-   *
-   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
-   *
-   * @return A negative, zero, or positive integer indicating the comparison result.
-   * @stable ICU 2.0
-   */
-  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
-
-  /**
-   * Compare two strings case-insensitively using full case folding.
-   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
-   *
-   * @param start The start offset in this string at which the compare operation begins.
-   * @param length The number of code units from this string to compare.
-   * @param srcText Another string to compare this one to.
-   * @param options A bit set of options:
-   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
-   *     Comparison in code unit order with default case folding.
-   *
-   *   - U_COMPARE_CODE_POINT_ORDER
-   *     Set to choose code point order instead of code unit order
-   *     (see u_strCompare for details).
-   *
-   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
-   *
-   * @return A negative, zero, or positive integer indicating the comparison result.
-   * @stable ICU 2.0
-   */
-  inline int8_t caseCompare(int32_t start,
-         int32_t length,
-         const UnicodeString& srcText,
-         uint32_t options) const;
-
-  /**
-   * Compare two strings case-insensitively using full case folding.
-   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
-   *
-   * @param start The start offset in this string at which the compare operation begins.
-   * @param length The number of code units from this string to compare.
-   * @param srcText Another string to compare this one to.
-   * @param srcStart The start offset in that string at which the compare operation begins.
-   * @param srcLength The number of code units from that string to compare.
-   * @param options A bit set of options:
-   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
-   *     Comparison in code unit order with default case folding.
-   *
-   *   - U_COMPARE_CODE_POINT_ORDER
-   *     Set to choose code point order instead of code unit order
-   *     (see u_strCompare for details).
-   *
-   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
-   *
-   * @return A negative, zero, or positive integer indicating the comparison result.
-   * @stable ICU 2.0
-   */
-  inline int8_t caseCompare(int32_t start,
-         int32_t length,
-         const UnicodeString& srcText,
-         int32_t srcStart,
-         int32_t srcLength,
-         uint32_t options) const;
-
-  /**
-   * Compare two strings case-insensitively using full case folding.
-   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
-   *
-   * @param srcChars A pointer to another string to compare this one to.
-   * @param srcLength The number of code units from that string to compare.
-   * @param options A bit set of options:
-   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
-   *     Comparison in code unit order with default case folding.
-   *
-   *   - U_COMPARE_CODE_POINT_ORDER
-   *     Set to choose code point order instead of code unit order
-   *     (see u_strCompare for details).
-   *
-   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
-   *
-   * @return A negative, zero, or positive integer indicating the comparison result.
-   * @stable ICU 2.0
-   */
-  inline int8_t caseCompare(const UChar *srcChars,
-         int32_t srcLength,
-         uint32_t options) const;
-
-  /**
-   * Compare two strings case-insensitively using full case folding.
-   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
-   *
-   * @param start The start offset in this string at which the compare operation begins.
-   * @param length The number of code units from this string to compare.
-   * @param srcChars A pointer to another string to compare this one to.
-   * @param options A bit set of options:
-   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
-   *     Comparison in code unit order with default case folding.
-   *
-   *   - U_COMPARE_CODE_POINT_ORDER
-   *     Set to choose code point order instead of code unit order
-   *     (see u_strCompare for details).
-   *
-   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
-   *
-   * @return A negative, zero, or positive integer indicating the comparison result.
-   * @stable ICU 2.0
-   */
-  inline int8_t caseCompare(int32_t start,
-         int32_t length,
-         const UChar *srcChars,
-         uint32_t options) const;
-
-  /**
-   * Compare two strings case-insensitively using full case folding.
-   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
-   *
-   * @param start The start offset in this string at which the compare operation begins.
-   * @param length The number of code units from this string to compare.
-   * @param srcChars A pointer to another string to compare this one to.
-   * @param srcStart The start offset in that string at which the compare operation begins.
-   * @param srcLength The number of code units from that string to compare.
-   * @param options A bit set of options:
-   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
-   *     Comparison in code unit order with default case folding.
-   *
-   *   - U_COMPARE_CODE_POINT_ORDER
-   *     Set to choose code point order instead of code unit order
-   *     (see u_strCompare for details).
-   *
-   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
-   *
-   * @return A negative, zero, or positive integer indicating the comparison result.
-   * @stable ICU 2.0
-   */
-  inline int8_t caseCompare(int32_t start,
-         int32_t length,
-         const UChar *srcChars,
-         int32_t srcStart,
-         int32_t srcLength,
-         uint32_t options) const;
-
-  /**
-   * Compare two strings case-insensitively using full case folding.
-   * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)).
-   *
-   * @param start The start offset in this string at which the compare operation begins.
-   * @param limit The offset after the last code unit from this string to compare.
-   * @param srcText Another string to compare this one to.
-   * @param srcStart The start offset in that string at which the compare operation begins.
-   * @param srcLimit The offset after the last code unit from that string to compare.
-   * @param options A bit set of options:
-   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
-   *     Comparison in code unit order with default case folding.
-   *
-   *   - U_COMPARE_CODE_POINT_ORDER
-   *     Set to choose code point order instead of code unit order
-   *     (see u_strCompare for details).
-   *
-   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
-   *
-   * @return A negative, zero, or positive integer indicating the comparison result.
-   * @stable ICU 2.0
-   */
-  inline int8_t caseCompareBetween(int32_t start,
-            int32_t limit,
-            const UnicodeString& srcText,
-            int32_t srcStart,
-            int32_t srcLimit,
-            uint32_t options) const;
-
-  /**
-   * Determine if this starts with the characters in <TT>text</TT>
-   * @param text The text to match.
-   * @return TRUE if this starts with the characters in <TT>text</TT>,
-   * FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool startsWith(const UnicodeString& text) const;
-
-  /**
-   * Determine if this starts with the characters in <TT>srcText</TT>
-   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
-   * @param srcText The text to match.
-   * @param srcStart the offset into <TT>srcText</TT> to start matching
-   * @param srcLength the number of characters in <TT>srcText</TT> to match
-   * @return TRUE if this starts with the characters in <TT>text</TT>,
-   * FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool startsWith(const UnicodeString& srcText,
-            int32_t srcStart,
-            int32_t srcLength) const;
-
-  /**
-   * Determine if this starts with the characters in <TT>srcChars</TT>
-   * @param srcChars The characters to match.
-   * @param srcLength the number of characters in <TT>srcChars</TT>
-   * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
-   * FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool startsWith(const UChar *srcChars,
-            int32_t srcLength) const;
-
-  /**
-   * Determine if this ends with the characters in <TT>srcChars</TT>
-   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
-   * @param srcChars The characters to match.
-   * @param srcStart the offset into <TT>srcText</TT> to start matching
-   * @param srcLength the number of characters in <TT>srcChars</TT> to match
-   * @return TRUE if this ends with the characters in <TT>srcChars</TT>, FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool startsWith(const UChar *srcChars,
-            int32_t srcStart,
-            int32_t srcLength) const;
-
-  /**
-   * Determine if this ends with the characters in <TT>text</TT>
-   * @param text The text to match.
-   * @return TRUE if this ends with the characters in <TT>text</TT>,
-   * FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool endsWith(const UnicodeString& text) const;
-
-  /**
-   * Determine if this ends with the characters in <TT>srcText</TT>
-   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
-   * @param srcText The text to match.
-   * @param srcStart the offset into <TT>srcText</TT> to start matching
-   * @param srcLength the number of characters in <TT>srcText</TT> to match
-   * @return TRUE if this ends with the characters in <TT>text</TT>,
-   * FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool endsWith(const UnicodeString& srcText,
-          int32_t srcStart,
-          int32_t srcLength) const;
-
-  /**
-   * Determine if this ends with the characters in <TT>srcChars</TT>
-   * @param srcChars The characters to match.
-   * @param srcLength the number of characters in <TT>srcChars</TT>
-   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
-   * FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool endsWith(const UChar *srcChars,
-          int32_t srcLength) const;
-
-  /**
-   * Determine if this ends with the characters in <TT>srcChars</TT>
-   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
-   * @param srcChars The characters to match.
-   * @param srcStart the offset into <TT>srcText</TT> to start matching
-   * @param srcLength the number of characters in <TT>srcChars</TT> to match
-   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
-   * FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool endsWith(const UChar *srcChars,
-          int32_t srcStart,
-          int32_t srcLength) const;
-
-
-  /* Searching - bitwise only */
-
-  /**
-   * Locate in this the first occurrence of the characters in <TT>text</TT>,
-   * using bitwise comparison.
-   * @param text The text to search for.
-   * @return The offset into this of the start of <TT>text</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t indexOf(const UnicodeString& text) const;
-
-  /**
-   * Locate in this the first occurrence of the characters in <TT>text</TT>
-   * starting at offset <TT>start</TT>, using bitwise comparison.
-   * @param text The text to search for.
-   * @param start The offset at which searching will start.
-   * @return The offset into this of the start of <TT>text</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t indexOf(const UnicodeString& text,
-              int32_t start) const;
-
-  /**
-   * Locate in this the first occurrence in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
-   * in <TT>text</TT>, using bitwise comparison.
-   * @param text The text to search for.
-   * @param start The offset at which searching will start.
-   * @param length The number of characters to search
-   * @return The offset into this of the start of <TT>text</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t indexOf(const UnicodeString& text,
-              int32_t start,
-              int32_t length) const;
-
-  /**
-   * Locate in this the first occurrence in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
-   *  in <TT>srcText</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
-   * using bitwise comparison.
-   * @param srcText The text to search for.
-   * @param srcStart the offset into <TT>srcText</TT> at which
-   * to start matching
-   * @param srcLength the number of characters in <TT>srcText</TT> to match
-   * @param start the offset into this at which to start matching
-   * @param length the number of characters in this to search
-   * @return The offset into this of the start of <TT>text</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t indexOf(const UnicodeString& srcText,
-              int32_t srcStart,
-              int32_t srcLength,
-              int32_t start,
-              int32_t length) const;
-
-  /**
-   * Locate in this the first occurrence of the characters in
-   * <TT>srcChars</TT>
-   * starting at offset <TT>start</TT>, using bitwise comparison.
-   * @param srcChars The text to search for.
-   * @param srcLength the number of characters in <TT>srcChars</TT> to match
-   * @param start the offset into this at which to start matching
-   * @return The offset into this of the start of <TT>text</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t indexOf(const UChar *srcChars,
-              int32_t srcLength,
-              int32_t start) const;
-
-  /**
-   * Locate in this the first occurrence in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
-   * in <TT>srcChars</TT>, using bitwise comparison.
-   * @param srcChars The text to search for.
-   * @param srcLength the number of characters in <TT>srcChars</TT>
-   * @param start The offset at which searching will start.
-   * @param length The number of characters to search
-   * @return The offset into this of the start of <TT>srcChars</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t indexOf(const UChar *srcChars,
-              int32_t srcLength,
-              int32_t start,
-              int32_t length) const;
-
-  /**
-   * Locate in this the first occurrence in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
-   * in <TT>srcChars</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
-   * using bitwise comparison.
-   * @param srcChars The text to search for.
-   * @param srcStart the offset into <TT>srcChars</TT> at which
-   * to start matching
-   * @param srcLength the number of characters in <TT>srcChars</TT> to match
-   * @param start the offset into this at which to start matching
-   * @param length the number of characters in this to search
-   * @return The offset into this of the start of <TT>text</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  int32_t indexOf(const UChar *srcChars,
-              int32_t srcStart,
-              int32_t srcLength,
-              int32_t start,
-              int32_t length) const;
-
-  /**
-   * Locate in this the first occurrence of the BMP code point <code>c</code>,
-   * using bitwise comparison.
-   * @param c The code unit to search for.
-   * @return The offset into this of <TT>c</TT>, or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t indexOf(UChar c) const;
-
-  /**
-   * Locate in this the first occurrence of the code point <TT>c</TT>,
-   * using bitwise comparison.
-   *
-   * @param c The code point to search for.
-   * @return The offset into this of <TT>c</TT>, or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t indexOf(UChar32 c) const;
-
-  /**
-   * Locate in this the first occurrence of the BMP code point <code>c</code>,
-   * starting at offset <TT>start</TT>, using bitwise comparison.
-   * @param c The code unit to search for.
-   * @param start The offset at which searching will start.
-   * @return The offset into this of <TT>c</TT>, or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t indexOf(UChar c,
-              int32_t start) const;
-
-  /**
-   * Locate in this the first occurrence of the code point <TT>c</TT>
-   * starting at offset <TT>start</TT>, using bitwise comparison.
-   *
-   * @param c The code point to search for.
-   * @param start The offset at which searching will start.
-   * @return The offset into this of <TT>c</TT>, or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t indexOf(UChar32 c,
-              int32_t start) const;
-
-  /**
-   * Locate in this the first occurrence of the BMP code point <code>c</code>
-   * in the range [<TT>start</TT>, <TT>start + length</TT>),
-   * using bitwise comparison.
-   * @param c The code unit to search for.
-   * @param start the offset into this at which to start matching
-   * @param length the number of characters in this to search
-   * @return The offset into this of <TT>c</TT>, or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t indexOf(UChar c,
-              int32_t start,
-              int32_t length) const;
-
-  /**
-   * Locate in this the first occurrence of the code point <TT>c</TT>
-   * in the range [<TT>start</TT>, <TT>start + length</TT>),
-   * using bitwise comparison.
-   *
-   * @param c The code point to search for.
-   * @param start the offset into this at which to start matching
-   * @param length the number of characters in this to search
-   * @return The offset into this of <TT>c</TT>, or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t indexOf(UChar32 c,
-              int32_t start,
-              int32_t length) const;
-
-  /**
-   * Locate in this the last occurrence of the characters in <TT>text</TT>,
-   * using bitwise comparison.
-   * @param text The text to search for.
-   * @return The offset into this of the start of <TT>text</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t lastIndexOf(const UnicodeString& text) const;
-
-  /**
-   * Locate in this the last occurrence of the characters in <TT>text</TT>
-   * starting at offset <TT>start</TT>, using bitwise comparison.
-   * @param text The text to search for.
-   * @param start The offset at which searching will start.
-   * @return The offset into this of the start of <TT>text</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t lastIndexOf(const UnicodeString& text,
-              int32_t start) const;
-
-  /**
-   * Locate in this the last occurrence in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
-   * in <TT>text</TT>, using bitwise comparison.
-   * @param text The text to search for.
-   * @param start The offset at which searching will start.
-   * @param length The number of characters to search
-   * @return The offset into this of the start of <TT>text</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t lastIndexOf(const UnicodeString& text,
-              int32_t start,
-              int32_t length) const;
-
-  /**
-   * Locate in this the last occurrence in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
-   * in <TT>srcText</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
-   * using bitwise comparison.
-   * @param srcText The text to search for.
-   * @param srcStart the offset into <TT>srcText</TT> at which
-   * to start matching
-   * @param srcLength the number of characters in <TT>srcText</TT> to match
-   * @param start the offset into this at which to start matching
-   * @param length the number of characters in this to search
-   * @return The offset into this of the start of <TT>text</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t lastIndexOf(const UnicodeString& srcText,
-              int32_t srcStart,
-              int32_t srcLength,
-              int32_t start,
-              int32_t length) const;
-
-  /**
-   * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
-   * starting at offset <TT>start</TT>, using bitwise comparison.
-   * @param srcChars The text to search for.
-   * @param srcLength the number of characters in <TT>srcChars</TT> to match
-   * @param start the offset into this at which to start matching
-   * @return The offset into this of the start of <TT>text</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t lastIndexOf(const UChar *srcChars,
-              int32_t srcLength,
-              int32_t start) const;
-
-  /**
-   * Locate in this the last occurrence in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
-   * in <TT>srcChars</TT>, using bitwise comparison.
-   * @param srcChars The text to search for.
-   * @param srcLength the number of characters in <TT>srcChars</TT>
-   * @param start The offset at which searching will start.
-   * @param length The number of characters to search
-   * @return The offset into this of the start of <TT>srcChars</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t lastIndexOf(const UChar *srcChars,
-              int32_t srcLength,
-              int32_t start,
-              int32_t length) const;
-
-  /**
-   * Locate in this the last occurrence in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
-   * in <TT>srcChars</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
-   * using bitwise comparison.
-   * @param srcChars The text to search for.
-   * @param srcStart the offset into <TT>srcChars</TT> at which
-   * to start matching
-   * @param srcLength the number of characters in <TT>srcChars</TT> to match
-   * @param start the offset into this at which to start matching
-   * @param length the number of characters in this to search
-   * @return The offset into this of the start of <TT>text</TT>,
-   * or -1 if not found.
-   * @stable ICU 2.0
-   */
-  int32_t lastIndexOf(const UChar *srcChars,
-              int32_t srcStart,
-              int32_t srcLength,
-              int32_t start,
-              int32_t length) const;
-
-  /**
-   * Locate in this the last occurrence of the BMP code point <code>c</code>,
-   * using bitwise comparison.
-   * @param c The code unit to search for.
-   * @return The offset into this of <TT>c</TT>, or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t lastIndexOf(UChar c) const;
-
-  /**
-   * Locate in this the last occurrence of the code point <TT>c</TT>,
-   * using bitwise comparison.
-   *
-   * @param c The code point to search for.
-   * @return The offset into this of <TT>c</TT>, or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t lastIndexOf(UChar32 c) const;
-
-  /**
-   * Locate in this the last occurrence of the BMP code point <code>c</code>
-   * starting at offset <TT>start</TT>, using bitwise comparison.
-   * @param c The code unit to search for.
-   * @param start The offset at which searching will start.
-   * @return The offset into this of <TT>c</TT>, or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t lastIndexOf(UChar c,
-              int32_t start) const;
-
-  /**
-   * Locate in this the last occurrence of the code point <TT>c</TT>
-   * starting at offset <TT>start</TT>, using bitwise comparison.
-   *
-   * @param c The code point to search for.
-   * @param start The offset at which searching will start.
-   * @return The offset into this of <TT>c</TT>, or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t lastIndexOf(UChar32 c,
-              int32_t start) const;
-
-  /**
-   * Locate in this the last occurrence of the BMP code point <code>c</code>
-   * in the range [<TT>start</TT>, <TT>start + length</TT>),
-   * using bitwise comparison.
-   * @param c The code unit to search for.
-   * @param start the offset into this at which to start matching
-   * @param length the number of characters in this to search
-   * @return The offset into this of <TT>c</TT>, or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t lastIndexOf(UChar c,
-              int32_t start,
-              int32_t length) const;
-
-  /**
-   * Locate in this the last occurrence of the code point <TT>c</TT>
-   * in the range [<TT>start</TT>, <TT>start + length</TT>),
-   * using bitwise comparison.
-   *
-   * @param c The code point to search for.
-   * @param start the offset into this at which to start matching
-   * @param length the number of characters in this to search
-   * @return The offset into this of <TT>c</TT>, or -1 if not found.
-   * @stable ICU 2.0
-   */
-  inline int32_t lastIndexOf(UChar32 c,
-              int32_t start,
-              int32_t length) const;
-
-
-  /* Character access */
-
-  /**
-   * Return the code unit at offset <tt>offset</tt>.
-   * If the offset is not valid (0..length()-1) then U+ffff is returned.
-   * @param offset a valid offset into the text
-   * @return the code unit at offset <tt>offset</tt>
-   *         or 0xffff if the offset is not valid for this string
-   * @stable ICU 2.0
-   */
-  inline UChar charAt(int32_t offset) const;
-
-  /**
-   * Return the code unit at offset <tt>offset</tt>.
-   * If the offset is not valid (0..length()-1) then U+ffff is returned.
-   * @param offset a valid offset into the text
-   * @return the code unit at offset <tt>offset</tt>
-   * @stable ICU 2.0
-   */
-  inline UChar operator[] (int32_t offset) const;
-
-  /**
-   * Return the code point that contains the code unit
-   * at offset <tt>offset</tt>.
-   * If the offset is not valid (0..length()-1) then U+ffff is returned.
-   * @param offset a valid offset into the text
-   * that indicates the text offset of any of the code units
-   * that will be assembled into a code point (21-bit value) and returned
-   * @return the code point of text at <tt>offset</tt>
-   *         or 0xffff if the offset is not valid for this string
-   * @stable ICU 2.0
-   */
-  inline UChar32 char32At(int32_t offset) const;
-
-  /**
-   * Adjust a random-access offset so that
-   * it points to the beginning of a Unicode character.
-   * The offset that is passed in points to
-   * any code unit of a code point,
-   * while the returned offset will point to the first code unit
-   * of the same code point.
-   * In UTF-16, if the input offset points to a second surrogate
-   * of a surrogate pair, then the returned offset will point
-   * to the first surrogate.
-   * @param offset a valid offset into one code point of the text
-   * @return offset of the first code unit of the same code point
-   * @see U16_SET_CP_START
-   * @stable ICU 2.0
-   */
-  inline int32_t getChar32Start(int32_t offset) const;
-
-  /**
-   * Adjust a random-access offset so that
-   * it points behind a Unicode character.
-   * The offset that is passed in points behind
-   * any code unit of a code point,
-   * while the returned offset will point behind the last code unit
-   * of the same code point.
-   * In UTF-16, if the input offset points behind the first surrogate
-   * (i.e., to the second surrogate)
-   * of a surrogate pair, then the returned offset will point
-   * behind the second surrogate (i.e., to the first surrogate).
-   * @param offset a valid offset after any code unit of a code point of the text
-   * @return offset of the first code unit after the same code point
-   * @see U16_SET_CP_LIMIT
-   * @stable ICU 2.0
-   */
-  inline int32_t getChar32Limit(int32_t offset) const;
-
-  /**
-   * Move the code unit index along the string by delta code points.
-   * Interpret the input index as a code unit-based offset into the string,
-   * move the index forward or backward by delta code points, and
-   * return the resulting index.
-   * The input index should point to the first code unit of a code point,
-   * if there is more than one.
-   *
-   * Both input and output indexes are code unit-based as for all
-   * string indexes/offsets in ICU (and other libraries, like MBCS char*).
-   * If delta<0 then the index is moved backward (toward the start of the string).
-   * If delta>0 then the index is moved forward (toward the end of the string).
-   *
-   * This behaves like CharacterIterator::move32(delta, kCurrent).
-   *
-   * Behavior for out-of-bounds indexes:
-   * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
-   * if the input index<0 then it is pinned to 0;
-   * if it is index>length() then it is pinned to length().
-   * Afterwards, the index is moved by <code>delta</code> code points
-   * forward or backward,
-   * but no further backward than to 0 and no further forward than to length().
-   * The resulting index return value will be in between 0 and length(), inclusively.
-   *
-   * Examples:
-   * <pre>
-   * // s has code points 'a' U+10000 'b' U+10ffff U+2029
-   * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
-   *
-   * // initial index: position of U+10000
-   * int32_t index=1;
-   *
-   * // the following examples will all result in index==4, position of U+10ffff
-   *
-   * // skip 2 code points from some position in the string
-   * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
-   *
-   * // go to the 3rd code point from the start of s (0-based)
-   * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
-   *
-   * // go to the next-to-last code point of s
-   * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
-   * </pre>
-   *
-   * @param index input code unit index
-   * @param delta (signed) code point count to move the index forward or backward
-   *        in the string
-   * @return the resulting code unit index
-   * @stable ICU 2.0
-   */
-  int32_t moveIndex32(int32_t index, int32_t delta) const;
-
-  /* Substring extraction */
-
-  /**
-   * Copy the characters in the range
-   * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
-   * beginning at <tt>dstStart</tt>.
-   * If the string aliases to <code>dst</code> itself as an external buffer,
-   * then extract() will not copy the contents.
-   *
-   * @param start offset of first character which will be copied into the array
-   * @param length the number of characters to extract
-   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
-   * must be at least (<tt>dstStart + length</tt>).
-   * @param dstStart the offset in <TT>dst</TT> where the first character
-   * will be extracted
-   * @stable ICU 2.0
-   */
-  inline void extract(int32_t start,
-           int32_t length,
-           UChar *dst,
-           int32_t dstStart = 0) const;
-
-  /**
-   * Copy the contents of the string into dest.
-   * This is a convenience function that
-   * checks if there is enough space in dest,
-   * extracts the entire string if possible,
-   * and NUL-terminates dest if possible.
-   *
-   * If the string fits into dest but cannot be NUL-terminated
-   * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
-   * If the string itself does not fit into dest
-   * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
-   *
-   * If the string aliases to <code>dest</code> itself as an external buffer,
-   * then extract() will not copy the contents.
-   *
-   * @param dest Destination string buffer.
-   * @param destCapacity Number of UChars available at dest.
-   * @param errorCode ICU error code.
-   * @return length()
-   * @stable ICU 2.0
-   */
-  int32_t
-  extract(UChar *dest, int32_t destCapacity,
-          UErrorCode &errorCode) const;
-
-  /**
-   * Copy the characters in the range
-   * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
-   * <tt>target</tt>.
-   * @param start offset of first character which will be copied
-   * @param length the number of characters to extract
-   * @param target UnicodeString into which to copy characters.
-   * @return A reference to <TT>target</TT>
-   * @stable ICU 2.0
-   */
-  inline void extract(int32_t start,
-           int32_t length,
-           UnicodeString& target) const;
-
-  /**
-   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
-   * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
-   * @param start offset of first character which will be copied into the array
-   * @param limit offset immediately following the last character to be copied
-   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
-   * must be at least (<tt>dstStart + (limit - start)</tt>).
-   * @param dstStart the offset in <TT>dst</TT> where the first character
-   * will be extracted
-   * @stable ICU 2.0
-   */
-  inline void extractBetween(int32_t start,
-              int32_t limit,
-              UChar *dst,
-              int32_t dstStart = 0) const;
-
-  /**
-   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
-   * into the UnicodeString <tt>target</tt>.  Replaceable API.
-   * @param start offset of first character which will be copied
-   * @param limit offset immediately following the last character to be copied
-   * @param target UnicodeString into which to copy characters.
-   * @return A reference to <TT>target</TT>
-   * @stable ICU 2.0
-   */
-  virtual void extractBetween(int32_t start,
-              int32_t limit,
-              UnicodeString& target) const;
-
-  /**
-   * Copy the characters in the range 
-   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters.
-   * All characters must be invariant (see utypes.h).
-   * Use US_INV as the last, signature-distinguishing parameter.
-   *
-   * This function does not write any more than <code>targetLength</code>
-   * characters but returns the length of the entire output string
-   * so that one can allocate a larger buffer and call the function again
-   * if necessary.
-   * The output string is NUL-terminated if possible.
-   *
-   * @param start offset of first character which will be copied
-   * @param startLength the number of characters to extract
-   * @param target the target buffer for extraction, can be NULL
-   *               if targetLength is 0
-   * @param targetCapacity the length of the target buffer
-   * @param inv Signature-distinguishing paramater, use US_INV.
-   * @return the output string length, not including the terminating NUL
-   * @stable ICU 3.2
-   */
-  int32_t extract(int32_t start,
-           int32_t startLength,
-           char *target,
-           int32_t targetCapacity,
-           enum EInvariant inv) const;
-
-#if !UCONFIG_NO_CONVERSION
-
-  /**
-   * Copy the characters in the range
-   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
-   * in a specified codepage.
-   * The output string is NUL-terminated.
-   *
-   * Recommendation: For invariant-character strings use
-   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
-   * because it avoids object code dependencies of UnicodeString on
-   * the conversion code.
-   *
-   * @param start offset of first character which will be copied
-   * @param startLength the number of characters to extract
-   * @param target the target buffer for extraction
-   * @param codepage the desired codepage for the characters.  0 has
-   * the special meaning of the default codepage
-   * If <code>codepage</code> is an empty string (<code>""</code>),
-   * then a simple conversion is performed on the codepage-invariant
-   * subset ("invariant characters") of the platform encoding. See utypes.h.
-   * If <TT>target</TT> is NULL, then the number of bytes required for
-   * <TT>target</TT> is returned. It is assumed that the target is big enough
-   * to fit all of the characters.
-   * @return the output string length, not including the terminating NUL
-   * @stable ICU 2.0
-   */
-  inline int32_t extract(int32_t start,
-                 int32_t startLength,
-                 char *target,
-                 const char *codepage = 0) const;
-
-  /**
-   * Copy the characters in the range
-   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
-   * in a specified codepage.
-   * This function does not write any more than <code>targetLength</code>
-   * characters but returns the length of the entire output string
-   * so that one can allocate a larger buffer and call the function again
-   * if necessary.
-   * The output string is NUL-terminated if possible.
-   *
-   * Recommendation: For invariant-character strings use
-   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
-   * because it avoids object code dependencies of UnicodeString on
-   * the conversion code.
-   *
-   * @param start offset of first character which will be copied
-   * @param startLength the number of characters to extract
-   * @param target the target buffer for extraction
-   * @param targetLength the length of the target buffer
-   * @param codepage the desired codepage for the characters.  0 has
-   * the special meaning of the default codepage
-   * If <code>codepage</code> is an empty string (<code>""</code>),
-   * then a simple conversion is performed on the codepage-invariant
-   * subset ("invariant characters") of the platform encoding. See utypes.h.
-   * If <TT>target</TT> is NULL, then the number of bytes required for
-   * <TT>target</TT> is returned.
-   * @return the output string length, not including the terminating NUL
-   * @stable ICU 2.0
-   */
-  int32_t extract(int32_t start,
-           int32_t startLength,
-           char *target,
-           uint32_t targetLength,
-           const char *codepage = 0) const;
-
-  /**
-   * Convert the UnicodeString into a codepage string using an existing UConverter.
-   * The output string is NUL-terminated if possible.
-   *
-   * This function avoids the overhead of opening and closing a converter if
-   * multiple strings are extracted.
-   *
-   * @param dest destination string buffer, can be NULL if destCapacity==0
-   * @param destCapacity the number of chars available at dest
-   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
-   *        or NULL for the default converter
-   * @param errorCode normal ICU error code
-   * @return the length of the output string, not counting the terminating NUL;
-   *         if the length is greater than destCapacity, then the string will not fit
-   *         and a buffer of the indicated length would need to be passed in
-   * @stable ICU 2.0
-   */
-  int32_t extract(char *dest, int32_t destCapacity,
-                  UConverter *cnv,
-                  UErrorCode &errorCode) const;
-
-#endif
-
-  /* Length operations */
-
-  /**
-   * Return the length of the UnicodeString object.
-   * The length is the number of UChar code units are in the UnicodeString.
-   * If you want the number of code points, please use countChar32().
-   * @return the length of the UnicodeString object
-   * @see countChar32
-   * @stable ICU 2.0
-   */
-  inline int32_t length(void) const;
-
-  /**
-   * Count Unicode code points in the length UChar code units of the string.
-   * A code point may occupy either one or two UChar code units.
-   * Counting code points involves reading all code units.
-   *
-   * This functions is basically the inverse of moveIndex32().
-   *
-   * @param start the index of the first code unit to check
-   * @param length the number of UChar code units to check
-   * @return the number of code points in the specified code units
-   * @see length
-   * @stable ICU 2.0
-   */
-  int32_t
-  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
-
-  /**
-   * Check if the length UChar code units of the string
-   * contain more Unicode code points than a certain number.
-   * This is more efficient than counting all code points in this part of the string
-   * and comparing that number with a threshold.
-   * This function may not need to scan the string at all if the length
-   * falls within a certain range, and
-   * never needs to count more than 'number+1' code points.
-   * Logically equivalent to (countChar32(start, length)>number).
-   * A Unicode code point may occupy either one or two UChar code units.
-   *
-   * @param start the index of the first code unit to check (0 for the entire string)
-   * @param length the number of UChar code units to check
-   *               (use INT32_MAX for the entire string; remember that start/length
-   *                values are pinned)
-   * @param number The number of code points in the (sub)string is compared against
-   *               the 'number' parameter.
-   * @return Boolean value for whether the string contains more Unicode code points
-   *         than 'number'. Same as (u_countChar32(s, length)>number).
-   * @see countChar32
-   * @see u_strHasMoreChar32Than
-   * @stable ICU 2.4
-   */
-  UBool
-  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
-
-  /**
-   * Determine if this string is empty.
-   * @return TRUE if this string contains 0 characters, FALSE otherwise.
-   * @stable ICU 2.0
-   */
-  inline UBool isEmpty(void) const;
-
-  /**
-   * Return the capacity of the internal buffer of the UnicodeString object.
-   * This is useful together with the getBuffer functions.
-   * See there for details.
-   *
-   * @return the number of UChars available in the internal buffer
-   * @see getBuffer
-   * @stable ICU 2.0
-   */
-  inline int32_t getCapacity(void) const;
-
-  /* Other operations */
-
-  /**
-   * Generate a hash code for this object.
-   * @return The hash code of this UnicodeString.
-   * @stable ICU 2.0
-   */
-  inline int32_t hashCode(void) const;
-
-  /**
-   * Determine if this object contains a valid string.
-   * A bogus string has no value. It is different from an empty string.
-   * It can be used to indicate that no string value is available.
-   * getBuffer() and getTerminatedBuffer() return NULL, and
-   * length() returns 0.
-   *
-   * @return TRUE if the string is valid, FALSE otherwise
-   * @see setToBogus()
-   * @stable ICU 2.0
-   */
-  inline UBool isBogus(void) const;
-
-
-  //========================================
-  // Write operations
-  //========================================
-
-  /* Assignment operations */
-
-  /**
-   * Assignment operator.  Replace the characters in this UnicodeString
-   * with the characters from <TT>srcText</TT>.
-   * @param srcText The text containing the characters to replace
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  UnicodeString &operator=(const UnicodeString &srcText);
-
-  /**
-   * Almost the same as the assignment operator.
-   * Replace the characters in this UnicodeString
-   * with the characters from <code>srcText</code>.
-   *
-   * This function works the same for all strings except for ones that
-   * are readonly aliases.
-   * Starting with ICU 2.4, the assignment operator and the copy constructor
-   * allocate a new buffer and copy the buffer contents even for readonly aliases.
-   * This function implements the old, more efficient but less safe behavior
-   * of making this string also a readonly alias to the same buffer.
-   * The fastCopyFrom function must be used only if it is known that the lifetime of
-   * this UnicodeString is at least as long as the lifetime of the aliased buffer
-   * including its contents, for example for strings from resource bundles
-   * or aliases to string contents.
-   *
-   * @param src The text containing the characters to replace.
-   * @return a reference to this
-   * @stable ICU 2.4
-   */
-  UnicodeString &fastCopyFrom(const UnicodeString &src);
-
-  /**
-   * Assignment operator.  Replace the characters in this UnicodeString
-   * with the code unit <TT>ch</TT>.
-   * @param ch the code unit to replace
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& operator= (UChar ch);
-
-  /**
-   * Assignment operator.  Replace the characters in this UnicodeString
-   * with the code point <TT>ch</TT>.
-   * @param ch the code point to replace
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& operator= (UChar32 ch);
-
-  /**
-   * Set the text in the UnicodeString object to the characters
-   * in <TT>srcText</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
-   * <TT>srcText</TT> is not modified.
-   * @param srcText the source for the new characters
-   * @param srcStart the offset into <TT>srcText</TT> where new characters
-   * will be obtained
-   * @return a reference to this
-   * @stable ICU 2.2
-   */
-  inline UnicodeString& setTo(const UnicodeString& srcText,
-               int32_t srcStart);
-
-  /**
-   * Set the text in the UnicodeString object to the characters
-   * in <TT>srcText</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
-   * <TT>srcText</TT> is not modified.
-   * @param srcText the source for the new characters
-   * @param srcStart the offset into <TT>srcText</TT> where new characters
-   * will be obtained
-   * @param srcLength the number of characters in <TT>srcText</TT> in the
-   * replace string.
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& setTo(const UnicodeString& srcText,
-               int32_t srcStart,
-               int32_t srcLength);
-
-  /**
-   * Set the text in the UnicodeString object to the characters in
-   * <TT>srcText</TT>.
-   * <TT>srcText</TT> is not modified.
-   * @param srcText the source for the new characters
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& setTo(const UnicodeString& srcText);
-
-  /**
-   * Set the characters in the UnicodeString object to the characters
-   * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
-   * @param srcChars the source for the new characters
-   * @param srcLength the number of Unicode characters in srcChars.
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& setTo(const UChar *srcChars,
-               int32_t srcLength);
-
-  /**
-   * Set the characters in the UnicodeString object to the code unit
-   * <TT>srcChar</TT>.
-   * @param srcChar the code unit which becomes the UnicodeString's character
-   * content
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  UnicodeString& setTo(UChar srcChar);
-
-  /**
-   * Set the characters in the UnicodeString object to the code point
-   * <TT>srcChar</TT>.
-   * @param srcChar the code point which becomes the UnicodeString's character
-   * content
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  UnicodeString& setTo(UChar32 srcChar);
-
-  /**
-   * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
-   * The text will be used for the UnicodeString object, but
-   * it will not be released when the UnicodeString is destroyed.
-   * This has copy-on-write semantics:
-   * When the string is modified, then the buffer is first copied into
-   * newly allocated memory.
-   * The aliased buffer is never modified.
-   * In an assignment to another UnicodeString, the text will be aliased again,
-   * so that both strings then alias the same readonly-text.
-   *
-   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
-   *                     This must be true if <code>textLength==-1</code>.
-   * @param text The characters to alias for the UnicodeString.
-   * @param textLength The number of Unicode characters in <code>text</code> to alias.
-   *                   If -1, then this constructor will determine the length
-   *                   by calling <code>u_strlen()</code>.
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  UnicodeString &setTo(UBool isTerminated,
-                       const UChar *text,
-                       int32_t textLength);
-
-  /**
-   * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
-   * The text will be used for the UnicodeString object, but
-   * it will not be released when the UnicodeString is destroyed.
-   * This has write-through semantics:
-   * For as long as the capacity of the buffer is sufficient, write operations
-   * will directly affect the buffer. When more capacity is necessary, then
-   * a new buffer will be allocated and the contents copied as with regularly
-   * constructed strings.
-   * In an assignment to another UnicodeString, the buffer will be copied.
-   * The extract(UChar *dst) function detects whether the dst pointer is the same
-   * as the string buffer itself and will in this case not copy the contents.
-   *
-   * @param buffer The characters to alias for the UnicodeString.
-   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
-   * @param buffCapacity The size of <code>buffer</code> in UChars.
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  UnicodeString &setTo(UChar *buffer,
-                       int32_t buffLength,
-                       int32_t buffCapacity);
-
-  /**
-   * Make this UnicodeString object invalid.
-   * The string will test TRUE with isBogus().
-   *
-   * A bogus string has no value. It is different from an empty string.
-   * It can be used to indicate that no string value is available.
-   * getBuffer() and getTerminatedBuffer() return NULL, and
-   * length() returns 0.
-   *
-   * This utility function is used throughout the UnicodeString
-   * implementation to indicate that a UnicodeString operation failed,
-   * and may be used in other functions,
-   * especially but not exclusively when such functions do not
-   * take a UErrorCode for simplicity.
-   *
-   * The following methods, and no others, will clear a string object's bogus flag:
-   * - remove()
-   * - remove(0, INT32_MAX)
-   * - truncate(0)
-   * - operator=() (assignment operator)
-   * - setTo(...)
-   *
-   * The simplest ways to turn a bogus string into an empty one
-   * is to use the remove() function.
-   * Examples for other functions that are equivalent to "set to empty string":
-   * \code
-   * if(s.isBogus()) {
-   *   s.remove();           // set to an empty string (remove all), or
-   *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
-   *   s.truncate(0);        // set to an empty string (complete truncation), or
-   *   s=UnicodeString();    // assign an empty string, or
-   *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
-   *   static const UChar nul=0;
-   *   s.setTo(&nul, 0);     // set to an empty C Unicode string
-   * }
-   * \endcode
-   *
-   * @see isBogus()
-   * @stable ICU 2.0
-   */
-  void setToBogus();
-
-  /**
-   * Set the character at the specified offset to the specified character.
-   * @param offset A valid offset into the text of the character to set
-   * @param ch The new character
-   * @return A reference to this
-   * @stable ICU 2.0
-   */
-  UnicodeString& setCharAt(int32_t offset,
-               UChar ch);
-
-
-  /* Append operations */
-
-  /**
-   * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
-   * object.
-   * @param ch the code unit to be appended
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
- inline  UnicodeString& operator+= (UChar ch);
-
-  /**
-   * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
-   * object.
-   * @param ch the code point to be appended
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
- inline  UnicodeString& operator+= (UChar32 ch);
-
-  /**
-   * Append operator. Append the characters in <TT>srcText</TT> to the
-   * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT> is
-   * not modified.
-   * @param srcText the source for the new characters
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& operator+= (const UnicodeString& srcText);
-
-  /**
-   * Append the characters
-   * in <TT>srcText</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
-   * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT>
-   * is not modified.
-   * @param srcText the source for the new characters
-   * @param srcStart the offset into <TT>srcText</TT> where new characters
-   * will be obtained
-   * @param srcLength the number of characters in <TT>srcText</TT> in
-   * the append string
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& append(const UnicodeString& srcText,
-            int32_t srcStart,
-            int32_t srcLength);
-
-  /**
-   * Append the characters in <TT>srcText</TT> to the UnicodeString object at
-   * offset <TT>start</TT>. <TT>srcText</TT> is not modified.
-   * @param srcText the source for the new characters
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& append(const UnicodeString& srcText);
-
-  /**
-   * Append the characters in <TT>srcChars</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
-   * object at offset
-   * <TT>start</TT>. <TT>srcChars</TT> is not modified.
-   * @param srcChars the source for the new characters
-   * @param srcStart the offset into <TT>srcChars</TT> where new characters
-   * will be obtained
-   * @param srcLength the number of characters in <TT>srcChars</TT> in
-   * the append string
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& append(const UChar *srcChars,
-            int32_t srcStart,
-            int32_t srcLength);
-
-  /**
-   * Append the characters in <TT>srcChars</TT> to the UnicodeString object
-   * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
-   * @param srcChars the source for the new characters
-   * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& append(const UChar *srcChars,
-            int32_t srcLength);
-
-  /**
-   * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
-   * @param srcChar the code unit to append
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& append(UChar srcChar);
-
-  /**
-   * Append the code point <TT>srcChar</TT> to the UnicodeString object.
-   * @param srcChar the code point to append
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& append(UChar32 srcChar);
-
-
-  /* Insert operations */
-
-  /**
-   * Insert the characters in <TT>srcText</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
-   * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
-   * @param start the offset where the insertion begins
-   * @param srcText the source for the new characters
-   * @param srcStart the offset into <TT>srcText</TT> where new characters
-   * will be obtained
-   * @param srcLength the number of characters in <TT>srcText</TT> in
-   * the insert string
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& insert(int32_t start,
-            const UnicodeString& srcText,
-            int32_t srcStart,
-            int32_t srcLength);
-
-  /**
-   * Insert the characters in <TT>srcText</TT> into the UnicodeString object
-   * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
-   * @param start the offset where the insertion begins
-   * @param srcText the source for the new characters
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& insert(int32_t start,
-            const UnicodeString& srcText);
-
-  /**
-   * Insert the characters in <TT>srcChars</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
-   *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
-   * @param start the offset at which the insertion begins
-   * @param srcChars the source for the new characters
-   * @param srcStart the offset into <TT>srcChars</TT> where new characters
-   * will be obtained
-   * @param srcLength the number of characters in <TT>srcChars</TT>
-   * in the insert string
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& insert(int32_t start,
-            const UChar *srcChars,
-            int32_t srcStart,
-            int32_t srcLength);
-
-  /**
-   * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
-   * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
-   * @param start the offset where the insertion begins
-   * @param srcChars the source for the new characters
-   * @param srcLength the number of Unicode characters in srcChars.
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& insert(int32_t start,
-            const UChar *srcChars,
-            int32_t srcLength);
-
-  /**
-   * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
-   * offset <TT>start</TT>.
-   * @param start the offset at which the insertion occurs
-   * @param srcChar the code unit to insert
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& insert(int32_t start,
-            UChar srcChar);
-
-  /**
-   * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
-   * offset <TT>start</TT>.
-   * @param start the offset at which the insertion occurs
-   * @param srcChar the code point to insert
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& insert(int32_t start,
-            UChar32 srcChar);
-
-
-  /* Replace operations */
-
-  /**
-   * Replace the characters in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
-   * <TT>srcText</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
-   * <TT>srcText</TT> is not modified.
-   * @param start the offset at which the replace operation begins
-   * @param length the number of characters to replace. The character at
-   * <TT>start + length</TT> is not modified.
-   * @param srcText the source for the new characters
-   * @param srcStart the offset into <TT>srcText</TT> where new characters
-   * will be obtained
-   * @param srcLength the number of characters in <TT>srcText</TT> in
-   * the replace string
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  UnicodeString& replace(int32_t start,
-             int32_t length,
-             const UnicodeString& srcText,
-             int32_t srcStart,
-             int32_t srcLength);
-
-  /**
-   * Replace the characters in the range
-   * [<TT>start</TT>, <TT>start + length</TT>)
-   * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
-   *  not modified.
-   * @param start the offset at which the replace operation begins
-   * @param length the number of characters to replace. The character at
-   * <TT>start + length</TT> is not modified.
-   * @param srcText the source for the new characters
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  UnicodeString& replace(int32_t start,
-             int32_t length,
-             const UnicodeString& srcText);
-
-  /**
-   * Replace the characters in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
-   * <TT>srcChars</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
-   * is not modified.
-   * @param start the offset at which the replace operation begins
-   * @param length the number of characters to replace.  The character at
-   * <TT>start + length</TT> is not modified.
-   * @param srcChars the source for the new characters
-   * @param srcStart the offset into <TT>srcChars</TT> where new characters
-   * will be obtained
-   * @param srcLength the number of characters in <TT>srcChars</TT>
-   * in the replace string
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  UnicodeString& replace(int32_t start,
-             int32_t length,
-             const UChar *srcChars,
-             int32_t srcStart,
-             int32_t srcLength);
-
-  /**
-   * Replace the characters in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
-   * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
-   * @param start the offset at which the replace operation begins
-   * @param length number of characters to replace.  The character at
-   * <TT>start + length</TT> is not modified.
-   * @param srcChars the source for the new characters
-   * @param srcLength the number of Unicode characters in srcChars
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& replace(int32_t start,
-             int32_t length,
-             const UChar *srcChars,
-             int32_t srcLength);
-
-  /**
-   * Replace the characters in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
-   * <TT>srcChar</TT>.
-   * @param start the offset at which the replace operation begins
-   * @param length the number of characters to replace.  The character at
-   * <TT>start + length</TT> is not modified.
-   * @param srcChar the new code unit
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& replace(int32_t start,
-             int32_t length,
-             UChar srcChar);
-
-  /**
-   * Replace the characters in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) with the code point
-   * <TT>srcChar</TT>.
-   * @param start the offset at which the replace operation begins
-   * @param length the number of characters to replace.  The character at
-   * <TT>start + length</TT> is not modified.
-   * @param srcChar the new code point
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& replace(int32_t start,
-             int32_t length,
-             UChar32 srcChar);
-
-  /**
-   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
-   * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
-   * @param start the offset at which the replace operation begins
-   * @param limit the offset immediately following the replace range
-   * @param srcText the source for the new characters
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& replaceBetween(int32_t start,
-                int32_t limit,
-                const UnicodeString& srcText);
-
-  /**
-   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
-   * with the characters in <TT>srcText</TT> in the range
-   * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
-   * @param start the offset at which the replace operation begins
-   * @param limit the offset immediately following the replace range
-   * @param srcText the source for the new characters
-   * @param srcStart the offset into <TT>srcChars</TT> where new characters
-   * will be obtained
-   * @param srcLimit the offset immediately following the range to copy
-   * in <TT>srcText</TT>
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& replaceBetween(int32_t start,
-                int32_t limit,
-                const UnicodeString& srcText,
-                int32_t srcStart,
-                int32_t srcLimit);
-
-  /**
-   * Replace a substring of this object with the given text.
-   * @param start the beginning index, inclusive; <code>0 <= start
-   * <= limit</code>.
-   * @param limit the ending index, exclusive; <code>start <= limit
-   * <= length()</code>.
-   * @param text the text to replace characters <code>start</code>
-   * to <code>limit - 1</code>
-   * @stable ICU 2.0
-   */
-  virtual void handleReplaceBetween(int32_t start,
-                                    int32_t limit,
-                                    const UnicodeString& text);
-
-  /**
-   * Replaceable API
-   * @return TRUE if it has MetaData
-   * @stable ICU 2.4
-   */
-  virtual UBool hasMetaData() const;
-
-  /**
-   * Copy a substring of this object, retaining attribute (out-of-band)
-   * information.  This method is used to duplicate or reorder substrings.
-   * The destination index must not overlap the source range.
-   *
-   * @param start the beginning index, inclusive; <code>0 <= start <=
-   * limit</code>.
-   * @param limit the ending index, exclusive; <code>start <= limit <=
-   * length()</code>.
-   * @param dest the destination index.  The characters from
-   * <code>start..limit-1</code> will be copied to <code>dest</code>.
-   * Implementations of this method may assume that <code>dest <= start ||
-   * dest >= limit</code>.
-   * @stable ICU 2.0
-   */
-  virtual void copy(int32_t start, int32_t limit, int32_t dest);
-
-  /* Search and replace operations */
-
-  /**
-   * Replace all occurrences of characters in oldText with the characters
-   * in newText
-   * @param oldText the text containing the search text
-   * @param newText the text containing the replacement text
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
-                const UnicodeString& newText);
-
-  /**
-   * Replace all occurrences of characters in oldText with characters
-   * in newText
-   * in the range [<TT>start</TT>, <TT>start + length</TT>).
-   * @param start the start of the range in which replace will performed
-   * @param length the length of the range in which replace will be performed
-   * @param oldText the text containing the search text
-   * @param newText the text containing the replacement text
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& findAndReplace(int32_t start,
-                int32_t length,
-                const UnicodeString& oldText,
-                const UnicodeString& newText);
-
-  /**
-   * Replace all occurrences of characters in oldText in the range
-   * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
-   * in newText in the range
-   * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
-   * in the range [<TT>start</TT>, <TT>start + length</TT>).
-   * @param start the start of the range in which replace will performed
-   * @param length the length of the range in which replace will be performed
-   * @param oldText the text containing the search text
-   * @param oldStart the start of the search range in <TT>oldText</TT>
-   * @param oldLength the length of the search range in <TT>oldText</TT>
-   * @param newText the text containing the replacement text
-   * @param newStart the start of the replacement range in <TT>newText</TT>
-   * @param newLength the length of the replacement range in <TT>newText</TT>
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  UnicodeString& findAndReplace(int32_t start,
-                int32_t length,
-                const UnicodeString& oldText,
-                int32_t oldStart,
-                int32_t oldLength,
-                const UnicodeString& newText,
-                int32_t newStart,
-                int32_t newLength);
-
-
-  /* Remove operations */
-
-  /**
-   * Remove all characters from the UnicodeString object.
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& remove(void);
-
-  /**
-   * Remove the characters in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
-   * @param start the offset of the first character to remove
-   * @param length the number of characters to remove
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& remove(int32_t start,
-                               int32_t length = (int32_t)INT32_MAX);
-
-  /**
-   * Remove the characters in the range
-   * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
-   * @param start the offset of the first character to remove
-   * @param limit the offset immediately following the range to remove
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& removeBetween(int32_t start,
-                                      int32_t limit = (int32_t)INT32_MAX);
-
-
-  /* Length operations */
-
-  /**
-   * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
-   * If the length of this UnicodeString is less than targetLength,
-   * length() - targetLength copies of padChar will be added to the
-   * beginning of this UnicodeString.
-   * @param targetLength the desired length of the string
-   * @param padChar the character to use for padding. Defaults to
-   * space (U+0020)
-   * @return TRUE if the text was padded, FALSE otherwise.
-   * @stable ICU 2.0
-   */
-  UBool padLeading(int32_t targetLength,
-                    UChar padChar = 0x0020);
-
-  /**
-   * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
-   * If the length of this UnicodeString is less than targetLength,
-   * length() - targetLength copies of padChar will be added to the
-   * end of this UnicodeString.
-   * @param targetLength the desired length of the string
-   * @param padChar the character to use for padding. Defaults to
-   * space (U+0020)
-   * @return TRUE if the text was padded, FALSE otherwise.
-   * @stable ICU 2.0
-   */
-  UBool padTrailing(int32_t targetLength,
-                     UChar padChar = 0x0020);
-
-  /**
-   * Truncate this UnicodeString to the <TT>targetLength</TT>.
-   * @param targetLength the desired length of this UnicodeString.
-   * @return TRUE if the text was truncated, FALSE otherwise
-   * @stable ICU 2.0
-   */
-  inline UBool truncate(int32_t targetLength);
-
-  /**
-   * Trims leading and trailing whitespace from this UnicodeString.
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  UnicodeString& trim(void);
-
-
-  /* Miscellaneous operations */
-
-  /**
-   * Reverse this UnicodeString in place.
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& reverse(void);
-
-  /**
-   * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
-   * this UnicodeString.
-   * @param start the start of the range to reverse
-   * @param length the number of characters to to reverse
-   * @return a reference to this
-   * @stable ICU 2.0
-   */
-  inline UnicodeString& reverse(int32_t start,
-             int32_t length);
-
-  /**
-   * Convert the characters in this to UPPER CASE following the conventions of
-   * the default locale.
-   * @return A reference to this.
-   * @stable ICU 2.0
-   */
-  UnicodeString& toUpper(void);
-
-  /**
-   * Convert the characters in this to UPPER CASE following the conventions of
-   * a specific locale.
-   * @param locale The locale containing the conventions to use.
-   * @return A reference to this.
-   * @stable ICU 2.0
-   */
-  UnicodeString& toUpper(const Locale& locale);
-
-  /**
-   * Convert the characters in this to lower case following the conventions of
-   * the default locale.
-   * @return A reference to this.
-   * @stable ICU 2.0
-   */
-  UnicodeString& toLower(void);
-
-  /**
-   * Convert the characters in this to lower case following the conventions of
-   * a specific locale.
-   * @param locale The locale containing the conventions to use.
-   * @return A reference to this.
-   * @stable ICU 2.0
-   */
-  UnicodeString& toLower(const Locale& locale);
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-  /**
-   * Titlecase this string, convenience function using the default locale.
-   *
-   * Casing is locale-dependent and context-sensitive.
-   * Titlecasing uses a break iterator to find the first characters of words
-   * that are to be titlecased. It titlecases those characters and lowercases
-   * all others.
-   *
-   * The titlecase break iterator can be provided to customize for arbitrary
-   * styles, using rules and dictionaries beyond the standard iterators.
-   * It may be more efficient to always provide an iterator to avoid
-   * opening and closing one for each string.
-   * The standard titlecase iterator for the root locale implements the
-   * algorithm of Unicode TR 21.
-   *
-   * This function uses only the setText(), first() and next() methods of the
-   * provided break iterator.
-   *
-   * @param titleIter A break iterator to find the first characters of words
-   *                  that are to be titlecased.
-   *                  If none is provided (0), then a standard titlecase
-   *                  break iterator is opened.
-   *                  Otherwise the provided iterator is set to the string's text.
-   * @return A reference to this.
-   * @stable ICU 2.1
-   */
-  UnicodeString &toTitle(BreakIterator *titleIter);
-
-  /**
-   * Titlecase this string.
-   *
-   * Casing is locale-dependent and context-sensitive.
-   * Titlecasing uses a break iterator to find the first characters of words
-   * that are to be titlecased. It titlecases those characters and lowercases
-   * all others.
-   *
-   * The titlecase break iterator can be provided to customize for arbitrary
-   * styles, using rules and dictionaries beyond the standard iterators.
-   * It may be more efficient to always provide an iterator to avoid
-   * opening and closing one for each string.
-   * The standard titlecase iterator for the root locale implements the
-   * algorithm of Unicode TR 21.
-   *
-   * This function uses only the setText(), first() and next() methods of the
-   * provided break iterator.
-   *
-   * @param titleIter A break iterator to find the first characters of words
-   *                  that are to be titlecased.
-   *                  If none is provided (0), then a standard titlecase
-   *                  break iterator is opened.
-   *                  Otherwise the provided iterator is set to the string's text.
-   * @param locale    The locale to consider.
-   * @return A reference to this.
-   * @stable ICU 2.1
-   */
-  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
-
-  /**
-   * Titlecase this string, with options.
-   *
-   * Casing is locale-dependent and context-sensitive.
-   * Titlecasing uses a break iterator to find the first characters of words
-   * that are to be titlecased. It titlecases those characters and lowercases
-   * all others. (This can be modified with options.)
-   *
-   * The titlecase break iterator can be provided to customize for arbitrary
-   * styles, using rules and dictionaries beyond the standard iterators.
-   * It may be more efficient to always provide an iterator to avoid
-   * opening and closing one for each string.
-   * The standard titlecase iterator for the root locale implements the
-   * algorithm of Unicode TR 21.
-   *
-   * This function uses only the setText(), first() and next() methods of the
-   * provided break iterator.
-   *
-   * @param titleIter A break iterator to find the first characters of words
-   *                  that are to be titlecased.
-   *                  If none is provided (0), then a standard titlecase
-   *                  break iterator is opened.
-   *                  Otherwise the provided iterator is set to the string's text.
-   * @param locale    The locale to consider.
-   * @param options Options bit set, see ucasemap_open().
-   * @return A reference to this.
-   * @see U_TITLECASE_NO_LOWERCASE
-   * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
-   * @see ucasemap_open
-   * @stable ICU 4.0
-   */
-  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
-
-#endif
-
-  /**
-   * Case-fold the characters in this string.
-   * Case-folding is locale-independent and not context-sensitive,
-   * but there is an option for whether to include or exclude mappings for dotted I
-   * and dotless i that are marked with 'I' in CaseFolding.txt.
-   * The result may be longer or shorter than the original.
-   *
-   * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
-   * @return A reference to this.
-   * @stable ICU 2.0
-   */
-  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
-
-  //========================================
-  // Access to the internal buffer
-  //========================================
-
-  /**
-   * Get a read/write pointer to the internal buffer.
-   * The buffer is guaranteed to be large enough for at least minCapacity UChars,
-   * writable, and is still owned by the UnicodeString object.
-   * Calls to getBuffer(minCapacity) must not be nested, and
-   * must be matched with calls to releaseBuffer(newLength).
-   * If the string buffer was read-only or shared,
-   * then it will be reallocated and copied.
-   *
-   * An attempted nested call will return 0, and will not further modify the
-   * state of the UnicodeString object.
-   * It also returns 0 if the string is bogus.
-   *
-   * The actual capacity of the string buffer may be larger than minCapacity.
-   * getCapacity() returns the actual capacity.
-   * For many operations, the full capacity should be used to avoid reallocations.
-   *
-   * While the buffer is "open" between getBuffer(minCapacity)
-   * and releaseBuffer(newLength), the following applies:
-   * - The string length is set to 0.
-   * - Any read API call on the UnicodeString object will behave like on a 0-length string.
-   * - Any write API call on the UnicodeString object is disallowed and will have no effect.
-   * - You can read from and write to the returned buffer.
-   * - The previous string contents will still be in the buffer;
-   *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
-   *   If the length() was greater than minCapacity, then any contents after minCapacity
-   *   may be lost.
-   *   The buffer contents is not NUL-terminated by getBuffer().
-   *   If length()<getCapacity() then you can terminate it by writing a NUL
-   *   at index length().
-   * - You must call releaseBuffer(newLength) before and in order to
-   *   return to normal UnicodeString operation.
-   *
-   * @param minCapacity the minimum number of UChars that are to be available
-   *        in the buffer, starting at the returned pointer;
-   *        default to the current string capacity if minCapacity==-1
-   * @return a writable pointer to the internal string buffer,
-   *         or 0 if an error occurs (nested calls, out of memory)
-   *
-   * @see releaseBuffer
-   * @see getTerminatedBuffer()
-   * @stable ICU 2.0
-   */
-  UChar *getBuffer(int32_t minCapacity);
-
-  /**
-   * Release a read/write buffer on a UnicodeString object with an
-   * "open" getBuffer(minCapacity).
-   * This function must be called in a matched pair with getBuffer(minCapacity).
-   * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
-   *
-   * It will set the string length to newLength, at most to the current capacity.
-   * If newLength==-1 then it will set the length according to the
-   * first NUL in the buffer, or to the capacity if there is no NUL.
-   *
-   * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
-   *
-   * @param newLength the new length of the UnicodeString object;
-   *        defaults to the current capacity if newLength is greater than that;
-   *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
-   *        the current capacity of the string
-   *
-   * @see getBuffer(int32_t minCapacity)
-   * @stable ICU 2.0
-   */
-  void releaseBuffer(int32_t newLength=-1);
-
-  /**
-   * Get a read-only pointer to the internal buffer.
-   * This can be called at any time on a valid UnicodeString.
-   *
-   * It returns 0 if the string is bogus, or
-   * during an "open" getBuffer(minCapacity).
-   *
-   * It can be called as many times as desired.
-   * The pointer that it returns will remain valid until the UnicodeString object is modified,
-   * at which time the pointer is semantically invalidated and must not be used any more.
-   *
-   * The capacity of the buffer can be determined with getCapacity().
-   * The part after length() may or may not be initialized and valid,
-   * depending on the history of the UnicodeString object.
-   *
-   * The buffer contents is (probably) not NUL-terminated.
-   * You can check if it is with
-   * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
-   * (See getTerminatedBuffer().)
-   *
-   * The buffer may reside in read-only memory. Its contents must not
-   * be modified.
-   *
-   * @return a read-only pointer to the internal string buffer,
-   *         or 0 if the string is empty or bogus
-   *
-   * @see getBuffer(int32_t minCapacity)
-   * @see getTerminatedBuffer()
-   * @stable ICU 2.0
-   */
-  inline const UChar *getBuffer() const;
-
-  /**
-   * Get a read-only pointer to the internal buffer,
-   * making sure that it is NUL-terminated.
-   * This can be called at any time on a valid UnicodeString.
-   *
-   * It returns 0 if the string is bogus, or
-   * during an "open" getBuffer(minCapacity), or if the buffer cannot
-   * be NUL-terminated (because memory allocation failed).
-   *
-   * It can be called as many times as desired.
-   * The pointer that it returns will remain valid until the UnicodeString object is modified,
-   * at which time the pointer is semantically invalidated and must not be used any more.
-   *
-   * The capacity of the buffer can be determined with getCapacity().
-   * The part after length()+1 may or may not be initialized and valid,
-   * depending on the history of the UnicodeString object.
-   *
-   * The buffer contents is guaranteed to be NUL-terminated.
-   * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
-   * is written.
-   * For this reason, this function is not const, unlike getBuffer().
-   * Note that a UnicodeString may also contain NUL characters as part of its contents.
-   *
-   * The buffer may reside in read-only memory. Its contents must not
-   * be modified.
-   *
-   * @return a read-only pointer to the internal string buffer,
-   *         or 0 if the string is empty or bogus
-   *
-   * @see getBuffer(int32_t minCapacity)
-   * @see getBuffer()
-   * @stable ICU 2.2
-   */
-  inline const UChar *getTerminatedBuffer();
-
-  //========================================
-  // Constructors
-  //========================================
-
-  /** Construct an empty UnicodeString.
-   * @stable ICU 2.0
-   */
-  UnicodeString();
-
-  /**
-   * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
-   * @param capacity the number of UChars this UnicodeString should hold
-   * before a resize is necessary; if count is greater than 0 and count
-   * code points c take up more space than capacity, then capacity is adjusted
-   * accordingly.
-   * @param c is used to initially fill the string
-   * @param count specifies how many code points c are to be written in the
-   *              string
-   * @stable ICU 2.0
-   */
-  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
-
-  /**
-   * Single UChar (code unit) constructor.
-   * @param ch the character to place in the UnicodeString
-   * @stable ICU 2.0
-   */
-  UnicodeString(UChar ch);
-
-  /**
-   * Single UChar32 (code point) constructor.
-   * @param ch the character to place in the UnicodeString
-   * @stable ICU 2.0
-   */
-  UnicodeString(UChar32 ch);
-
-  /**
-   * UChar* constructor.
-   * @param text The characters to place in the UnicodeString.  <TT>text</TT>
-   * must be NULL (U+0000) terminated.
-   * @stable ICU 2.0
-   */
-  UnicodeString(const UChar *text);
-
-  /**
-   * UChar* constructor.
-   * @param text The characters to place in the UnicodeString.
-   * @param textLength The number of Unicode characters in <TT>text</TT>
-   * to copy.
-   * @stable ICU 2.0
-   */
-  UnicodeString(const UChar *text,
-        int32_t textLength);
-
-  /**
-   * Readonly-aliasing UChar* constructor.
-   * The text will be used for the UnicodeString object, but
-   * it will not be released when the UnicodeString is destroyed.
-   * This has copy-on-write semantics:
-   * When the string is modified, then the buffer is first copied into
-   * newly allocated memory.
-   * The aliased buffer is never modified.
-   * In an assignment to another UnicodeString, the text will be aliased again,
-   * so that both strings then alias the same readonly-text.
-   *
-   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
-   *                     This must be true if <code>textLength==-1</code>.
-   * @param text The characters to alias for the UnicodeString.
-   * @param textLength The number of Unicode characters in <code>text</code> to alias.
-   *                   If -1, then this constructor will determine the length
-   *                   by calling <code>u_strlen()</code>.
-   * @stable ICU 2.0
-   */
-  UnicodeString(UBool isTerminated,
-                const UChar *text,
-                int32_t textLength);
-
-  /**
-   * Writable-aliasing UChar* constructor.
-   * The text will be used for the UnicodeString object, but
-   * it will not be released when the UnicodeString is destroyed.
-   * This has write-through semantics:
-   * For as long as the capacity of the buffer is sufficient, write operations
-   * will directly affect the buffer. When more capacity is necessary, then
-   * a new buffer will be allocated and the contents copied as with regularly
-   * constructed strings.
-   * In an assignment to another UnicodeString, the buffer will be copied.
-   * The extract(UChar *dst) function detects whether the dst pointer is the same
-   * as the string buffer itself and will in this case not copy the contents.
-   *
-   * @param buffer The characters to alias for the UnicodeString.
-   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
-   * @param buffCapacity The size of <code>buffer</code> in UChars.
-   * @stable ICU 2.0
-   */
-  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
-
-#if !UCONFIG_NO_CONVERSION
-
-  /**
-   * char* constructor.
-   * @param codepageData an array of bytes, null-terminated
-   * @param codepage the encoding of <TT>codepageData</TT>.  The special
-   * value 0 for <TT>codepage</TT> indicates that the text is in the
-   * platform's default codepage.
-   *
-   * If <code>codepage</code> is an empty string (<code>""</code>),
-   * then a simple conversion is performed on the codepage-invariant
-   * subset ("invariant characters") of the platform encoding. See utypes.h.
-   * Recommendation: For invariant-character strings use the constructor
-   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
-   * because it avoids object code dependencies of UnicodeString on
-   * the conversion code.
-   *
-   * @stable ICU 2.0
-   */
-  UnicodeString(const char *codepageData,
-        const char *codepage = 0);
-
-  /**
-   * char* constructor.
-   * @param codepageData an array of bytes.
-   * @param dataLength The number of bytes in <TT>codepageData</TT>.
-   * @param codepage the encoding of <TT>codepageData</TT>.  The special
-   * value 0 for <TT>codepage</TT> indicates that the text is in the
-   * platform's default codepage.
-   * If <code>codepage</code> is an empty string (<code>""</code>),
-   * then a simple conversion is performed on the codepage-invariant
-   * subset ("invariant characters") of the platform encoding. See utypes.h.
-   * Recommendation: For invariant-character strings use the constructor
-   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
-   * because it avoids object code dependencies of UnicodeString on
-   * the conversion code.
-   *
-   * @stable ICU 2.0
-   */
-  UnicodeString(const char *codepageData,
-        int32_t dataLength,
-        const char *codepage = 0);
-
-  /**
-   * char * / UConverter constructor.
-   * This constructor uses an existing UConverter object to
-   * convert the codepage string to Unicode and construct a UnicodeString
-   * from that.
-   *
-   * The converter is reset at first.
-   * If the error code indicates a failure before this constructor is called,
-   * or if an error occurs during conversion or construction,
-   * then the string will be bogus.
-   *
-   * This function avoids the overhead of opening and closing a converter if
-   * multiple strings are constructed.
-   *
-   * @param src input codepage string
-   * @param srcLength length of the input string, can be -1 for NUL-terminated strings
-   * @param cnv converter object (ucnv_resetToUnicode() will be called),
-   *        can be NULL for the default converter
-   * @param errorCode normal ICU error code
-   * @stable ICU 2.0
-   */
-  UnicodeString(
-        const char *src, int32_t srcLength,
-        UConverter *cnv,
-        UErrorCode &errorCode);
-
-#endif
-
-  /**
-   * Constructs a Unicode string from an invariant-character char * string.
-   * About invariant characters see utypes.h.
-   * This constructor has no runtime dependency on conversion code and is
-   * therefore recommended over ones taking a charset name string
-   * (where the empty string "" indicates invariant-character conversion).
-   *
-   * Use the macro US_INV as the third, signature-distinguishing parameter.
-   *
-   * For example:
-   * \code
-   * void fn(const char *s) {
-   *   UnicodeString ustr(s, -1, US_INV);
-   *   // use ustr ...
-   * }
-   * \endcode
-   *
-   * @param src String using only invariant characters.
-   * @param length Length of src, or -1 if NUL-terminated.
-   * @param inv Signature-distinguishing paramater, use US_INV.
-   *
-   * @see US_INV
-   * @stable ICU 3.2
-   */
-  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
-
-
-  /**
-   * Copy constructor.
-   * @param that The UnicodeString object to copy.
-   * @stable ICU 2.0
-   */
-  UnicodeString(const UnicodeString& that);
-
-  /**
-   * 'Substring' constructor from tail of source string.
-   * @param src The UnicodeString object to copy.
-   * @param srcStart The offset into <tt>src</tt> at which to start copying.
-   * @stable ICU 2.2
-   */
-  UnicodeString(const UnicodeString& src, int32_t srcStart);
-
-  /**
-   * 'Substring' constructor from subrange of source string.
-   * @param src The UnicodeString object to copy.
-   * @param srcStart The offset into <tt>src</tt> at which to start copying.
-   * @param srcLength The number of characters from <tt>src</tt> to copy.
-   * @stable ICU 2.2
-   */
-  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
-
-  /**
-   * Clone this object, an instance of a subclass of Replaceable.
-   * Clones can be used concurrently in multiple threads.
-   * If a subclass does not implement clone(), or if an error occurs,
-   * then NULL is returned.
-   * The clone functions in all subclasses return a pointer to a Replaceable
-   * because some compilers do not support covariant (same-as-this)
-   * return types; cast to the appropriate subclass if necessary.
-   * The caller must delete the clone.
-   *
-   * @return a clone of this object
-   *
-   * @see Replaceable::clone
-   * @see getDynamicClassID
-   * @stable ICU 2.6
-   */
-  virtual Replaceable *clone() const;
-
-  /** Destructor.
-   * @stable ICU 2.0
-   */
-  virtual ~UnicodeString();
-
-
-  /* Miscellaneous operations */
-
-  /**
-   * Unescape a string of characters and return a string containing
-   * the result.  The following escape sequences are recognized:
-   *
-   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
-   * \\Uhhhhhhhh   8 hex digits
-   * \\xhh         1-2 hex digits
-   * \\ooo         1-3 octal digits; o in [0-7]
-   * \\cX          control-X; X is masked with 0x1F
-   *
-   * as well as the standard ANSI C escapes:
-   *
-   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
-   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
-   * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
-   *
-   * Anything else following a backslash is generically escaped.  For
-   * example, "[a\\-z]" returns "[a-z]".
-   *
-   * If an escape sequence is ill-formed, this method returns an empty
-   * string.  An example of an ill-formed sequence is "\\u" followed by
-   * fewer than 4 hex digits.
-   *
-   * This function is similar to u_unescape() but not identical to it.
-   * The latter takes a source char*, so it does escape recognition
-   * and also invariant conversion.
-   *
-   * @return a string with backslash escapes interpreted, or an
-   * empty string on error.
-   * @see UnicodeString#unescapeAt()
-   * @see u_unescape()
-   * @see u_unescapeAt()
-   * @stable ICU 2.0
-   */
-  UnicodeString unescape() const;
-
-  /**
-   * Unescape a single escape sequence and return the represented
-   * character.  See unescape() for a listing of the recognized escape
-   * sequences.  The character at offset-1 is assumed (without
-   * checking) to be a backslash.  If the escape sequence is
-   * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
-   * returned.
-   *
-   * @param offset an input output parameter.  On input, it is the
-   * offset into this string where the escape sequence is located,
-   * after the initial backslash.  On output, it is advanced after the
-   * last character parsed.  On error, it is not advanced at all.
-   * @return the character represented by the escape sequence at
-   * offset, or (UChar32)0xFFFFFFFF on error.
-   * @see UnicodeString#unescape()
-   * @see u_unescape()
-   * @see u_unescapeAt()
-   * @stable ICU 2.0
-   */
-  UChar32 unescapeAt(int32_t &offset) const;
-
-  /**
-   * ICU "poor man's RTTI", returns a UClassID for this class.
-   *
-   * @stable ICU 2.2
-   */
-  static UClassID U_EXPORT2 getStaticClassID();
-
-  /**
-   * ICU "poor man's RTTI", returns a UClassID for the actual class.
-   *
-   * @stable ICU 2.2
-   */
-  virtual UClassID getDynamicClassID() const;
-
-  //========================================
-  // Implementation methods
-  //========================================
-
-protected:
-  /**
-   * Implement Replaceable::getLength() (see jitterbug 1027).
-   * @stable ICU 2.4
-   */
-  virtual int32_t getLength() const;
-
-  /**
-   * The change in Replaceable to use virtual getCharAt() allows
-   * UnicodeString::charAt() to be inline again (see jitterbug 709).
-   * @stable ICU 2.4
-   */
-  virtual UChar getCharAt(int32_t offset) const;
-
-  /**
-   * The change in Replaceable to use virtual getChar32At() allows
-   * UnicodeString::char32At() to be inline again (see jitterbug 709).
-   * @stable ICU 2.4
-   */
-  virtual UChar32 getChar32At(int32_t offset) const;
-
-private:
-
-  inline int8_t
-  doCompare(int32_t start,
-           int32_t length,
-           const UnicodeString& srcText,
-           int32_t srcStart,
-           int32_t srcLength) const;
-
-  int8_t doCompare(int32_t start,
-           int32_t length,
-           const UChar *srcChars,
-           int32_t srcStart,
-           int32_t srcLength) const;
-
-  inline int8_t
-  doCompareCodePointOrder(int32_t start,
-                          int32_t length,
-                          const UnicodeString& srcText,
-                          int32_t srcStart,
-                          int32_t srcLength) const;
-
-  int8_t doCompareCodePointOrder(int32_t start,
-                                 int32_t length,
-                                 const UChar *srcChars,
-                                 int32_t srcStart,
-                                 int32_t srcLength) const;
-
-  inline int8_t
-  doCaseCompare(int32_t start,
-                int32_t length,
-                const UnicodeString &srcText,
-                int32_t srcStart,
-                int32_t srcLength,
-                uint32_t options) const;
-
-  int8_t
-  doCaseCompare(int32_t start,
-                int32_t length,
-                const UChar *srcChars,
-                int32_t srcStart,
-                int32_t srcLength,
-                uint32_t options) const;
-
-  int32_t doIndexOf(UChar c,
-            int32_t start,
-            int32_t length) const;
-
-  int32_t doIndexOf(UChar32 c,
-                        int32_t start,
-                        int32_t length) const;
-
-  int32_t doLastIndexOf(UChar c,
-                int32_t start,
-                int32_t length) const;
-
-  int32_t doLastIndexOf(UChar32 c,
-                            int32_t start,
-                            int32_t length) const;
-
-  void doExtract(int32_t start,
-         int32_t length,
-         UChar *dst,
-         int32_t dstStart) const;
-
-  inline void doExtract(int32_t start,
-         int32_t length,
-         UnicodeString& target) const;
-
-  inline UChar doCharAt(int32_t offset)  const;
-
-  UnicodeString& doReplace(int32_t start,
-               int32_t length,
-               const UnicodeString& srcText,
-               int32_t srcStart,
-               int32_t srcLength);
-
-  UnicodeString& doReplace(int32_t start,
-               int32_t length,
-               const UChar *srcChars,
-               int32_t srcStart,
-               int32_t srcLength);
-
-  UnicodeString& doReverse(int32_t start,
-               int32_t length);
-
-  // calculate hash code
-  int32_t doHashCode(void) const;
-
-  // get pointer to start of array
-  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
-  inline UChar* getArrayStart(void);
-  inline const UChar* getArrayStart(void) const;
-
-  // A UnicodeString object (not necessarily its current buffer)
-  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
-  inline UBool isWritable() const;
-
-  // Is the current buffer writable?
-  inline UBool isBufferWritable() const;
-
-  // None of the following does releaseArray().
-  inline void setLength(int32_t len);        // sets only fShortLength and fLength
-  inline void setToEmpty();                  // sets fFlags=kShortString
-  inline void setToStackBuffer(int32_t len); // sets fFlags=kShortString
-  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
-
-  // allocate the array; result may be fStackBuffer
-  // sets refCount to 1 if appropriate
-  // sets fArray, fCapacity, and fFlags
-  // returns boolean for success or failure
-  UBool allocate(int32_t capacity);
-
-  // release the array if owned
-  void releaseArray(void);
-
-  // turn a bogus string into an empty one
-  void unBogus();
-
-  // implements assigment operator, copy constructor, and fastCopyFrom()
-  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
-
-  // Pin start and limit to acceptable values.
-  inline void pinIndex(int32_t& start) const;
-  inline void pinIndices(int32_t& start,
-                         int32_t& length) const;
-
-#if !UCONFIG_NO_CONVERSION
-
-  /* Internal extract() using UConverter. */
-  int32_t doExtract(int32_t start, int32_t length,
-                    char *dest, int32_t destCapacity,
-                    UConverter *cnv,
-                    UErrorCode &errorCode) const;
-
-  /*
-   * Real constructor for converting from codepage data.
-   * It assumes that it is called with !fRefCounted.
-   *
-   * If <code>codepage==0</code>, then the default converter
-   * is used for the platform encoding.
-   * If <code>codepage</code> is an empty string (<code>""</code>),
-   * then a simple conversion is performed on the codepage-invariant
-   * subset ("invariant characters") of the platform encoding. See utypes.h.
-   */
-  void doCodepageCreate(const char *codepageData,
-                        int32_t dataLength,
-                        const char *codepage);
-
-  /*
-   * Worker function for creating a UnicodeString from
-   * a codepage string using a UConverter.
-   */
-  void
-  doCodepageCreate(const char *codepageData,
-                   int32_t dataLength,
-                   UConverter *converter,
-                   UErrorCode &status);
-
-#endif
-
-  /*
-   * This function is called when write access to the array
-   * is necessary.
-   *
-   * We need to make a copy of the array if
-   * the buffer is read-only, or
-   * the buffer is refCounted (shared), and refCount>1, or
-   * the buffer is too small.
-   *
-   * Return FALSE if memory could not be allocated.
-   */
-  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
-                            int32_t growCapacity = -1,
-                            UBool doCopyArray = TRUE,
-                            int32_t **pBufferToDelete = 0,
-                            UBool forceClone = FALSE);
-
-  // common function for case mappings
-  UnicodeString &
-  caseMap(BreakIterator *titleIter,
-          const char *locale,
-          uint32_t options,
-          int32_t toWhichCase);
-
-  // ref counting
-  void addRef(void);
-  int32_t removeRef(void);
-  int32_t refCount(void) const;
-
-  // constants
-  enum {
-    // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):
-    // 32-bit pointers: 4+1+1+13*2 = 32 bytes
-    // 64-bit pointers: 8+1+1+15*2 = 40 bytes
-    US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings
-    kInvalidUChar=0xffff, // invalid UChar index
-    kGrowSize=128, // grow size for this buffer
-    kInvalidHashCode=0, // invalid hash code
-    kEmptyHashCode=1, // hash code for empty string
-
-    // bit flag values for fFlags
-    kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
-    kUsingStackBuffer=2,// fArray==fStackBuffer
-    kRefCounted=4,      // there is a refCount field before the characters in fArray
-    kBufferIsReadonly=8,// do not write to this buffer
-    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
-                        // and releaseBuffer(newLength) must be called
-
-    // combined values for convenience
-    kShortString=kUsingStackBuffer,
-    kLongString=kRefCounted,
-    kReadonlyAlias=kBufferIsReadonly,
-    kWritableAlias=0
-  };
-
-  friend class StringThreadTest;
-
-  union StackBufferOrFields;        // forward declaration necessary before friend declaration
-  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
-
-  /*
-   * The following are all the class fields that are stored
-   * in each UnicodeString object.
-   * Note that UnicodeString has virtual functions,
-   * therefore there is an implicit vtable pointer
-   * as the first real field.
-   * The fields should be aligned such that no padding is
-   * necessary, mostly by having larger types first.
-   * On 32-bit machines, the size should be 32 bytes,
-   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
-   */
-  // (implicit) *vtable;
-  int8_t    fShortLength;   // 0..127: length  <0: real length is in fUnion.fFields.fLength
-  uint8_t   fFlags;         // bit flags: see constants above
-  union StackBufferOrFields {
-    // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
-    // else fFields is used
-    UChar     fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
-    struct {
-      uint16_t  fPadding;   // align the following field at 8B (32b pointers) or 12B (64b)
-      int32_t   fLength;    // number of characters in fArray if >127; else undefined
-      UChar     *fArray;    // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
-      int32_t   fCapacity;  // sizeof fArray
-    } fFields;
-  } fUnion;
-};
-
-/**
- * Create a new UnicodeString with the concatenation of two others.
- *
- * @param s1 The first string to be copied to the new one.
- * @param s2 The second string to be copied to the new one, after s1.
- * @return UnicodeString(s1).append(s2)
- * @stable ICU 2.8
- */
-U_COMMON_API UnicodeString U_EXPORT2
-operator+ (const UnicodeString &s1, const UnicodeString &s2);
-
-//========================================
-// Inline members
-//========================================
-
-//========================================
-// Privates
-//========================================
-
-inline void
-UnicodeString::pinIndex(int32_t& start) const
-{
-  // pin index
-  if(start < 0) {
-    start = 0;
-  } else if(start > length()) {
-    start = length();
-  }
-}
-
-inline void
-UnicodeString::pinIndices(int32_t& start,
-                          int32_t& _length) const
-{
-  // pin indices
-  int32_t len = length();
-  if(start < 0) {
-    start = 0;
-  } else if(start > len) {
-    start = len;
-  }
-  if(_length < 0) {
-    _length = 0;
-  } else if(_length > (len - start)) {
-    _length = (len - start);
-  }
-}
-
-inline UChar*
-UnicodeString::getArrayStart()
-{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
-
-inline const UChar*
-UnicodeString::getArrayStart() const
-{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
-
-//========================================
-// Read-only implementation methods
-//========================================
-inline int32_t
-UnicodeString::length() const
-{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
-
-inline int32_t
-UnicodeString::getCapacity() const
-{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
-
-inline int32_t
-UnicodeString::hashCode() const
-{ return doHashCode(); }
-
-inline UBool
-UnicodeString::isBogus() const
-{ return (UBool)(fFlags & kIsBogus); }
-
-inline UBool
-UnicodeString::isWritable() const
-{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
-
-inline UBool
-UnicodeString::isBufferWritable() const
-{
-  return (UBool)(
-      !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
-      (!(fFlags&kRefCounted) || refCount()==1));
-}
-
-inline const UChar *
-UnicodeString::getBuffer() const {
-  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
-    return 0;
-  } else if(fFlags&kUsingStackBuffer) {
-    return fUnion.fStackBuffer;
-  } else {
-    return fUnion.fFields.fArray;
-  }
-}
-
-//========================================
-// Read-only alias methods
-//========================================
-inline int8_t
-UnicodeString::doCompare(int32_t start,
-              int32_t thisLength,
-              const UnicodeString& srcText,
-              int32_t srcStart,
-              int32_t srcLength) const
-{
-  if(srcText.isBogus()) {
-    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
-  } else {
-    srcText.pinIndices(srcStart, srcLength);
-    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
-  }
-}
-
-inline UBool
-UnicodeString::operator== (const UnicodeString& text) const
-{
-  if(isBogus()) {
-    return text.isBogus();
-  } else {
-    int32_t len = length(), textLength = text.length();
-    return
-      !text.isBogus() &&
-      len == textLength &&
-      doCompare(0, len, text, 0, textLength) == 0;
-  }
-}
-
-inline UBool
-UnicodeString::operator!= (const UnicodeString& text) const
-{ return (! operator==(text)); }
-
-inline UBool
-UnicodeString::operator> (const UnicodeString& text) const
-{ return doCompare(0, length(), text, 0, text.length()) == 1; }
-
-inline UBool
-UnicodeString::operator< (const UnicodeString& text) const
-{ return doCompare(0, length(), text, 0, text.length()) == -1; }
-
-inline UBool
-UnicodeString::operator>= (const UnicodeString& text) const
-{ return doCompare(0, length(), text, 0, text.length()) != -1; }
-
-inline UBool
-UnicodeString::operator<= (const UnicodeString& text) const
-{ return doCompare(0, length(), text, 0, text.length()) != 1; }
-
-inline int8_t
-UnicodeString::compare(const UnicodeString& text) const
-{ return doCompare(0, length(), text, 0, text.length()); }
-
-inline int8_t
-UnicodeString::compare(int32_t start,
-               int32_t _length,
-               const UnicodeString& srcText) const
-{ return doCompare(start, _length, srcText, 0, srcText.length()); }
-
-inline int8_t
-UnicodeString::compare(const UChar *srcChars,
-               int32_t srcLength) const
-{ return doCompare(0, length(), srcChars, 0, srcLength); }
-
-inline int8_t
-UnicodeString::compare(int32_t start,
-               int32_t _length,
-               const UnicodeString& srcText,
-               int32_t srcStart,
-               int32_t srcLength) const
-{ return doCompare(start, _length, srcText, srcStart, srcLength); }
-
-inline int8_t
-UnicodeString::compare(int32_t start,
-               int32_t _length,
-               const UChar *srcChars) const
-{ return doCompare(start, _length, srcChars, 0, _length); }
-
-inline int8_t
-UnicodeString::compare(int32_t start,
-               int32_t _length,
-               const UChar *srcChars,
-               int32_t srcStart,
-               int32_t srcLength) const
-{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
-
-inline int8_t
-UnicodeString::compareBetween(int32_t start,
-                  int32_t limit,
-                  const UnicodeString& srcText,
-                  int32_t srcStart,
-                  int32_t srcLimit) const
-{ return doCompare(start, limit - start,
-           srcText, srcStart, srcLimit - srcStart); }
-
-inline int8_t
-UnicodeString::doCompareCodePointOrder(int32_t start,
-                                       int32_t thisLength,
-                                       const UnicodeString& srcText,
-                                       int32_t srcStart,
-                                       int32_t srcLength) const
-{
-  if(srcText.isBogus()) {
-    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
-  } else {
-    srcText.pinIndices(srcStart, srcLength);
-    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
-  }
-}
-
-inline int8_t
-UnicodeString::compareCodePointOrder(const UnicodeString& text) const
-{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
-
-inline int8_t
-UnicodeString::compareCodePointOrder(int32_t start,
-                                     int32_t _length,
-                                     const UnicodeString& srcText) const
-{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
-
-inline int8_t
-UnicodeString::compareCodePointOrder(const UChar *srcChars,
-                                     int32_t srcLength) const
-{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
-
-inline int8_t
-UnicodeString::compareCodePointOrder(int32_t start,
-                                     int32_t _length,
-                                     const UnicodeString& srcText,
-                                     int32_t srcStart,
-                                     int32_t srcLength) const
-{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
-
-inline int8_t
-UnicodeString::compareCodePointOrder(int32_t start,
-                                     int32_t _length,
-                                     const UChar *srcChars) const
-{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
-
-inline int8_t
-UnicodeString::compareCodePointOrder(int32_t start,
-                                     int32_t _length,
-                                     const UChar *srcChars,
-                                     int32_t srcStart,
-                                     int32_t srcLength) const
-{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
-
-inline int8_t
-UnicodeString::compareCodePointOrderBetween(int32_t start,
-                                            int32_t limit,
-                                            const UnicodeString& srcText,
-                                            int32_t srcStart,
-                                            int32_t srcLimit) const
-{ return doCompareCodePointOrder(start, limit - start,
-           srcText, srcStart, srcLimit - srcStart); }
-
-inline int8_t
-UnicodeString::doCaseCompare(int32_t start,
-                             int32_t thisLength,
-                             const UnicodeString &srcText,
-                             int32_t srcStart,
-                             int32_t srcLength,
-                             uint32_t options) const
-{
-  if(srcText.isBogus()) {
-    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
-  } else {
-    srcText.pinIndices(srcStart, srcLength);
-    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
-  }
-}
-
-inline int8_t
-UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
-  return doCaseCompare(0, length(), text, 0, text.length(), options);
-}
-
-inline int8_t
-UnicodeString::caseCompare(int32_t start,
-                           int32_t _length,
-                           const UnicodeString &srcText,
-                           uint32_t options) const {
-  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
-}
-
-inline int8_t
-UnicodeString::caseCompare(const UChar *srcChars,
-                           int32_t srcLength,
-                           uint32_t options) const {
-  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
-}
-
-inline int8_t
-UnicodeString::caseCompare(int32_t start,
-                           int32_t _length,
-                           const UnicodeString &srcText,
-                           int32_t srcStart,
-                           int32_t srcLength,
-                           uint32_t options) const {
-  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
-}
-
-inline int8_t
-UnicodeString::caseCompare(int32_t start,
-                           int32_t _length,
-                           const UChar *srcChars,
-                           uint32_t options) const {
-  return doCaseCompare(start, _length, srcChars, 0, _length, options);
-}
-
-inline int8_t
-UnicodeString::caseCompare(int32_t start,
-                           int32_t _length,
-                           const UChar *srcChars,
-                           int32_t srcStart,
-                           int32_t srcLength,
-                           uint32_t options) const {
-  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
-}
-
-inline int8_t
-UnicodeString::caseCompareBetween(int32_t start,
-                                  int32_t limit,
-                                  const UnicodeString &srcText,
-                                  int32_t srcStart,
-                                  int32_t srcLimit,
-                                  uint32_t options) const {
-  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
-}
-
-inline int32_t
-UnicodeString::indexOf(const UnicodeString& srcText,
-               int32_t srcStart,
-               int32_t srcLength,
-               int32_t start,
-               int32_t _length) const
-{
-  if(!srcText.isBogus()) {
-    srcText.pinIndices(srcStart, srcLength);
-    if(srcLength > 0) {
-      return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
-    }
-  }
-  return -1;
-}
-
-inline int32_t
-UnicodeString::indexOf(const UnicodeString& text) const
-{ return indexOf(text, 0, text.length(), 0, length()); }
-
-inline int32_t
-UnicodeString::indexOf(const UnicodeString& text,
-               int32_t start) const {
-  pinIndex(start);
-  return indexOf(text, 0, text.length(), start, length() - start);
-}
-
-inline int32_t
-UnicodeString::indexOf(const UnicodeString& text,
-               int32_t start,
-               int32_t _length) const
-{ return indexOf(text, 0, text.length(), start, _length); }
-
-inline int32_t
-UnicodeString::indexOf(const UChar *srcChars,
-               int32_t srcLength,
-               int32_t start) const {
-  pinIndex(start);
-  return indexOf(srcChars, 0, srcLength, start, length() - start);
-}
-
-inline int32_t
-UnicodeString::indexOf(const UChar *srcChars,
-               int32_t srcLength,
-               int32_t start,
-               int32_t _length) const
-{ return indexOf(srcChars, 0, srcLength, start, _length); }
-
-inline int32_t
-UnicodeString::indexOf(UChar c,
-               int32_t start,
-               int32_t _length) const
-{ return doIndexOf(c, start, _length); }
-
-inline int32_t
-UnicodeString::indexOf(UChar32 c,
-               int32_t start,
-               int32_t _length) const
-{ return doIndexOf(c, start, _length); }
-
-inline int32_t
-UnicodeString::indexOf(UChar c) const
-{ return doIndexOf(c, 0, length()); }
-
-inline int32_t
-UnicodeString::indexOf(UChar32 c) const
-{ return indexOf(c, 0, length()); }
-
-inline int32_t
-UnicodeString::indexOf(UChar c,
-               int32_t start) const {
-  pinIndex(start);
-  return doIndexOf(c, start, length() - start);
-}
-
-inline int32_t
-UnicodeString::indexOf(UChar32 c,
-               int32_t start) const {
-  pinIndex(start);
-  return indexOf(c, start, length() - start);
-}
-
-inline int32_t
-UnicodeString::lastIndexOf(const UChar *srcChars,
-               int32_t srcLength,
-               int32_t start,
-               int32_t _length) const
-{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
-
-inline int32_t
-UnicodeString::lastIndexOf(const UChar *srcChars,
-               int32_t srcLength,
-               int32_t start) const {
-  pinIndex(start);
-  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
-}
-
-inline int32_t
-UnicodeString::lastIndexOf(const UnicodeString& srcText,
-               int32_t srcStart,
-               int32_t srcLength,
-               int32_t start,
-               int32_t _length) const
-{
-  if(!srcText.isBogus()) {
-    srcText.pinIndices(srcStart, srcLength);
-    if(srcLength > 0) {
-      return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
-    }
-  }
-  return -1;
-}
-
-inline int32_t
-UnicodeString::lastIndexOf(const UnicodeString& text,
-               int32_t start,
-               int32_t _length) const
-{ return lastIndexOf(text, 0, text.length(), start, _length); }
-
-inline int32_t
-UnicodeString::lastIndexOf(const UnicodeString& text,
-               int32_t start) const {
-  pinIndex(start);
-  return lastIndexOf(text, 0, text.length(), start, length() - start);
-}
-
-inline int32_t
-UnicodeString::lastIndexOf(const UnicodeString& text) const
-{ return lastIndexOf(text, 0, text.length(), 0, length()); }
-
-inline int32_t
-UnicodeString::lastIndexOf(UChar c,
-               int32_t start,
-               int32_t _length) const
-{ return doLastIndexOf(c, start, _length); }
-
-inline int32_t
-UnicodeString::lastIndexOf(UChar32 c,
-               int32_t start,
-               int32_t _length) const {
-  return doLastIndexOf(c, start, _length);
-}
-
-inline int32_t
-UnicodeString::lastIndexOf(UChar c) const
-{ return doLastIndexOf(c, 0, length()); }
-
-inline int32_t
-UnicodeString::lastIndexOf(UChar32 c) const {
-  return lastIndexOf(c, 0, length());
-}
-
-inline int32_t
-UnicodeString::lastIndexOf(UChar c,
-               int32_t start) const {
-  pinIndex(start);
-  return doLastIndexOf(c, start, length() - start);
-}
-
-inline int32_t
-UnicodeString::lastIndexOf(UChar32 c,
-               int32_t start) const {
-  pinIndex(start);
-  return lastIndexOf(c, start, length() - start);
-}
-
-inline UBool
-UnicodeString::startsWith(const UnicodeString& text) const
-{ return compare(0, text.length(), text, 0, text.length()) == 0; }
-
-inline UBool
-UnicodeString::startsWith(const UnicodeString& srcText,
-              int32_t srcStart,
-              int32_t srcLength) const
-{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
-
-inline UBool
-UnicodeString::startsWith(const UChar *srcChars,
-              int32_t srcLength) const
-{ return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
-
-inline UBool
-UnicodeString::startsWith(const UChar *srcChars,
-              int32_t srcStart,
-              int32_t srcLength) const
-{ return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
-
-inline UBool
-UnicodeString::endsWith(const UnicodeString& text) const
-{ return doCompare(length() - text.length(), text.length(),
-           text, 0, text.length()) == 0; }
-
-inline UBool
-UnicodeString::endsWith(const UnicodeString& srcText,
-            int32_t srcStart,
-            int32_t srcLength) const {
-  srcText.pinIndices(srcStart, srcLength);
-  return doCompare(length() - srcLength, srcLength,
-                   srcText, srcStart, srcLength) == 0;
-}
-
-inline UBool
-UnicodeString::endsWith(const UChar *srcChars,
-            int32_t srcLength) const {
-  if(srcLength < 0) {
-    srcLength = u_strlen(srcChars);
-  }
-  return doCompare(length() - srcLength, srcLength,
-                   srcChars, 0, srcLength) == 0;
-}
-
-inline UBool
-UnicodeString::endsWith(const UChar *srcChars,
-            int32_t srcStart,
-            int32_t srcLength) const {
-  if(srcLength < 0) {
-    srcLength = u_strlen(srcChars + srcStart);
-  }
-  return doCompare(length() - srcLength, srcLength,
-                   srcChars, srcStart, srcLength) == 0;
-}
-
-//========================================
-// replace
-//========================================
-inline UnicodeString&
-UnicodeString::replace(int32_t start,
-               int32_t _length,
-               const UnicodeString& srcText)
-{ return doReplace(start, _length, srcText, 0, srcText.length()); }
-
-inline UnicodeString&
-UnicodeString::replace(int32_t start,
-               int32_t _length,
-               const UnicodeString& srcText,
-               int32_t srcStart,
-               int32_t srcLength)
-{ return doReplace(start, _length, srcText, srcStart, srcLength); }
-
-inline UnicodeString&
-UnicodeString::replace(int32_t start,
-               int32_t _length,
-               const UChar *srcChars,
-               int32_t srcLength)
-{ return doReplace(start, _length, srcChars, 0, srcLength); }
-
-inline UnicodeString&
-UnicodeString::replace(int32_t start,
-               int32_t _length,
-               const UChar *srcChars,
-               int32_t srcStart,
-               int32_t srcLength)
-{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
-
-inline UnicodeString&
-UnicodeString::replace(int32_t start,
-               int32_t _length,
-               UChar srcChar)
-{ return doReplace(start, _length, &srcChar, 0, 1); }
-
-inline UnicodeString&
-UnicodeString::replace(int32_t start,
-               int32_t _length,
-               UChar32 srcChar) {
-  UChar buffer[U16_MAX_LENGTH];
-  int32_t count = 0;
-  UBool isError = FALSE;
-  U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
-  return doReplace(start, _length, buffer, 0, count);
-}
-
-inline UnicodeString&
-UnicodeString::replaceBetween(int32_t start,
-                  int32_t limit,
-                  const UnicodeString& srcText)
-{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
-
-inline UnicodeString&
-UnicodeString::replaceBetween(int32_t start,
-                  int32_t limit,
-                  const UnicodeString& srcText,
-                  int32_t srcStart,
-                  int32_t srcLimit)
-{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
-
-inline UnicodeString&
-UnicodeString::findAndReplace(const UnicodeString& oldText,
-                  const UnicodeString& newText)
-{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
-            newText, 0, newText.length()); }
-
-inline UnicodeString&
-UnicodeString::findAndReplace(int32_t start,
-                  int32_t _length,
-                  const UnicodeString& oldText,
-                  const UnicodeString& newText)
-{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
-            newText, 0, newText.length()); }
-
-// ============================
-// extract
-// ============================
-inline void
-UnicodeString::doExtract(int32_t start,
-             int32_t _length,
-             UnicodeString& target) const
-{ target.replace(0, target.length(), *this, start, _length); }
-
-inline void
-UnicodeString::extract(int32_t start,
-               int32_t _length,
-               UChar *target,
-               int32_t targetStart) const
-{ doExtract(start, _length, target, targetStart); }
-
-inline void
-UnicodeString::extract(int32_t start,
-               int32_t _length,
-               UnicodeString& target) const
-{ doExtract(start, _length, target); }
-
-#if !UCONFIG_NO_CONVERSION
-
-inline int32_t
-UnicodeString::extract(int32_t start,
-               int32_t _length,
-               char *dst,
-               const char *codepage) const
-
-{
-  // This dstSize value will be checked explicitly
-  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
-}
-
-#endif
-
-inline void
-UnicodeString::extractBetween(int32_t start,
-                  int32_t limit,
-                  UChar *dst,
-                  int32_t dstStart) const {
-  pinIndex(start);
-  pinIndex(limit);
-  doExtract(start, limit - start, dst, dstStart);
-}
-
-inline UChar
-UnicodeString::doCharAt(int32_t offset) const
-{
-  if((uint32_t)offset < (uint32_t)length()) {
-    return getArrayStart()[offset];
-  } else {
-    return kInvalidUChar;
-  }
-}
-
-inline UChar
-UnicodeString::charAt(int32_t offset) const
-{ return doCharAt(offset); }
-
-inline UChar
-UnicodeString::operator[] (int32_t offset) const
-{ return doCharAt(offset); }
-
-inline UChar32
-UnicodeString::char32At(int32_t offset) const
-{
-  int32_t len = length();
-  if((uint32_t)offset < (uint32_t)len) {
-    const UChar *array = getArrayStart();
-    UChar32 c;
-    U16_GET(array, 0, offset, len, c);
-    return c;
-  } else {
-    return kInvalidUChar;
-  }
-}
-
-inline int32_t
-UnicodeString::getChar32Start(int32_t offset) const {
-  if((uint32_t)offset < (uint32_t)length()) {
-    const UChar *array = getArrayStart();
-    U16_SET_CP_START(array, 0, offset);
-    return offset;
-  } else {
-    return 0;
-  }
-}
-
-inline int32_t
-UnicodeString::getChar32Limit(int32_t offset) const {
-  int32_t len = length();
-  if((uint32_t)offset < (uint32_t)len) {
-    const UChar *array = getArrayStart();
-    U16_SET_CP_LIMIT(array, 0, offset, len);
-    return offset;
-  } else {
-    return len;
-  }
-}
-
-inline UBool
-UnicodeString::isEmpty() const {
-  return fShortLength == 0;
-}
-
-//========================================
-// Write implementation methods
-//========================================
-inline void
-UnicodeString::setLength(int32_t len) {
-  if(len <= 127) {
-    fShortLength = (int8_t)len;
-  } else {
-    fShortLength = (int8_t)-1;
-    fUnion.fFields.fLength = len;
-  }
-}
-
-inline void
-UnicodeString::setToEmpty() {
-  fShortLength = 0;
-  fFlags = kShortString;
-}
-
-inline void
-UnicodeString::setToStackBuffer(int32_t len) {
-  fShortLength = (int8_t)len;
-  fFlags = kShortString;
-}
-
-inline void
-UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
-  setLength(len);
-  fUnion.fFields.fArray = array;
-  fUnion.fFields.fCapacity = capacity;
-}
-
-inline const UChar *
-UnicodeString::getTerminatedBuffer() {
-  if(!isWritable()) {
-    return 0;
-  } else {
-    UChar *array = getArrayStart();
-    int32_t len = length();
-    if(len < getCapacity() && array[len] == 0) {
-      return array;
-    } else if(cloneArrayIfNeeded(len+1)) {
-      array = getArrayStart();
-      array[len] = 0;
-      return array;
-    } else {
-      return 0;
-    }
-  }
-}
-
-inline UnicodeString&
-UnicodeString::operator= (UChar ch)
-{ return doReplace(0, length(), &ch, 0, 1); }
-
-inline UnicodeString&
-UnicodeString::operator= (UChar32 ch)
-{ return replace(0, length(), ch); }
-
-inline UnicodeString&
-UnicodeString::setTo(const UnicodeString& srcText,
-             int32_t srcStart,
-             int32_t srcLength)
-{
-  unBogus();
-  return doReplace(0, length(), srcText, srcStart, srcLength);
-}
-
-inline UnicodeString&
-UnicodeString::setTo(const UnicodeString& srcText,
-             int32_t srcStart)
-{
-  unBogus();
-  srcText.pinIndex(srcStart);
-  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
-}
-
-inline UnicodeString&
-UnicodeString::setTo(const UnicodeString& srcText)
-{
-  unBogus();
-  return doReplace(0, length(), srcText, 0, srcText.length());
-}
-
-inline UnicodeString&
-UnicodeString::setTo(const UChar *srcChars,
-             int32_t srcLength)
-{
-  unBogus();
-  return doReplace(0, length(), srcChars, 0, srcLength);
-}
-
-inline UnicodeString&
-UnicodeString::setTo(UChar srcChar)
-{
-  unBogus();
-  return doReplace(0, length(), &srcChar, 0, 1);
-}
-
-inline UnicodeString&
-UnicodeString::setTo(UChar32 srcChar)
-{
-  unBogus();
-  return replace(0, length(), srcChar);
-}
-
-inline UnicodeString&
-UnicodeString::append(const UnicodeString& srcText,
-              int32_t srcStart,
-              int32_t srcLength)
-{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
-
-inline UnicodeString&
-UnicodeString::append(const UnicodeString& srcText)
-{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
-
-inline UnicodeString&
-UnicodeString::append(const UChar *srcChars,
-              int32_t srcStart,
-              int32_t srcLength)
-{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
-
-inline UnicodeString&
-UnicodeString::append(const UChar *srcChars,
-              int32_t srcLength)
-{ return doReplace(length(), 0, srcChars, 0, srcLength); }
-
-inline UnicodeString&
-UnicodeString::append(UChar srcChar)
-{ return doReplace(length(), 0, &srcChar, 0, 1); }
-
-inline UnicodeString&
-UnicodeString::append(UChar32 srcChar) {
-  UChar buffer[U16_MAX_LENGTH];
-  int32_t _length = 0;
-  UBool isError = FALSE;
-  U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
-  return doReplace(length(), 0, buffer, 0, _length);
-}
-
-inline UnicodeString&
-UnicodeString::operator+= (UChar ch)
-{ return doReplace(length(), 0, &ch, 0, 1); }
-
-inline UnicodeString&
-UnicodeString::operator+= (UChar32 ch) {
-  return append(ch);
-}
-
-inline UnicodeString&
-UnicodeString::operator+= (const UnicodeString& srcText)
-{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
-
-inline UnicodeString&
-UnicodeString::insert(int32_t start,
-              const UnicodeString& srcText,
-              int32_t srcStart,
-              int32_t srcLength)
-{ return doReplace(start, 0, srcText, srcStart, srcLength); }
-
-inline UnicodeString&
-UnicodeString::insert(int32_t start,
-              const UnicodeString& srcText)
-{ return doReplace(start, 0, srcText, 0, srcText.length()); }
-
-inline UnicodeString&
-UnicodeString::insert(int32_t start,
-              const UChar *srcChars,
-              int32_t srcStart,
-              int32_t srcLength)
-{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
-
-inline UnicodeString&
-UnicodeString::insert(int32_t start,
-              const UChar *srcChars,
-              int32_t srcLength)
-{ return doReplace(start, 0, srcChars, 0, srcLength); }
-
-inline UnicodeString&
-UnicodeString::insert(int32_t start,
-              UChar srcChar)
-{ return doReplace(start, 0, &srcChar, 0, 1); }
-
-inline UnicodeString&
-UnicodeString::insert(int32_t start,
-              UChar32 srcChar)
-{ return replace(start, 0, srcChar); }
-
-
-inline UnicodeString&
-UnicodeString::remove()
-{
-  // remove() of a bogus string makes the string empty and non-bogus
-  if(isBogus()) {
-    unBogus();
-  } else {
-    setLength(0);
-  }
-  return *this;
-}
-
-inline UnicodeString&
-UnicodeString::remove(int32_t start,
-             int32_t _length)
-{
-    if(start <= 0 && _length == INT32_MAX) {
-        // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
-        return remove();
-    }
-    return doReplace(start, _length, NULL, 0, 0);
-}
-
-inline UnicodeString&
-UnicodeString::removeBetween(int32_t start,
-                int32_t limit)
-{ return doReplace(start, limit - start, NULL, 0, 0); }
-
-inline UBool
-UnicodeString::truncate(int32_t targetLength)
-{
-  if(isBogus() && targetLength == 0) {
-    // truncate(0) of a bogus string makes the string empty and non-bogus
-    unBogus();
-    return FALSE;
-  } else if((uint32_t)targetLength < (uint32_t)length()) {
-    setLength(targetLength);
-    return TRUE;
-  } else {
-    return FALSE;
-  }
-}
-
-inline UnicodeString&
-UnicodeString::reverse()
-{ return doReverse(0, length()); }
-
-inline UnicodeString&
-UnicodeString::reverse(int32_t start,
-               int32_t _length)
-{ return doReverse(start, _length); }
-
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/unistr.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/unistr.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/unistr.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/unistr.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,4230 @@
+/*
+**********************************************************************
+*   Copyright (C) 1998-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+* File unistr.h
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   09/25/98    stephen     Creation.
+*   11/11/98    stephen     Changed per 11/9 code review.
+*   04/20/99    stephen     Overhauled per 4/16 code review.
+*   11/18/99    aliu        Made to inherit from Replaceable.  Added method
+*                           handleReplaceBetween(); other methods unchanged.
+*   06/25/01    grhoten     Remove dependency on iostream.
+******************************************************************************
+*/
+
+#ifndef UNISTR_H
+#define UNISTR_H
+
+/**
+ * \file 
+ * \brief C++ API: Unicode String 
+ */
+
+#include "unicode/rep.h"
+
+struct UConverter;          // unicode/ucnv.h
+class  StringThreadTest;
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also ustring.h and unorm.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER  0x8000
+#endif
+
+#ifndef USTRING_H
+/**
+ * \ingroup ustring_ustrlen
+ */
+U_STABLE int32_t U_EXPORT2
+u_strlen(const UChar *s);
+#endif
+
+U_NAMESPACE_BEGIN
+
+class Locale;               // unicode/locid.h
+class StringCharacterIterator;
+class BreakIterator;        // unicode/brkiter.h
+
+/* The <iostream> include has been moved to unicode/ustream.h */
+
+/**
+ * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+ * which constructs a Unicode string from an invariant-character char * string.
+ * About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * @stable ICU 3.2
+ */
+#define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
+
+/**
+ * Unicode String literals in C++.
+ * Dependent on the platform properties, different UnicodeString
+ * constructors should be used to create a UnicodeString object from
+ * a string literal.
+ * The macros are defined for maximum performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * <code>NUL</code>, must be specified as a constant.
+ * The U_STRING_DECL macro should be invoked exactly once for one
+ * such string variable before it is used.
+ * @stable ICU 2.0
+ */
+#if defined(U_DECLARE_UTF16)
+#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
+#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
+#else
+#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
+#endif
+
+/**
+ * Unicode String literals in C++.
+ * Dependent on the platform properties, different UnicodeString
+ * constructors should be used to create a UnicodeString object from
+ * a string literal.
+ * The macros are defined for improved performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * @stable ICU 2.0
+ */
+#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
+
+/**
+ * UnicodeString is a string class that stores Unicode characters directly and provides
+ * similar functionality as the Java String and StringBuffer classes.
+ * It is a concrete implementation of the abstract class Replaceable (for transliteration).
+ *
+ * The UnicodeString class is not suitable for subclassing.
+ *
+ * <p>For an overview of Unicode strings in C and C++ see the
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ *
+ * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
+ * A Unicode character may be stored with either one code unit
+ * (the most common case) or with a matched pair of special code units
+ * ("surrogates"). The data type for code units is UChar. 
+ * For single-character handling, a Unicode character code <em>point</em> is a value
+ * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
+ *
+ * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
+ * This is the same as with multi-byte char* strings in traditional string handling.
+ * Operations on partial strings typically do not test for code point boundaries.
+ * If necessary, the user needs to take care of such boundaries by testing for the code unit
+ * values or by using functions like
+ * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
+ * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
+ *
+ * UnicodeString methods are more lenient with regard to input parameter values
+ * than other ICU APIs. In particular:
+ * - If indexes are out of bounds for a UnicodeString object
+ *   (<0 or >length()) then they are "pinned" to the nearest boundary.
+ * - If primitive string pointer values (e.g., const UChar * or char *)
+ *   for input strings are NULL, then those input string parameters are treated
+ *   as if they pointed to an empty string.
+ *   However, this is <em>not</em> the case for char * parameters for charset names
+ *   or other IDs.
+ * - Most UnicodeString methods do not take a UErrorCode parameter because
+ *   there are usually very few opportunities for failure other than a shortage
+ *   of memory, error codes in low-level C++ string methods would be inconvenient,
+ *   and the error code as the last parameter (ICU convention) would prevent
+ *   the use of default parameter values.
+ *   Instead, such methods set the UnicodeString into a "bogus" state
+ *   (see isBogus()) if an error occurs.
+ *
+ * In string comparisons, two UnicodeString objects that are both "bogus"
+ * compare equal (to be transitive and prevent endless loops in sorting),
+ * and a "bogus" string compares less than any non-"bogus" one.
+ *
+ * Const UnicodeString methods are thread-safe. Multiple threads can use
+ * const methods on the same UnicodeString object simultaneously,
+ * but non-const methods must not be called concurrently (in multiple threads)
+ * with any other (const or non-const) methods.
+ *
+ * Similarly, const UnicodeString & parameters are thread-safe.
+ * One object may be passed in as such a parameter concurrently in multiple threads.
+ * This includes the const UnicodeString & parameters for
+ * copy construction, assignment, and cloning.
+ *
+ * <p>UnicodeString uses several storage methods.
+ * String contents can be stored inside the UnicodeString object itself,
+ * in an allocated and shared buffer, or in an outside buffer that is "aliased".
+ * Most of this is done transparently, but careful aliasing in particular provides
+ * significant performance improvements.
+ * Also, the internal buffer is accessible via special functions.
+ * For details see the
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ *
+ * @see utf.h
+ * @see CharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeString : public Replaceable
+{
+public:
+
+  /**
+   * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+   * which constructs a Unicode string from an invariant-character char * string.
+   * Use the macro US_INV instead of the full qualification for this value.
+   *
+   * @see US_INV
+   * @stable ICU 3.2
+   */
+  enum EInvariant {
+    /**
+     * @see EInvariant
+     * @stable ICU 3.2
+     */
+    kInvariant
+  };
+
+  //========================================
+  // Read-only operations
+  //========================================
+
+  /* Comparison - bitwise only - for international comparison use collation */
+
+  /**
+   * Equality operator. Performs only bitwise comparison.
+   * @param text The UnicodeString to compare to this one.
+   * @return TRUE if <TT>text</TT> contains the same characters as this one,
+   * FALSE otherwise.
+   * @stable ICU 2.0
+   */
+  inline UBool operator== (const UnicodeString& text) const;
+
+  /**
+   * Inequality operator. Performs only bitwise comparison.
+   * @param text The UnicodeString to compare to this one.
+   * @return FALSE if <TT>text</TT> contains the same characters as this one,
+   * TRUE otherwise.
+   * @stable ICU 2.0
+   */
+  inline UBool operator!= (const UnicodeString& text) const;
+
+  /**
+   * Greater than operator. Performs only bitwise comparison.
+   * @param text The UnicodeString to compare to this one.
+   * @return TRUE if the characters in this are bitwise
+   * greater than the characters in <code>text</code>, FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool operator> (const UnicodeString& text) const;
+
+  /**
+   * Less than operator. Performs only bitwise comparison.
+   * @param text The UnicodeString to compare to this one.
+   * @return TRUE if the characters in this are bitwise
+   * less than the characters in <code>text</code>, FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool operator< (const UnicodeString& text) const;
+
+  /**
+   * Greater than or equal operator. Performs only bitwise comparison.
+   * @param text The UnicodeString to compare to this one.
+   * @return TRUE if the characters in this are bitwise
+   * greater than or equal to the characters in <code>text</code>, FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool operator>= (const UnicodeString& text) const;
+
+  /**
+   * Less than or equal operator. Performs only bitwise comparison.
+   * @param text The UnicodeString to compare to this one.
+   * @return TRUE if the characters in this are bitwise
+   * less than or equal to the characters in <code>text</code>, FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool operator<= (const UnicodeString& text) const;
+
+  /**
+   * Compare the characters bitwise in this UnicodeString to
+   * the characters in <code>text</code>.
+   * @param text The UnicodeString to compare to this one.
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>text</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>text</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>text</code>.
+   * @stable ICU 2.0
+   */
+  inline int8_t compare(const UnicodeString& text) const;
+
+  /**
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+   * in <TT>text</TT>
+   * @param start the offset at which the compare operation begins
+   * @param length the number of characters of text to compare.
+   * @param text the other text to be compared against this string.
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>text</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>text</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>text</code>.
+   * @stable ICU 2.0
+   */
+  inline int8_t compare(int32_t start,
+         int32_t length,
+         const UnicodeString& text) const;
+
+  /**
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * @param start the offset at which the compare operation begins
+   * @param length the number of characters in this to compare.
+   * @param srcText the text to be compared
+   * @param srcStart the offset into <TT>srcText</TT> to start comparison
+   * @param srcLength the number of characters in <TT>src</TT> to compare
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcText</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcText</code>.
+   * @stable ICU 2.0
+   */
+   inline int8_t compare(int32_t start,
+         int32_t length,
+         const UnicodeString& srcText,
+         int32_t srcStart,
+         int32_t srcLength) const;
+
+  /**
+   * Compare the characters bitwise in this UnicodeString with the first
+   * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
+   * @param srcChars The characters to compare to this UnicodeString.
+   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcChars</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcChars</code>.
+   * @stable ICU 2.0
+   */
+  inline int8_t compare(const UChar *srcChars,
+         int32_t srcLength) const;
+
+  /**
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the first
+   * <TT>length</TT> characters in <TT>srcChars</TT>
+   * @param start the offset at which the compare operation begins
+   * @param length the number of characters to compare.
+   * @param srcChars the characters to be compared
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcChars</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcChars</code>.
+   * @stable ICU 2.0
+   */
+  inline int8_t compare(int32_t start,
+         int32_t length,
+         const UChar *srcChars) const;
+
+  /**
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+   * in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * @param start the offset at which the compare operation begins
+   * @param length the number of characters in this to compare
+   * @param srcChars the characters to be compared
+   * @param srcStart the offset into <TT>srcChars</TT> to start comparison
+   * @param srcLength the number of characters in <TT>srcChars</TT> to compare
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcChars</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcChars</code>.
+   * @stable ICU 2.0
+   */
+  inline int8_t compare(int32_t start,
+         int32_t length,
+         const UChar *srcChars,
+         int32_t srcStart,
+         int32_t srcLength) const;
+
+  /**
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>limit</TT>) with the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
+   * @param start the offset at which the compare operation begins
+   * @param limit the offset immediately following the compare operation
+   * @param srcText the text to be compared
+   * @param srcStart the offset into <TT>srcText</TT> to start comparison
+   * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcText</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcText</code>.
+   * @stable ICU 2.0
+   */
+  inline int8_t compareBetween(int32_t start,
+            int32_t limit,
+            const UnicodeString& srcText,
+            int32_t srcStart,
+            int32_t srcLimit) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param text Another string to compare this one to.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcText Another string to compare this one to.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+  inline int8_t compareCodePointOrder(int32_t start,
+                                      int32_t length,
+                                      const UnicodeString& srcText) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcText Another string to compare this one to.
+   * @param srcStart The start offset in that string at which the compare operation begins.
+   * @param srcLength The number of code units from that string to compare.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+   inline int8_t compareCodePointOrder(int32_t start,
+                                       int32_t length,
+                                       const UnicodeString& srcText,
+                                       int32_t srcStart,
+                                       int32_t srcLength) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param srcChars A pointer to another string to compare this one to.
+   * @param srcLength The number of code units from that string to compare.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+  inline int8_t compareCodePointOrder(const UChar *srcChars,
+                                      int32_t srcLength) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcChars A pointer to another string to compare this one to.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+  inline int8_t compareCodePointOrder(int32_t start,
+                                      int32_t length,
+                                      const UChar *srcChars) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcChars A pointer to another string to compare this one to.
+   * @param srcStart The start offset in that string at which the compare operation begins.
+   * @param srcLength The number of code units from that string to compare.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+  inline int8_t compareCodePointOrder(int32_t start,
+                                      int32_t length,
+                                      const UChar *srcChars,
+                                      int32_t srcStart,
+                                      int32_t srcLength) const;
+
+  /**
+   * Compare two Unicode strings in code point order.
+   * The result may be different from the results of compare(), operator<, etc.
+   * if supplementary characters are present:
+   *
+   * In UTF-16, supplementary characters (with code points U+10000 and above) are
+   * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+   * which means that they compare as less than some other BMP characters like U+feff.
+   * This function compares Unicode strings in code point order.
+   * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param limit The offset after the last code unit from this string to compare.
+   * @param srcText Another string to compare this one to.
+   * @param srcStart The start offset in that string at which the compare operation begins.
+   * @param srcLimit The offset after the last code unit from that string to compare.
+   * @return a negative/zero/positive integer corresponding to whether
+   * this string is less than/equal to/greater than the second one
+   * in code point order
+   * @stable ICU 2.0
+   */
+  inline int8_t compareCodePointOrderBetween(int32_t start,
+                                             int32_t limit,
+                                             const UnicodeString& srcText,
+                                             int32_t srcStart,
+                                             int32_t srcLimit) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
+   *
+   * @param text Another string to compare this one to.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcText Another string to compare this one to.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompare(int32_t start,
+         int32_t length,
+         const UnicodeString& srcText,
+         uint32_t options) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcText Another string to compare this one to.
+   * @param srcStart The start offset in that string at which the compare operation begins.
+   * @param srcLength The number of code units from that string to compare.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompare(int32_t start,
+         int32_t length,
+         const UnicodeString& srcText,
+         int32_t srcStart,
+         int32_t srcLength,
+         uint32_t options) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+   *
+   * @param srcChars A pointer to another string to compare this one to.
+   * @param srcLength The number of code units from that string to compare.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompare(const UChar *srcChars,
+         int32_t srcLength,
+         uint32_t options) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcChars A pointer to another string to compare this one to.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompare(int32_t start,
+         int32_t length,
+         const UChar *srcChars,
+         uint32_t options) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param length The number of code units from this string to compare.
+   * @param srcChars A pointer to another string to compare this one to.
+   * @param srcStart The start offset in that string at which the compare operation begins.
+   * @param srcLength The number of code units from that string to compare.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompare(int32_t start,
+         int32_t length,
+         const UChar *srcChars,
+         int32_t srcStart,
+         int32_t srcLength,
+         uint32_t options) const;
+
+  /**
+   * Compare two strings case-insensitively using full case folding.
+   * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)).
+   *
+   * @param start The start offset in this string at which the compare operation begins.
+   * @param limit The offset after the last code unit from this string to compare.
+   * @param srcText Another string to compare this one to.
+   * @param srcStart The start offset in that string at which the compare operation begins.
+   * @param srcLimit The offset after the last code unit from that string to compare.
+   * @param options A bit set of options:
+   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+   *     Comparison in code unit order with default case folding.
+   *
+   *   - U_COMPARE_CODE_POINT_ORDER
+   *     Set to choose code point order instead of code unit order
+   *     (see u_strCompare for details).
+   *
+   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   *
+   * @return A negative, zero, or positive integer indicating the comparison result.
+   * @stable ICU 2.0
+   */
+  inline int8_t caseCompareBetween(int32_t start,
+            int32_t limit,
+            const UnicodeString& srcText,
+            int32_t srcStart,
+            int32_t srcLimit,
+            uint32_t options) const;
+
+  /**
+   * Determine if this starts with the characters in <TT>text</TT>
+   * @param text The text to match.
+   * @return TRUE if this starts with the characters in <TT>text</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool startsWith(const UnicodeString& text) const;
+
+  /**
+   * Determine if this starts with the characters in <TT>srcText</TT>
+   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * @param srcText The text to match.
+   * @param srcStart the offset into <TT>srcText</TT> to start matching
+   * @param srcLength the number of characters in <TT>srcText</TT> to match
+   * @return TRUE if this starts with the characters in <TT>text</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool startsWith(const UnicodeString& srcText,
+            int32_t srcStart,
+            int32_t srcLength) const;
+
+  /**
+   * Determine if this starts with the characters in <TT>srcChars</TT>
+   * @param srcChars The characters to match.
+   * @param srcLength the number of characters in <TT>srcChars</TT>
+   * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool startsWith(const UChar *srcChars,
+            int32_t srcLength) const;
+
+  /**
+   * Determine if this ends with the characters in <TT>srcChars</TT>
+   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * @param srcChars The characters to match.
+   * @param srcStart the offset into <TT>srcText</TT> to start matching
+   * @param srcLength the number of characters in <TT>srcChars</TT> to match
+   * @return TRUE if this ends with the characters in <TT>srcChars</TT>, FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool startsWith(const UChar *srcChars,
+            int32_t srcStart,
+            int32_t srcLength) const;
+
+  /**
+   * Determine if this ends with the characters in <TT>text</TT>
+   * @param text The text to match.
+   * @return TRUE if this ends with the characters in <TT>text</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool endsWith(const UnicodeString& text) const;
+
+  /**
+   * Determine if this ends with the characters in <TT>srcText</TT>
+   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * @param srcText The text to match.
+   * @param srcStart the offset into <TT>srcText</TT> to start matching
+   * @param srcLength the number of characters in <TT>srcText</TT> to match
+   * @return TRUE if this ends with the characters in <TT>text</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool endsWith(const UnicodeString& srcText,
+          int32_t srcStart,
+          int32_t srcLength) const;
+
+  /**
+   * Determine if this ends with the characters in <TT>srcChars</TT>
+   * @param srcChars The characters to match.
+   * @param srcLength the number of characters in <TT>srcChars</TT>
+   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool endsWith(const UChar *srcChars,
+          int32_t srcLength) const;
+
+  /**
+   * Determine if this ends with the characters in <TT>srcChars</TT>
+   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * @param srcChars The characters to match.
+   * @param srcStart the offset into <TT>srcText</TT> to start matching
+   * @param srcLength the number of characters in <TT>srcChars</TT> to match
+   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
+   * FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool endsWith(const UChar *srcChars,
+          int32_t srcStart,
+          int32_t srcLength) const;
+
+
+  /* Searching - bitwise only */
+
+  /**
+   * Locate in this the first occurrence of the characters in <TT>text</TT>,
+   * using bitwise comparison.
+   * @param text The text to search for.
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(const UnicodeString& text) const;
+
+  /**
+   * Locate in this the first occurrence of the characters in <TT>text</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   * @param text The text to search for.
+   * @param start The offset at which searching will start.
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(const UnicodeString& text,
+              int32_t start) const;
+
+  /**
+   * Locate in this the first occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>text</TT>, using bitwise comparison.
+   * @param text The text to search for.
+   * @param start The offset at which searching will start.
+   * @param length The number of characters to search
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(const UnicodeString& text,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the first occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   *  in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+   * using bitwise comparison.
+   * @param srcText The text to search for.
+   * @param srcStart the offset into <TT>srcText</TT> at which
+   * to start matching
+   * @param srcLength the number of characters in <TT>srcText</TT> to match
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(const UnicodeString& srcText,
+              int32_t srcStart,
+              int32_t srcLength,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the first occurrence of the characters in
+   * <TT>srcChars</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   * @param srcChars The text to search for.
+   * @param srcLength the number of characters in <TT>srcChars</TT> to match
+   * @param start the offset into this at which to start matching
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(const UChar *srcChars,
+              int32_t srcLength,
+              int32_t start) const;
+
+  /**
+   * Locate in this the first occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcChars</TT>, using bitwise comparison.
+   * @param srcChars The text to search for.
+   * @param srcLength the number of characters in <TT>srcChars</TT>
+   * @param start The offset at which searching will start.
+   * @param length The number of characters to search
+   * @return The offset into this of the start of <TT>srcChars</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(const UChar *srcChars,
+              int32_t srcLength,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the first occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+   * using bitwise comparison.
+   * @param srcChars The text to search for.
+   * @param srcStart the offset into <TT>srcChars</TT> at which
+   * to start matching
+   * @param srcLength the number of characters in <TT>srcChars</TT> to match
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  int32_t indexOf(const UChar *srcChars,
+              int32_t srcStart,
+              int32_t srcLength,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the first occurrence of the BMP code point <code>c</code>,
+   * using bitwise comparison.
+   * @param c The code unit to search for.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(UChar c) const;
+
+  /**
+   * Locate in this the first occurrence of the code point <TT>c</TT>,
+   * using bitwise comparison.
+   *
+   * @param c The code point to search for.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(UChar32 c) const;
+
+  /**
+   * Locate in this the first occurrence of the BMP code point <code>c</code>,
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   * @param c The code unit to search for.
+   * @param start The offset at which searching will start.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(UChar c,
+              int32_t start) const;
+
+  /**
+   * Locate in this the first occurrence of the code point <TT>c</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   *
+   * @param c The code point to search for.
+   * @param start The offset at which searching will start.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(UChar32 c,
+              int32_t start) const;
+
+  /**
+   * Locate in this the first occurrence of the BMP code point <code>c</code>
+   * in the range [<TT>start</TT>, <TT>start + length</TT>),
+   * using bitwise comparison.
+   * @param c The code unit to search for.
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(UChar c,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the first occurrence of the code point <TT>c</TT>
+   * in the range [<TT>start</TT>, <TT>start + length</TT>),
+   * using bitwise comparison.
+   *
+   * @param c The code point to search for.
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t indexOf(UChar32 c,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the last occurrence of the characters in <TT>text</TT>,
+   * using bitwise comparison.
+   * @param text The text to search for.
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(const UnicodeString& text) const;
+
+  /**
+   * Locate in this the last occurrence of the characters in <TT>text</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   * @param text The text to search for.
+   * @param start The offset at which searching will start.
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(const UnicodeString& text,
+              int32_t start) const;
+
+  /**
+   * Locate in this the last occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>text</TT>, using bitwise comparison.
+   * @param text The text to search for.
+   * @param start The offset at which searching will start.
+   * @param length The number of characters to search
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(const UnicodeString& text,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the last occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+   * using bitwise comparison.
+   * @param srcText The text to search for.
+   * @param srcStart the offset into <TT>srcText</TT> at which
+   * to start matching
+   * @param srcLength the number of characters in <TT>srcText</TT> to match
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(const UnicodeString& srcText,
+              int32_t srcStart,
+              int32_t srcLength,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   * @param srcChars The text to search for.
+   * @param srcLength the number of characters in <TT>srcChars</TT> to match
+   * @param start the offset into this at which to start matching
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(const UChar *srcChars,
+              int32_t srcLength,
+              int32_t start) const;
+
+  /**
+   * Locate in this the last occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcChars</TT>, using bitwise comparison.
+   * @param srcChars The text to search for.
+   * @param srcLength the number of characters in <TT>srcChars</TT>
+   * @param start The offset at which searching will start.
+   * @param length The number of characters to search
+   * @return The offset into this of the start of <TT>srcChars</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(const UChar *srcChars,
+              int32_t srcLength,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the last occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+   * using bitwise comparison.
+   * @param srcChars The text to search for.
+   * @param srcStart the offset into <TT>srcChars</TT> at which
+   * to start matching
+   * @param srcLength the number of characters in <TT>srcChars</TT> to match
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of the start of <TT>text</TT>,
+   * or -1 if not found.
+   * @stable ICU 2.0
+   */
+  int32_t lastIndexOf(const UChar *srcChars,
+              int32_t srcStart,
+              int32_t srcLength,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the last occurrence of the BMP code point <code>c</code>,
+   * using bitwise comparison.
+   * @param c The code unit to search for.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(UChar c) const;
+
+  /**
+   * Locate in this the last occurrence of the code point <TT>c</TT>,
+   * using bitwise comparison.
+   *
+   * @param c The code point to search for.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(UChar32 c) const;
+
+  /**
+   * Locate in this the last occurrence of the BMP code point <code>c</code>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   * @param c The code unit to search for.
+   * @param start The offset at which searching will start.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(UChar c,
+              int32_t start) const;
+
+  /**
+   * Locate in this the last occurrence of the code point <TT>c</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
+   *
+   * @param c The code point to search for.
+   * @param start The offset at which searching will start.
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(UChar32 c,
+              int32_t start) const;
+
+  /**
+   * Locate in this the last occurrence of the BMP code point <code>c</code>
+   * in the range [<TT>start</TT>, <TT>start + length</TT>),
+   * using bitwise comparison.
+   * @param c The code unit to search for.
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(UChar c,
+              int32_t start,
+              int32_t length) const;
+
+  /**
+   * Locate in this the last occurrence of the code point <TT>c</TT>
+   * in the range [<TT>start</TT>, <TT>start + length</TT>),
+   * using bitwise comparison.
+   *
+   * @param c The code point to search for.
+   * @param start the offset into this at which to start matching
+   * @param length the number of characters in this to search
+   * @return The offset into this of <TT>c</TT>, or -1 if not found.
+   * @stable ICU 2.0
+   */
+  inline int32_t lastIndexOf(UChar32 c,
+              int32_t start,
+              int32_t length) const;
+
+
+  /* Character access */
+
+  /**
+   * Return the code unit at offset <tt>offset</tt>.
+   * If the offset is not valid (0..length()-1) then U+ffff is returned.
+   * @param offset a valid offset into the text
+   * @return the code unit at offset <tt>offset</tt>
+   *         or 0xffff if the offset is not valid for this string
+   * @stable ICU 2.0
+   */
+  inline UChar charAt(int32_t offset) const;
+
+  /**
+   * Return the code unit at offset <tt>offset</tt>.
+   * If the offset is not valid (0..length()-1) then U+ffff is returned.
+   * @param offset a valid offset into the text
+   * @return the code unit at offset <tt>offset</tt>
+   * @stable ICU 2.0
+   */
+  inline UChar operator[] (int32_t offset) const;
+
+  /**
+   * Return the code point that contains the code unit
+   * at offset <tt>offset</tt>.
+   * If the offset is not valid (0..length()-1) then U+ffff is returned.
+   * @param offset a valid offset into the text
+   * that indicates the text offset of any of the code units
+   * that will be assembled into a code point (21-bit value) and returned
+   * @return the code point of text at <tt>offset</tt>
+   *         or 0xffff if the offset is not valid for this string
+   * @stable ICU 2.0
+   */
+  inline UChar32 char32At(int32_t offset) const;
+
+  /**
+   * Adjust a random-access offset so that
+   * it points to the beginning of a Unicode character.
+   * The offset that is passed in points to
+   * any code unit of a code point,
+   * while the returned offset will point to the first code unit
+   * of the same code point.
+   * In UTF-16, if the input offset points to a second surrogate
+   * of a surrogate pair, then the returned offset will point
+   * to the first surrogate.
+   * @param offset a valid offset into one code point of the text
+   * @return offset of the first code unit of the same code point
+   * @see U16_SET_CP_START
+   * @stable ICU 2.0
+   */
+  inline int32_t getChar32Start(int32_t offset) const;
+
+  /**
+   * Adjust a random-access offset so that
+   * it points behind a Unicode character.
+   * The offset that is passed in points behind
+   * any code unit of a code point,
+   * while the returned offset will point behind the last code unit
+   * of the same code point.
+   * In UTF-16, if the input offset points behind the first surrogate
+   * (i.e., to the second surrogate)
+   * of a surrogate pair, then the returned offset will point
+   * behind the second surrogate (i.e., to the first surrogate).
+   * @param offset a valid offset after any code unit of a code point of the text
+   * @return offset of the first code unit after the same code point
+   * @see U16_SET_CP_LIMIT
+   * @stable ICU 2.0
+   */
+  inline int32_t getChar32Limit(int32_t offset) const;
+
+  /**
+   * Move the code unit index along the string by delta code points.
+   * Interpret the input index as a code unit-based offset into the string,
+   * move the index forward or backward by delta code points, and
+   * return the resulting index.
+   * The input index should point to the first code unit of a code point,
+   * if there is more than one.
+   *
+   * Both input and output indexes are code unit-based as for all
+   * string indexes/offsets in ICU (and other libraries, like MBCS char*).
+   * If delta<0 then the index is moved backward (toward the start of the string).
+   * If delta>0 then the index is moved forward (toward the end of the string).
+   *
+   * This behaves like CharacterIterator::move32(delta, kCurrent).
+   *
+   * Behavior for out-of-bounds indexes:
+   * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
+   * if the input index<0 then it is pinned to 0;
+   * if it is index>length() then it is pinned to length().
+   * Afterwards, the index is moved by <code>delta</code> code points
+   * forward or backward,
+   * but no further backward than to 0 and no further forward than to length().
+   * The resulting index return value will be in between 0 and length(), inclusively.
+   *
+   * Examples:
+   * <pre>
+   * // s has code points 'a' U+10000 'b' U+10ffff U+2029
+   * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
+   *
+   * // initial index: position of U+10000
+   * int32_t index=1;
+   *
+   * // the following examples will all result in index==4, position of U+10ffff
+   *
+   * // skip 2 code points from some position in the string
+   * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
+   *
+   * // go to the 3rd code point from the start of s (0-based)
+   * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
+   *
+   * // go to the next-to-last code point of s
+   * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
+   * </pre>
+   *
+   * @param index input code unit index
+   * @param delta (signed) code point count to move the index forward or backward
+   *        in the string
+   * @return the resulting code unit index
+   * @stable ICU 2.0
+   */
+  int32_t moveIndex32(int32_t index, int32_t delta) const;
+
+  /* Substring extraction */
+
+  /**
+   * Copy the characters in the range
+   * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
+   * beginning at <tt>dstStart</tt>.
+   * If the string aliases to <code>dst</code> itself as an external buffer,
+   * then extract() will not copy the contents.
+   *
+   * @param start offset of first character which will be copied into the array
+   * @param length the number of characters to extract
+   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
+   * must be at least (<tt>dstStart + length</tt>).
+   * @param dstStart the offset in <TT>dst</TT> where the first character
+   * will be extracted
+   * @stable ICU 2.0
+   */
+  inline void extract(int32_t start,
+           int32_t length,
+           UChar *dst,
+           int32_t dstStart = 0) const;
+
+  /**
+   * Copy the contents of the string into dest.
+   * This is a convenience function that
+   * checks if there is enough space in dest,
+   * extracts the entire string if possible,
+   * and NUL-terminates dest if possible.
+   *
+   * If the string fits into dest but cannot be NUL-terminated
+   * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
+   * If the string itself does not fit into dest
+   * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+   *
+   * If the string aliases to <code>dest</code> itself as an external buffer,
+   * then extract() will not copy the contents.
+   *
+   * @param dest Destination string buffer.
+   * @param destCapacity Number of UChars available at dest.
+   * @param errorCode ICU error code.
+   * @return length()
+   * @stable ICU 2.0
+   */
+  int32_t
+  extract(UChar *dest, int32_t destCapacity,
+          UErrorCode &errorCode) const;
+
+  /**
+   * Copy the characters in the range
+   * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
+   * <tt>target</tt>.
+   * @param start offset of first character which will be copied
+   * @param length the number of characters to extract
+   * @param target UnicodeString into which to copy characters.
+   * @return A reference to <TT>target</TT>
+   * @stable ICU 2.0
+   */
+  inline void extract(int32_t start,
+           int32_t length,
+           UnicodeString& target) const;
+
+  /**
+   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
+   * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
+   * @param start offset of first character which will be copied into the array
+   * @param limit offset immediately following the last character to be copied
+   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
+   * must be at least (<tt>dstStart + (limit - start)</tt>).
+   * @param dstStart the offset in <TT>dst</TT> where the first character
+   * will be extracted
+   * @stable ICU 2.0
+   */
+  inline void extractBetween(int32_t start,
+              int32_t limit,
+              UChar *dst,
+              int32_t dstStart = 0) const;
+
+  /**
+   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
+   * into the UnicodeString <tt>target</tt>.  Replaceable API.
+   * @param start offset of first character which will be copied
+   * @param limit offset immediately following the last character to be copied
+   * @param target UnicodeString into which to copy characters.
+   * @return A reference to <TT>target</TT>
+   * @stable ICU 2.0
+   */
+  virtual void extractBetween(int32_t start,
+              int32_t limit,
+              UnicodeString& target) const;
+
+  /**
+   * Copy the characters in the range 
+   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters.
+   * All characters must be invariant (see utypes.h).
+   * Use US_INV as the last, signature-distinguishing parameter.
+   *
+   * This function does not write any more than <code>targetLength</code>
+   * characters but returns the length of the entire output string
+   * so that one can allocate a larger buffer and call the function again
+   * if necessary.
+   * The output string is NUL-terminated if possible.
+   *
+   * @param start offset of first character which will be copied
+   * @param startLength the number of characters to extract
+   * @param target the target buffer for extraction, can be NULL
+   *               if targetLength is 0
+   * @param targetCapacity the length of the target buffer
+   * @param inv Signature-distinguishing paramater, use US_INV.
+   * @return the output string length, not including the terminating NUL
+   * @stable ICU 3.2
+   */
+  int32_t extract(int32_t start,
+           int32_t startLength,
+           char *target,
+           int32_t targetCapacity,
+           enum EInvariant inv) const;
+
+#if !UCONFIG_NO_CONVERSION
+
+  /**
+   * Copy the characters in the range
+   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
+   * in a specified codepage.
+   * The output string is NUL-terminated.
+   *
+   * Recommendation: For invariant-character strings use
+   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
+   * @param start offset of first character which will be copied
+   * @param startLength the number of characters to extract
+   * @param target the target buffer for extraction
+   * @param codepage the desired codepage for the characters.  0 has
+   * the special meaning of the default codepage
+   * If <code>codepage</code> is an empty string (<code>""</code>),
+   * then a simple conversion is performed on the codepage-invariant
+   * subset ("invariant characters") of the platform encoding. See utypes.h.
+   * If <TT>target</TT> is NULL, then the number of bytes required for
+   * <TT>target</TT> is returned. It is assumed that the target is big enough
+   * to fit all of the characters.
+   * @return the output string length, not including the terminating NUL
+   * @stable ICU 2.0
+   */
+  inline int32_t extract(int32_t start,
+                 int32_t startLength,
+                 char *target,
+                 const char *codepage = 0) const;
+
+  /**
+   * Copy the characters in the range
+   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
+   * in a specified codepage.
+   * This function does not write any more than <code>targetLength</code>
+   * characters but returns the length of the entire output string
+   * so that one can allocate a larger buffer and call the function again
+   * if necessary.
+   * The output string is NUL-terminated if possible.
+   *
+   * Recommendation: For invariant-character strings use
+   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
+   * @param start offset of first character which will be copied
+   * @param startLength the number of characters to extract
+   * @param target the target buffer for extraction
+   * @param targetLength the length of the target buffer
+   * @param codepage the desired codepage for the characters.  0 has
+   * the special meaning of the default codepage
+   * If <code>codepage</code> is an empty string (<code>""</code>),
+   * then a simple conversion is performed on the codepage-invariant
+   * subset ("invariant characters") of the platform encoding. See utypes.h.
+   * If <TT>target</TT> is NULL, then the number of bytes required for
+   * <TT>target</TT> is returned.
+   * @return the output string length, not including the terminating NUL
+   * @stable ICU 2.0
+   */
+  int32_t extract(int32_t start,
+           int32_t startLength,
+           char *target,
+           uint32_t targetLength,
+           const char *codepage = 0) const;
+
+  /**
+   * Convert the UnicodeString into a codepage string using an existing UConverter.
+   * The output string is NUL-terminated if possible.
+   *
+   * This function avoids the overhead of opening and closing a converter if
+   * multiple strings are extracted.
+   *
+   * @param dest destination string buffer, can be NULL if destCapacity==0
+   * @param destCapacity the number of chars available at dest
+   * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
+   *        or NULL for the default converter
+   * @param errorCode normal ICU error code
+   * @return the length of the output string, not counting the terminating NUL;
+   *         if the length is greater than destCapacity, then the string will not fit
+   *         and a buffer of the indicated length would need to be passed in
+   * @stable ICU 2.0
+   */
+  int32_t extract(char *dest, int32_t destCapacity,
+                  UConverter *cnv,
+                  UErrorCode &errorCode) const;
+
+#endif
+
+  /* Length operations */
+
+  /**
+   * Return the length of the UnicodeString object.
+   * The length is the number of UChar code units are in the UnicodeString.
+   * If you want the number of code points, please use countChar32().
+   * @return the length of the UnicodeString object
+   * @see countChar32
+   * @stable ICU 2.0
+   */
+  inline int32_t length(void) const;
+
+  /**
+   * Count Unicode code points in the length UChar code units of the string.
+   * A code point may occupy either one or two UChar code units.
+   * Counting code points involves reading all code units.
+   *
+   * This functions is basically the inverse of moveIndex32().
+   *
+   * @param start the index of the first code unit to check
+   * @param length the number of UChar code units to check
+   * @return the number of code points in the specified code units
+   * @see length
+   * @stable ICU 2.0
+   */
+  int32_t
+  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
+
+  /**
+   * Check if the length UChar code units of the string
+   * contain more Unicode code points than a certain number.
+   * This is more efficient than counting all code points in this part of the string
+   * and comparing that number with a threshold.
+   * This function may not need to scan the string at all if the length
+   * falls within a certain range, and
+   * never needs to count more than 'number+1' code points.
+   * Logically equivalent to (countChar32(start, length)>number).
+   * A Unicode code point may occupy either one or two UChar code units.
+   *
+   * @param start the index of the first code unit to check (0 for the entire string)
+   * @param length the number of UChar code units to check
+   *               (use INT32_MAX for the entire string; remember that start/length
+   *                values are pinned)
+   * @param number The number of code points in the (sub)string is compared against
+   *               the 'number' parameter.
+   * @return Boolean value for whether the string contains more Unicode code points
+   *         than 'number'. Same as (u_countChar32(s, length)>number).
+   * @see countChar32
+   * @see u_strHasMoreChar32Than
+   * @stable ICU 2.4
+   */
+  UBool
+  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
+
+  /**
+   * Determine if this string is empty.
+   * @return TRUE if this string contains 0 characters, FALSE otherwise.
+   * @stable ICU 2.0
+   */
+  inline UBool isEmpty(void) const;
+
+  /**
+   * Return the capacity of the internal buffer of the UnicodeString object.
+   * This is useful together with the getBuffer functions.
+   * See there for details.
+   *
+   * @return the number of UChars available in the internal buffer
+   * @see getBuffer
+   * @stable ICU 2.0
+   */
+  inline int32_t getCapacity(void) const;
+
+  /* Other operations */
+
+  /**
+   * Generate a hash code for this object.
+   * @return The hash code of this UnicodeString.
+   * @stable ICU 2.0
+   */
+  inline int32_t hashCode(void) const;
+
+  /**
+   * Determine if this object contains a valid string.
+   * A bogus string has no value. It is different from an empty string.
+   * It can be used to indicate that no string value is available.
+   * getBuffer() and getTerminatedBuffer() return NULL, and
+   * length() returns 0.
+   *
+   * @return TRUE if the string is valid, FALSE otherwise
+   * @see setToBogus()
+   * @stable ICU 2.0
+   */
+  inline UBool isBogus(void) const;
+
+
+  //========================================
+  // Write operations
+  //========================================
+
+  /* Assignment operations */
+
+  /**
+   * Assignment operator.  Replace the characters in this UnicodeString
+   * with the characters from <TT>srcText</TT>.
+   * @param srcText The text containing the characters to replace
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString &operator=(const UnicodeString &srcText);
+
+  /**
+   * Almost the same as the assignment operator.
+   * Replace the characters in this UnicodeString
+   * with the characters from <code>srcText</code>.
+   *
+   * This function works the same for all strings except for ones that
+   * are readonly aliases.
+   * Starting with ICU 2.4, the assignment operator and the copy constructor
+   * allocate a new buffer and copy the buffer contents even for readonly aliases.
+   * This function implements the old, more efficient but less safe behavior
+   * of making this string also a readonly alias to the same buffer.
+   * The fastCopyFrom function must be used only if it is known that the lifetime of
+   * this UnicodeString is at least as long as the lifetime of the aliased buffer
+   * including its contents, for example for strings from resource bundles
+   * or aliases to string contents.
+   *
+   * @param src The text containing the characters to replace.
+   * @return a reference to this
+   * @stable ICU 2.4
+   */
+  UnicodeString &fastCopyFrom(const UnicodeString &src);
+
+  /**
+   * Assignment operator.  Replace the characters in this UnicodeString
+   * with the code unit <TT>ch</TT>.
+   * @param ch the code unit to replace
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& operator= (UChar ch);
+
+  /**
+   * Assignment operator.  Replace the characters in this UnicodeString
+   * with the code point <TT>ch</TT>.
+   * @param ch the code point to replace
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& operator= (UChar32 ch);
+
+  /**
+   * Set the text in the UnicodeString object to the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
+   * <TT>srcText</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
+   * will be obtained
+   * @return a reference to this
+   * @stable ICU 2.2
+   */
+  inline UnicodeString& setTo(const UnicodeString& srcText,
+               int32_t srcStart);
+
+  /**
+   * Set the text in the UnicodeString object to the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * <TT>srcText</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcText</TT> in the
+   * replace string.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& setTo(const UnicodeString& srcText,
+               int32_t srcStart,
+               int32_t srcLength);
+
+  /**
+   * Set the text in the UnicodeString object to the characters in
+   * <TT>srcText</TT>.
+   * <TT>srcText</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& setTo(const UnicodeString& srcText);
+
+  /**
+   * Set the characters in the UnicodeString object to the characters
+   * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
+   * @param srcChars the source for the new characters
+   * @param srcLength the number of Unicode characters in srcChars.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& setTo(const UChar *srcChars,
+               int32_t srcLength);
+
+  /**
+   * Set the characters in the UnicodeString object to the code unit
+   * <TT>srcChar</TT>.
+   * @param srcChar the code unit which becomes the UnicodeString's character
+   * content
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& setTo(UChar srcChar);
+
+  /**
+   * Set the characters in the UnicodeString object to the code point
+   * <TT>srcChar</TT>.
+   * @param srcChar the code point which becomes the UnicodeString's character
+   * content
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& setTo(UChar32 srcChar);
+
+  /**
+   * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
+   * The text will be used for the UnicodeString object, but
+   * it will not be released when the UnicodeString is destroyed.
+   * This has copy-on-write semantics:
+   * When the string is modified, then the buffer is first copied into
+   * newly allocated memory.
+   * The aliased buffer is never modified.
+   * In an assignment to another UnicodeString, the text will be aliased again,
+   * so that both strings then alias the same readonly-text.
+   *
+   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
+   *                     This must be true if <code>textLength==-1</code>.
+   * @param text The characters to alias for the UnicodeString.
+   * @param textLength The number of Unicode characters in <code>text</code> to alias.
+   *                   If -1, then this constructor will determine the length
+   *                   by calling <code>u_strlen()</code>.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString &setTo(UBool isTerminated,
+                       const UChar *text,
+                       int32_t textLength);
+
+  /**
+   * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
+   * The text will be used for the UnicodeString object, but
+   * it will not be released when the UnicodeString is destroyed.
+   * This has write-through semantics:
+   * For as long as the capacity of the buffer is sufficient, write operations
+   * will directly affect the buffer. When more capacity is necessary, then
+   * a new buffer will be allocated and the contents copied as with regularly
+   * constructed strings.
+   * In an assignment to another UnicodeString, the buffer will be copied.
+   * The extract(UChar *dst) function detects whether the dst pointer is the same
+   * as the string buffer itself and will in this case not copy the contents.
+   *
+   * @param buffer The characters to alias for the UnicodeString.
+   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
+   * @param buffCapacity The size of <code>buffer</code> in UChars.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString &setTo(UChar *buffer,
+                       int32_t buffLength,
+                       int32_t buffCapacity);
+
+  /**
+   * Make this UnicodeString object invalid.
+   * The string will test TRUE with isBogus().
+   *
+   * A bogus string has no value. It is different from an empty string.
+   * It can be used to indicate that no string value is available.
+   * getBuffer() and getTerminatedBuffer() return NULL, and
+   * length() returns 0.
+   *
+   * This utility function is used throughout the UnicodeString
+   * implementation to indicate that a UnicodeString operation failed,
+   * and may be used in other functions,
+   * especially but not exclusively when such functions do not
+   * take a UErrorCode for simplicity.
+   *
+   * The following methods, and no others, will clear a string object's bogus flag:
+   * - remove()
+   * - remove(0, INT32_MAX)
+   * - truncate(0)
+   * - operator=() (assignment operator)
+   * - setTo(...)
+   *
+   * The simplest ways to turn a bogus string into an empty one
+   * is to use the remove() function.
+   * Examples for other functions that are equivalent to "set to empty string":
+   * \code
+   * if(s.isBogus()) {
+   *   s.remove();           // set to an empty string (remove all), or
+   *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
+   *   s.truncate(0);        // set to an empty string (complete truncation), or
+   *   s=UnicodeString();    // assign an empty string, or
+   *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
+   *   static const UChar nul=0;
+   *   s.setTo(&nul, 0);     // set to an empty C Unicode string
+   * }
+   * \endcode
+   *
+   * @see isBogus()
+   * @stable ICU 2.0
+   */
+  void setToBogus();
+
+  /**
+   * Set the character at the specified offset to the specified character.
+   * @param offset A valid offset into the text of the character to set
+   * @param ch The new character
+   * @return A reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& setCharAt(int32_t offset,
+               UChar ch);
+
+
+  /* Append operations */
+
+  /**
+   * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
+   * object.
+   * @param ch the code unit to be appended
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+ inline  UnicodeString& operator+= (UChar ch);
+
+  /**
+   * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
+   * object.
+   * @param ch the code point to be appended
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+ inline  UnicodeString& operator+= (UChar32 ch);
+
+  /**
+   * Append operator. Append the characters in <TT>srcText</TT> to the
+   * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT> is
+   * not modified.
+   * @param srcText the source for the new characters
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& operator+= (const UnicodeString& srcText);
+
+  /**
+   * Append the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
+   * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT>
+   * is not modified.
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcText</TT> in
+   * the append string
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& append(const UnicodeString& srcText,
+            int32_t srcStart,
+            int32_t srcLength);
+
+  /**
+   * Append the characters in <TT>srcText</TT> to the UnicodeString object at
+   * offset <TT>start</TT>. <TT>srcText</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& append(const UnicodeString& srcText);
+
+  /**
+   * Append the characters in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
+   * object at offset
+   * <TT>start</TT>. <TT>srcChars</TT> is not modified.
+   * @param srcChars the source for the new characters
+   * @param srcStart the offset into <TT>srcChars</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcChars</TT> in
+   * the append string
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& append(const UChar *srcChars,
+            int32_t srcStart,
+            int32_t srcLength);
+
+  /**
+   * Append the characters in <TT>srcChars</TT> to the UnicodeString object
+   * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
+   * @param srcChars the source for the new characters
+   * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& append(const UChar *srcChars,
+            int32_t srcLength);
+
+  /**
+   * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
+   * @param srcChar the code unit to append
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& append(UChar srcChar);
+
+  /**
+   * Append the code point <TT>srcChar</TT> to the UnicodeString object.
+   * @param srcChar the code point to append
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& append(UChar32 srcChar);
+
+
+  /* Insert operations */
+
+  /**
+   * Insert the characters in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
+   * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
+   * @param start the offset where the insertion begins
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcText</TT> in
+   * the insert string
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& insert(int32_t start,
+            const UnicodeString& srcText,
+            int32_t srcStart,
+            int32_t srcLength);
+
+  /**
+   * Insert the characters in <TT>srcText</TT> into the UnicodeString object
+   * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
+   * @param start the offset where the insertion begins
+   * @param srcText the source for the new characters
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& insert(int32_t start,
+            const UnicodeString& srcText);
+
+  /**
+   * Insert the characters in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
+   *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
+   * @param start the offset at which the insertion begins
+   * @param srcChars the source for the new characters
+   * @param srcStart the offset into <TT>srcChars</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcChars</TT>
+   * in the insert string
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& insert(int32_t start,
+            const UChar *srcChars,
+            int32_t srcStart,
+            int32_t srcLength);
+
+  /**
+   * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
+   * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
+   * @param start the offset where the insertion begins
+   * @param srcChars the source for the new characters
+   * @param srcLength the number of Unicode characters in srcChars.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& insert(int32_t start,
+            const UChar *srcChars,
+            int32_t srcLength);
+
+  /**
+   * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
+   * offset <TT>start</TT>.
+   * @param start the offset at which the insertion occurs
+   * @param srcChar the code unit to insert
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& insert(int32_t start,
+            UChar srcChar);
+
+  /**
+   * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
+   * offset <TT>start</TT>.
+   * @param start the offset at which the insertion occurs
+   * @param srcChar the code point to insert
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& insert(int32_t start,
+            UChar32 srcChar);
+
+
+  /* Replace operations */
+
+  /**
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+   * <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+   * <TT>srcText</TT> is not modified.
+   * @param start the offset at which the replace operation begins
+   * @param length the number of characters to replace. The character at
+   * <TT>start + length</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcText</TT> in
+   * the replace string
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& replace(int32_t start,
+             int32_t length,
+             const UnicodeString& srcText,
+             int32_t srcStart,
+             int32_t srcLength);
+
+  /**
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>)
+   * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
+   *  not modified.
+   * @param start the offset at which the replace operation begins
+   * @param length the number of characters to replace. The character at
+   * <TT>start + length</TT> is not modified.
+   * @param srcText the source for the new characters
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& replace(int32_t start,
+             int32_t length,
+             const UnicodeString& srcText);
+
+  /**
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+   * <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
+   * is not modified.
+   * @param start the offset at which the replace operation begins
+   * @param length the number of characters to replace.  The character at
+   * <TT>start + length</TT> is not modified.
+   * @param srcChars the source for the new characters
+   * @param srcStart the offset into <TT>srcChars</TT> where new characters
+   * will be obtained
+   * @param srcLength the number of characters in <TT>srcChars</TT>
+   * in the replace string
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& replace(int32_t start,
+             int32_t length,
+             const UChar *srcChars,
+             int32_t srcStart,
+             int32_t srcLength);
+
+  /**
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+   * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
+   * @param start the offset at which the replace operation begins
+   * @param length number of characters to replace.  The character at
+   * <TT>start + length</TT> is not modified.
+   * @param srcChars the source for the new characters
+   * @param srcLength the number of Unicode characters in srcChars
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& replace(int32_t start,
+             int32_t length,
+             const UChar *srcChars,
+             int32_t srcLength);
+
+  /**
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
+   * <TT>srcChar</TT>.
+   * @param start the offset at which the replace operation begins
+   * @param length the number of characters to replace.  The character at
+   * <TT>start + length</TT> is not modified.
+   * @param srcChar the new code unit
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& replace(int32_t start,
+             int32_t length,
+             UChar srcChar);
+
+  /**
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the code point
+   * <TT>srcChar</TT>.
+   * @param start the offset at which the replace operation begins
+   * @param length the number of characters to replace.  The character at
+   * <TT>start + length</TT> is not modified.
+   * @param srcChar the new code point
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& replace(int32_t start,
+             int32_t length,
+             UChar32 srcChar);
+
+  /**
+   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
+   * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
+   * @param start the offset at which the replace operation begins
+   * @param limit the offset immediately following the replace range
+   * @param srcText the source for the new characters
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& replaceBetween(int32_t start,
+                int32_t limit,
+                const UnicodeString& srcText);
+
+  /**
+   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
+   * with the characters in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
+   * @param start the offset at which the replace operation begins
+   * @param limit the offset immediately following the replace range
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcChars</TT> where new characters
+   * will be obtained
+   * @param srcLimit the offset immediately following the range to copy
+   * in <TT>srcText</TT>
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& replaceBetween(int32_t start,
+                int32_t limit,
+                const UnicodeString& srcText,
+                int32_t srcStart,
+                int32_t srcLimit);
+
+  /**
+   * Replace a substring of this object with the given text.
+   * @param start the beginning index, inclusive; <code>0 <= start
+   * <= limit</code>.
+   * @param limit the ending index, exclusive; <code>start <= limit
+   * <= length()</code>.
+   * @param text the text to replace characters <code>start</code>
+   * to <code>limit - 1</code>
+   * @stable ICU 2.0
+   */
+  virtual void handleReplaceBetween(int32_t start,
+                                    int32_t limit,
+                                    const UnicodeString& text);
+
+  /**
+   * Replaceable API
+   * @return TRUE if it has MetaData
+   * @stable ICU 2.4
+   */
+  virtual UBool hasMetaData() const;
+
+  /**
+   * Copy a substring of this object, retaining attribute (out-of-band)
+   * information.  This method is used to duplicate or reorder substrings.
+   * The destination index must not overlap the source range.
+   *
+   * @param start the beginning index, inclusive; <code>0 <= start <=
+   * limit</code>.
+   * @param limit the ending index, exclusive; <code>start <= limit <=
+   * length()</code>.
+   * @param dest the destination index.  The characters from
+   * <code>start..limit-1</code> will be copied to <code>dest</code>.
+   * Implementations of this method may assume that <code>dest <= start ||
+   * dest >= limit</code>.
+   * @stable ICU 2.0
+   */
+  virtual void copy(int32_t start, int32_t limit, int32_t dest);
+
+  /* Search and replace operations */
+
+  /**
+   * Replace all occurrences of characters in oldText with the characters
+   * in newText
+   * @param oldText the text containing the search text
+   * @param newText the text containing the replacement text
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
+                const UnicodeString& newText);
+
+  /**
+   * Replace all occurrences of characters in oldText with characters
+   * in newText
+   * in the range [<TT>start</TT>, <TT>start + length</TT>).
+   * @param start the start of the range in which replace will performed
+   * @param length the length of the range in which replace will be performed
+   * @param oldText the text containing the search text
+   * @param newText the text containing the replacement text
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& findAndReplace(int32_t start,
+                int32_t length,
+                const UnicodeString& oldText,
+                const UnicodeString& newText);
+
+  /**
+   * Replace all occurrences of characters in oldText in the range
+   * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
+   * in newText in the range
+   * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
+   * in the range [<TT>start</TT>, <TT>start + length</TT>).
+   * @param start the start of the range in which replace will performed
+   * @param length the length of the range in which replace will be performed
+   * @param oldText the text containing the search text
+   * @param oldStart the start of the search range in <TT>oldText</TT>
+   * @param oldLength the length of the search range in <TT>oldText</TT>
+   * @param newText the text containing the replacement text
+   * @param newStart the start of the replacement range in <TT>newText</TT>
+   * @param newLength the length of the replacement range in <TT>newText</TT>
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& findAndReplace(int32_t start,
+                int32_t length,
+                const UnicodeString& oldText,
+                int32_t oldStart,
+                int32_t oldLength,
+                const UnicodeString& newText,
+                int32_t newStart,
+                int32_t newLength);
+
+
+  /* Remove operations */
+
+  /**
+   * Remove all characters from the UnicodeString object.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& remove(void);
+
+  /**
+   * Remove the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
+   * @param start the offset of the first character to remove
+   * @param length the number of characters to remove
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& remove(int32_t start,
+                               int32_t length = (int32_t)INT32_MAX);
+
+  /**
+   * Remove the characters in the range
+   * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
+   * @param start the offset of the first character to remove
+   * @param limit the offset immediately following the range to remove
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& removeBetween(int32_t start,
+                                      int32_t limit = (int32_t)INT32_MAX);
+
+
+  /* Length operations */
+
+  /**
+   * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
+   * If the length of this UnicodeString is less than targetLength,
+   * length() - targetLength copies of padChar will be added to the
+   * beginning of this UnicodeString.
+   * @param targetLength the desired length of the string
+   * @param padChar the character to use for padding. Defaults to
+   * space (U+0020)
+   * @return TRUE if the text was padded, FALSE otherwise.
+   * @stable ICU 2.0
+   */
+  UBool padLeading(int32_t targetLength,
+                    UChar padChar = 0x0020);
+
+  /**
+   * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
+   * If the length of this UnicodeString is less than targetLength,
+   * length() - targetLength copies of padChar will be added to the
+   * end of this UnicodeString.
+   * @param targetLength the desired length of the string
+   * @param padChar the character to use for padding. Defaults to
+   * space (U+0020)
+   * @return TRUE if the text was padded, FALSE otherwise.
+   * @stable ICU 2.0
+   */
+  UBool padTrailing(int32_t targetLength,
+                     UChar padChar = 0x0020);
+
+  /**
+   * Truncate this UnicodeString to the <TT>targetLength</TT>.
+   * @param targetLength the desired length of this UnicodeString.
+   * @return TRUE if the text was truncated, FALSE otherwise
+   * @stable ICU 2.0
+   */
+  inline UBool truncate(int32_t targetLength);
+
+  /**
+   * Trims leading and trailing whitespace from this UnicodeString.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  UnicodeString& trim(void);
+
+
+  /* Miscellaneous operations */
+
+  /**
+   * Reverse this UnicodeString in place.
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& reverse(void);
+
+  /**
+   * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
+   * this UnicodeString.
+   * @param start the start of the range to reverse
+   * @param length the number of characters to to reverse
+   * @return a reference to this
+   * @stable ICU 2.0
+   */
+  inline UnicodeString& reverse(int32_t start,
+             int32_t length);
+
+  /**
+   * Convert the characters in this to UPPER CASE following the conventions of
+   * the default locale.
+   * @return A reference to this.
+   * @stable ICU 2.0
+   */
+  UnicodeString& toUpper(void);
+
+  /**
+   * Convert the characters in this to UPPER CASE following the conventions of
+   * a specific locale.
+   * @param locale The locale containing the conventions to use.
+   * @return A reference to this.
+   * @stable ICU 2.0
+   */
+  UnicodeString& toUpper(const Locale& locale);
+
+  /**
+   * Convert the characters in this to lower case following the conventions of
+   * the default locale.
+   * @return A reference to this.
+   * @stable ICU 2.0
+   */
+  UnicodeString& toLower(void);
+
+  /**
+   * Convert the characters in this to lower case following the conventions of
+   * a specific locale.
+   * @param locale The locale containing the conventions to use.
+   * @return A reference to this.
+   * @stable ICU 2.0
+   */
+  UnicodeString& toLower(const Locale& locale);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+  /**
+   * Titlecase this string, convenience function using the default locale.
+   *
+   * Casing is locale-dependent and context-sensitive.
+   * Titlecasing uses a break iterator to find the first characters of words
+   * that are to be titlecased. It titlecases those characters and lowercases
+   * all others.
+   *
+   * The titlecase break iterator can be provided to customize for arbitrary
+   * styles, using rules and dictionaries beyond the standard iterators.
+   * It may be more efficient to always provide an iterator to avoid
+   * opening and closing one for each string.
+   * The standard titlecase iterator for the root locale implements the
+   * algorithm of Unicode TR 21.
+   *
+   * This function uses only the setText(), first() and next() methods of the
+   * provided break iterator.
+   *
+   * @param titleIter A break iterator to find the first characters of words
+   *                  that are to be titlecased.
+   *                  If none is provided (0), then a standard titlecase
+   *                  break iterator is opened.
+   *                  Otherwise the provided iterator is set to the string's text.
+   * @return A reference to this.
+   * @stable ICU 2.1
+   */
+  UnicodeString &toTitle(BreakIterator *titleIter);
+
+  /**
+   * Titlecase this string.
+   *
+   * Casing is locale-dependent and context-sensitive.
+   * Titlecasing uses a break iterator to find the first characters of words
+   * that are to be titlecased. It titlecases those characters and lowercases
+   * all others.
+   *
+   * The titlecase break iterator can be provided to customize for arbitrary
+   * styles, using rules and dictionaries beyond the standard iterators.
+   * It may be more efficient to always provide an iterator to avoid
+   * opening and closing one for each string.
+   * The standard titlecase iterator for the root locale implements the
+   * algorithm of Unicode TR 21.
+   *
+   * This function uses only the setText(), first() and next() methods of the
+   * provided break iterator.
+   *
+   * @param titleIter A break iterator to find the first characters of words
+   *                  that are to be titlecased.
+   *                  If none is provided (0), then a standard titlecase
+   *                  break iterator is opened.
+   *                  Otherwise the provided iterator is set to the string's text.
+   * @param locale    The locale to consider.
+   * @return A reference to this.
+   * @stable ICU 2.1
+   */
+  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
+
+  /**
+   * Titlecase this string, with options.
+   *
+   * Casing is locale-dependent and context-sensitive.
+   * Titlecasing uses a break iterator to find the first characters of words
+   * that are to be titlecased. It titlecases those characters and lowercases
+   * all others. (This can be modified with options.)
+   *
+   * The titlecase break iterator can be provided to customize for arbitrary
+   * styles, using rules and dictionaries beyond the standard iterators.
+   * It may be more efficient to always provide an iterator to avoid
+   * opening and closing one for each string.
+   * The standard titlecase iterator for the root locale implements the
+   * algorithm of Unicode TR 21.
+   *
+   * This function uses only the setText(), first() and next() methods of the
+   * provided break iterator.
+   *
+   * @param titleIter A break iterator to find the first characters of words
+   *                  that are to be titlecased.
+   *                  If none is provided (0), then a standard titlecase
+   *                  break iterator is opened.
+   *                  Otherwise the provided iterator is set to the string's text.
+   * @param locale    The locale to consider.
+   * @param options Options bit set, see ucasemap_open().
+   * @return A reference to this.
+   * @see U_TITLECASE_NO_LOWERCASE
+   * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+   * @see ucasemap_open
+   * @stable ICU 4.0
+   */
+  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
+
+#endif
+
+  /**
+   * Case-fold the characters in this string.
+   * Case-folding is locale-independent and not context-sensitive,
+   * but there is an option for whether to include or exclude mappings for dotted I
+   * and dotless i that are marked with 'I' in CaseFolding.txt.
+   * The result may be longer or shorter than the original.
+   *
+   * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+   * @return A reference to this.
+   * @stable ICU 2.0
+   */
+  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
+
+  //========================================
+  // Access to the internal buffer
+  //========================================
+
+  /**
+   * Get a read/write pointer to the internal buffer.
+   * The buffer is guaranteed to be large enough for at least minCapacity UChars,
+   * writable, and is still owned by the UnicodeString object.
+   * Calls to getBuffer(minCapacity) must not be nested, and
+   * must be matched with calls to releaseBuffer(newLength).
+   * If the string buffer was read-only or shared,
+   * then it will be reallocated and copied.
+   *
+   * An attempted nested call will return 0, and will not further modify the
+   * state of the UnicodeString object.
+   * It also returns 0 if the string is bogus.
+   *
+   * The actual capacity of the string buffer may be larger than minCapacity.
+   * getCapacity() returns the actual capacity.
+   * For many operations, the full capacity should be used to avoid reallocations.
+   *
+   * While the buffer is "open" between getBuffer(minCapacity)
+   * and releaseBuffer(newLength), the following applies:
+   * - The string length is set to 0.
+   * - Any read API call on the UnicodeString object will behave like on a 0-length string.
+   * - Any write API call on the UnicodeString object is disallowed and will have no effect.
+   * - You can read from and write to the returned buffer.
+   * - The previous string contents will still be in the buffer;
+   *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
+   *   If the length() was greater than minCapacity, then any contents after minCapacity
+   *   may be lost.
+   *   The buffer contents is not NUL-terminated by getBuffer().
+   *   If length()<getCapacity() then you can terminate it by writing a NUL
+   *   at index length().
+   * - You must call releaseBuffer(newLength) before and in order to
+   *   return to normal UnicodeString operation.
+   *
+   * @param minCapacity the minimum number of UChars that are to be available
+   *        in the buffer, starting at the returned pointer;
+   *        default to the current string capacity if minCapacity==-1
+   * @return a writable pointer to the internal string buffer,
+   *         or 0 if an error occurs (nested calls, out of memory)
+   *
+   * @see releaseBuffer
+   * @see getTerminatedBuffer()
+   * @stable ICU 2.0
+   */
+  UChar *getBuffer(int32_t minCapacity);
+
+  /**
+   * Release a read/write buffer on a UnicodeString object with an
+   * "open" getBuffer(minCapacity).
+   * This function must be called in a matched pair with getBuffer(minCapacity).
+   * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
+   *
+   * It will set the string length to newLength, at most to the current capacity.
+   * If newLength==-1 then it will set the length according to the
+   * first NUL in the buffer, or to the capacity if there is no NUL.
+   *
+   * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
+   *
+   * @param newLength the new length of the UnicodeString object;
+   *        defaults to the current capacity if newLength is greater than that;
+   *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
+   *        the current capacity of the string
+   *
+   * @see getBuffer(int32_t minCapacity)
+   * @stable ICU 2.0
+   */
+  void releaseBuffer(int32_t newLength=-1);
+
+  /**
+   * Get a read-only pointer to the internal buffer.
+   * This can be called at any time on a valid UnicodeString.
+   *
+   * It returns 0 if the string is bogus, or
+   * during an "open" getBuffer(minCapacity).
+   *
+   * It can be called as many times as desired.
+   * The pointer that it returns will remain valid until the UnicodeString object is modified,
+   * at which time the pointer is semantically invalidated and must not be used any more.
+   *
+   * The capacity of the buffer can be determined with getCapacity().
+   * The part after length() may or may not be initialized and valid,
+   * depending on the history of the UnicodeString object.
+   *
+   * The buffer contents is (probably) not NUL-terminated.
+   * You can check if it is with
+   * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
+   * (See getTerminatedBuffer().)
+   *
+   * The buffer may reside in read-only memory. Its contents must not
+   * be modified.
+   *
+   * @return a read-only pointer to the internal string buffer,
+   *         or 0 if the string is empty or bogus
+   *
+   * @see getBuffer(int32_t minCapacity)
+   * @see getTerminatedBuffer()
+   * @stable ICU 2.0
+   */
+  inline const UChar *getBuffer() const;
+
+  /**
+   * Get a read-only pointer to the internal buffer,
+   * making sure that it is NUL-terminated.
+   * This can be called at any time on a valid UnicodeString.
+   *
+   * It returns 0 if the string is bogus, or
+   * during an "open" getBuffer(minCapacity), or if the buffer cannot
+   * be NUL-terminated (because memory allocation failed).
+   *
+   * It can be called as many times as desired.
+   * The pointer that it returns will remain valid until the UnicodeString object is modified,
+   * at which time the pointer is semantically invalidated and must not be used any more.
+   *
+   * The capacity of the buffer can be determined with getCapacity().
+   * The part after length()+1 may or may not be initialized and valid,
+   * depending on the history of the UnicodeString object.
+   *
+   * The buffer contents is guaranteed to be NUL-terminated.
+   * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
+   * is written.
+   * For this reason, this function is not const, unlike getBuffer().
+   * Note that a UnicodeString may also contain NUL characters as part of its contents.
+   *
+   * The buffer may reside in read-only memory. Its contents must not
+   * be modified.
+   *
+   * @return a read-only pointer to the internal string buffer,
+   *         or 0 if the string is empty or bogus
+   *
+   * @see getBuffer(int32_t minCapacity)
+   * @see getBuffer()
+   * @stable ICU 2.2
+   */
+  inline const UChar *getTerminatedBuffer();
+
+  //========================================
+  // Constructors
+  //========================================
+
+  /** Construct an empty UnicodeString.
+   * @stable ICU 2.0
+   */
+  UnicodeString();
+
+  /**
+   * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
+   * @param capacity the number of UChars this UnicodeString should hold
+   * before a resize is necessary; if count is greater than 0 and count
+   * code points c take up more space than capacity, then capacity is adjusted
+   * accordingly.
+   * @param c is used to initially fill the string
+   * @param count specifies how many code points c are to be written in the
+   *              string
+   * @stable ICU 2.0
+   */
+  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
+
+  /**
+   * Single UChar (code unit) constructor.
+   * @param ch the character to place in the UnicodeString
+   * @stable ICU 2.0
+   */
+  UnicodeString(UChar ch);
+
+  /**
+   * Single UChar32 (code point) constructor.
+   * @param ch the character to place in the UnicodeString
+   * @stable ICU 2.0
+   */
+  UnicodeString(UChar32 ch);
+
+  /**
+   * UChar* constructor.
+   * @param text The characters to place in the UnicodeString.  <TT>text</TT>
+   * must be NULL (U+0000) terminated.
+   * @stable ICU 2.0
+   */
+  UnicodeString(const UChar *text);
+
+  /**
+   * UChar* constructor.
+   * @param text The characters to place in the UnicodeString.
+   * @param textLength The number of Unicode characters in <TT>text</TT>
+   * to copy.
+   * @stable ICU 2.0
+   */
+  UnicodeString(const UChar *text,
+        int32_t textLength);
+
+  /**
+   * Readonly-aliasing UChar* constructor.
+   * The text will be used for the UnicodeString object, but
+   * it will not be released when the UnicodeString is destroyed.
+   * This has copy-on-write semantics:
+   * When the string is modified, then the buffer is first copied into
+   * newly allocated memory.
+   * The aliased buffer is never modified.
+   * In an assignment to another UnicodeString, the text will be aliased again,
+   * so that both strings then alias the same readonly-text.
+   *
+   * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
+   *                     This must be true if <code>textLength==-1</code>.
+   * @param text The characters to alias for the UnicodeString.
+   * @param textLength The number of Unicode characters in <code>text</code> to alias.
+   *                   If -1, then this constructor will determine the length
+   *                   by calling <code>u_strlen()</code>.
+   * @stable ICU 2.0
+   */
+  UnicodeString(UBool isTerminated,
+                const UChar *text,
+                int32_t textLength);
+
+  /**
+   * Writable-aliasing UChar* constructor.
+   * The text will be used for the UnicodeString object, but
+   * it will not be released when the UnicodeString is destroyed.
+   * This has write-through semantics:
+   * For as long as the capacity of the buffer is sufficient, write operations
+   * will directly affect the buffer. When more capacity is necessary, then
+   * a new buffer will be allocated and the contents copied as with regularly
+   * constructed strings.
+   * In an assignment to another UnicodeString, the buffer will be copied.
+   * The extract(UChar *dst) function detects whether the dst pointer is the same
+   * as the string buffer itself and will in this case not copy the contents.
+   *
+   * @param buffer The characters to alias for the UnicodeString.
+   * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
+   * @param buffCapacity The size of <code>buffer</code> in UChars.
+   * @stable ICU 2.0
+   */
+  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
+
+#if !UCONFIG_NO_CONVERSION
+
+  /**
+   * char* constructor.
+   * @param codepageData an array of bytes, null-terminated
+   * @param codepage the encoding of <TT>codepageData</TT>.  The special
+   * value 0 for <TT>codepage</TT> indicates that the text is in the
+   * platform's default codepage.
+   *
+   * If <code>codepage</code> is an empty string (<code>""</code>),
+   * then a simple conversion is performed on the codepage-invariant
+   * subset ("invariant characters") of the platform encoding. See utypes.h.
+   * Recommendation: For invariant-character strings use the constructor
+   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
+   * @stable ICU 2.0
+   */
+  UnicodeString(const char *codepageData,
+        const char *codepage = 0);
+
+  /**
+   * char* constructor.
+   * @param codepageData an array of bytes.
+   * @param dataLength The number of bytes in <TT>codepageData</TT>.
+   * @param codepage the encoding of <TT>codepageData</TT>.  The special
+   * value 0 for <TT>codepage</TT> indicates that the text is in the
+   * platform's default codepage.
+   * If <code>codepage</code> is an empty string (<code>""</code>),
+   * then a simple conversion is performed on the codepage-invariant
+   * subset ("invariant characters") of the platform encoding. See utypes.h.
+   * Recommendation: For invariant-character strings use the constructor
+   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
+   * @stable ICU 2.0
+   */
+  UnicodeString(const char *codepageData,
+        int32_t dataLength,
+        const char *codepage = 0);
+
+  /**
+   * char * / UConverter constructor.
+   * This constructor uses an existing UConverter object to
+   * convert the codepage string to Unicode and construct a UnicodeString
+   * from that.
+   *
+   * The converter is reset at first.
+   * If the error code indicates a failure before this constructor is called,
+   * or if an error occurs during conversion or construction,
+   * then the string will be bogus.
+   *
+   * This function avoids the overhead of opening and closing a converter if
+   * multiple strings are constructed.
+   *
+   * @param src input codepage string
+   * @param srcLength length of the input string, can be -1 for NUL-terminated strings
+   * @param cnv converter object (ucnv_resetToUnicode() will be called),
+   *        can be NULL for the default converter
+   * @param errorCode normal ICU error code
+   * @stable ICU 2.0
+   */
+  UnicodeString(
+        const char *src, int32_t srcLength,
+        UConverter *cnv,
+        UErrorCode &errorCode);
+
+#endif
+
+  /**
+   * Constructs a Unicode string from an invariant-character char * string.
+   * About invariant characters see utypes.h.
+   * This constructor has no runtime dependency on conversion code and is
+   * therefore recommended over ones taking a charset name string
+   * (where the empty string "" indicates invariant-character conversion).
+   *
+   * Use the macro US_INV as the third, signature-distinguishing parameter.
+   *
+   * For example:
+   * \code
+   * void fn(const char *s) {
+   *   UnicodeString ustr(s, -1, US_INV);
+   *   // use ustr ...
+   * }
+   * \endcode
+   *
+   * @param src String using only invariant characters.
+   * @param length Length of src, or -1 if NUL-terminated.
+   * @param inv Signature-distinguishing paramater, use US_INV.
+   *
+   * @see US_INV
+   * @stable ICU 3.2
+   */
+  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
+
+
+  /**
+   * Copy constructor.
+   * @param that The UnicodeString object to copy.
+   * @stable ICU 2.0
+   */
+  UnicodeString(const UnicodeString& that);
+
+  /**
+   * 'Substring' constructor from tail of source string.
+   * @param src The UnicodeString object to copy.
+   * @param srcStart The offset into <tt>src</tt> at which to start copying.
+   * @stable ICU 2.2
+   */
+  UnicodeString(const UnicodeString& src, int32_t srcStart);
+
+  /**
+   * 'Substring' constructor from subrange of source string.
+   * @param src The UnicodeString object to copy.
+   * @param srcStart The offset into <tt>src</tt> at which to start copying.
+   * @param srcLength The number of characters from <tt>src</tt> to copy.
+   * @stable ICU 2.2
+   */
+  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
+
+  /**
+   * Clone this object, an instance of a subclass of Replaceable.
+   * Clones can be used concurrently in multiple threads.
+   * If a subclass does not implement clone(), or if an error occurs,
+   * then NULL is returned.
+   * The clone functions in all subclasses return a pointer to a Replaceable
+   * because some compilers do not support covariant (same-as-this)
+   * return types; cast to the appropriate subclass if necessary.
+   * The caller must delete the clone.
+   *
+   * @return a clone of this object
+   *
+   * @see Replaceable::clone
+   * @see getDynamicClassID
+   * @stable ICU 2.6
+   */
+  virtual Replaceable *clone() const;
+
+  /** Destructor.
+   * @stable ICU 2.0
+   */
+  virtual ~UnicodeString();
+
+
+  /* Miscellaneous operations */
+
+  /**
+   * Unescape a string of characters and return a string containing
+   * the result.  The following escape sequences are recognized:
+   *
+   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
+   * \\Uhhhhhhhh   8 hex digits
+   * \\xhh         1-2 hex digits
+   * \\ooo         1-3 octal digits; o in [0-7]
+   * \\cX          control-X; X is masked with 0x1F
+   *
+   * as well as the standard ANSI C escapes:
+   *
+   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+   * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+   *
+   * Anything else following a backslash is generically escaped.  For
+   * example, "[a\\-z]" returns "[a-z]".
+   *
+   * If an escape sequence is ill-formed, this method returns an empty
+   * string.  An example of an ill-formed sequence is "\\u" followed by
+   * fewer than 4 hex digits.
+   *
+   * This function is similar to u_unescape() but not identical to it.
+   * The latter takes a source char*, so it does escape recognition
+   * and also invariant conversion.
+   *
+   * @return a string with backslash escapes interpreted, or an
+   * empty string on error.
+   * @see UnicodeString#unescapeAt()
+   * @see u_unescape()
+   * @see u_unescapeAt()
+   * @stable ICU 2.0
+   */
+  UnicodeString unescape() const;
+
+  /**
+   * Unescape a single escape sequence and return the represented
+   * character.  See unescape() for a listing of the recognized escape
+   * sequences.  The character at offset-1 is assumed (without
+   * checking) to be a backslash.  If the escape sequence is
+   * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
+   * returned.
+   *
+   * @param offset an input output parameter.  On input, it is the
+   * offset into this string where the escape sequence is located,
+   * after the initial backslash.  On output, it is advanced after the
+   * last character parsed.  On error, it is not advanced at all.
+   * @return the character represented by the escape sequence at
+   * offset, or (UChar32)0xFFFFFFFF on error.
+   * @see UnicodeString#unescape()
+   * @see u_unescape()
+   * @see u_unescapeAt()
+   * @stable ICU 2.0
+   */
+  UChar32 unescapeAt(int32_t &offset) const;
+
+  /**
+   * ICU "poor man's RTTI", returns a UClassID for this class.
+   *
+   * @stable ICU 2.2
+   */
+  static UClassID U_EXPORT2 getStaticClassID();
+
+  /**
+   * ICU "poor man's RTTI", returns a UClassID for the actual class.
+   *
+   * @stable ICU 2.2
+   */
+  virtual UClassID getDynamicClassID() const;
+
+  //========================================
+  // Implementation methods
+  //========================================
+
+protected:
+  /**
+   * Implement Replaceable::getLength() (see jitterbug 1027).
+   * @stable ICU 2.4
+   */
+  virtual int32_t getLength() const;
+
+  /**
+   * The change in Replaceable to use virtual getCharAt() allows
+   * UnicodeString::charAt() to be inline again (see jitterbug 709).
+   * @stable ICU 2.4
+   */
+  virtual UChar getCharAt(int32_t offset) const;
+
+  /**
+   * The change in Replaceable to use virtual getChar32At() allows
+   * UnicodeString::char32At() to be inline again (see jitterbug 709).
+   * @stable ICU 2.4
+   */
+  virtual UChar32 getChar32At(int32_t offset) const;
+
+private:
+
+  inline int8_t
+  doCompare(int32_t start,
+           int32_t length,
+           const UnicodeString& srcText,
+           int32_t srcStart,
+           int32_t srcLength) const;
+
+  int8_t doCompare(int32_t start,
+           int32_t length,
+           const UChar *srcChars,
+           int32_t srcStart,
+           int32_t srcLength) const;
+
+  inline int8_t
+  doCompareCodePointOrder(int32_t start,
+                          int32_t length,
+                          const UnicodeString& srcText,
+                          int32_t srcStart,
+                          int32_t srcLength) const;
+
+  int8_t doCompareCodePointOrder(int32_t start,
+                                 int32_t length,
+                                 const UChar *srcChars,
+                                 int32_t srcStart,
+                                 int32_t srcLength) const;
+
+  inline int8_t
+  doCaseCompare(int32_t start,
+                int32_t length,
+                const UnicodeString &srcText,
+                int32_t srcStart,
+                int32_t srcLength,
+                uint32_t options) const;
+
+  int8_t
+  doCaseCompare(int32_t start,
+                int32_t length,
+                const UChar *srcChars,
+                int32_t srcStart,
+                int32_t srcLength,
+                uint32_t options) const;
+
+  int32_t doIndexOf(UChar c,
+            int32_t start,
+            int32_t length) const;
+
+  int32_t doIndexOf(UChar32 c,
+                        int32_t start,
+                        int32_t length) const;
+
+  int32_t doLastIndexOf(UChar c,
+                int32_t start,
+                int32_t length) const;
+
+  int32_t doLastIndexOf(UChar32 c,
+                            int32_t start,
+                            int32_t length) const;
+
+  void doExtract(int32_t start,
+         int32_t length,
+         UChar *dst,
+         int32_t dstStart) const;
+
+  inline void doExtract(int32_t start,
+         int32_t length,
+         UnicodeString& target) const;
+
+  inline UChar doCharAt(int32_t offset)  const;
+
+  UnicodeString& doReplace(int32_t start,
+               int32_t length,
+               const UnicodeString& srcText,
+               int32_t srcStart,
+               int32_t srcLength);
+
+  UnicodeString& doReplace(int32_t start,
+               int32_t length,
+               const UChar *srcChars,
+               int32_t srcStart,
+               int32_t srcLength);
+
+  UnicodeString& doReverse(int32_t start,
+               int32_t length);
+
+  // calculate hash code
+  int32_t doHashCode(void) const;
+
+  // get pointer to start of array
+  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
+  inline UChar* getArrayStart(void);
+  inline const UChar* getArrayStart(void) const;
+
+  // A UnicodeString object (not necessarily its current buffer)
+  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
+  inline UBool isWritable() const;
+
+  // Is the current buffer writable?
+  inline UBool isBufferWritable() const;
+
+  // None of the following does releaseArray().
+  inline void setLength(int32_t len);        // sets only fShortLength and fLength
+  inline void setToEmpty();                  // sets fFlags=kShortString
+  inline void setToStackBuffer(int32_t len); // sets fFlags=kShortString
+  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
+
+  // allocate the array; result may be fStackBuffer
+  // sets refCount to 1 if appropriate
+  // sets fArray, fCapacity, and fFlags
+  // returns boolean for success or failure
+  UBool allocate(int32_t capacity);
+
+  // release the array if owned
+  void releaseArray(void);
+
+  // turn a bogus string into an empty one
+  void unBogus();
+
+  // implements assigment operator, copy constructor, and fastCopyFrom()
+  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
+
+  // Pin start and limit to acceptable values.
+  inline void pinIndex(int32_t& start) const;
+  inline void pinIndices(int32_t& start,
+                         int32_t& length) const;
+
+#if !UCONFIG_NO_CONVERSION
+
+  /* Internal extract() using UConverter. */
+  int32_t doExtract(int32_t start, int32_t length,
+                    char *dest, int32_t destCapacity,
+                    UConverter *cnv,
+                    UErrorCode &errorCode) const;
+
+  /*
+   * Real constructor for converting from codepage data.
+   * It assumes that it is called with !fRefCounted.
+   *
+   * If <code>codepage==0</code>, then the default converter
+   * is used for the platform encoding.
+   * If <code>codepage</code> is an empty string (<code>""</code>),
+   * then a simple conversion is performed on the codepage-invariant
+   * subset ("invariant characters") of the platform encoding. See utypes.h.
+   */
+  void doCodepageCreate(const char *codepageData,
+                        int32_t dataLength,
+                        const char *codepage);
+
+  /*
+   * Worker function for creating a UnicodeString from
+   * a codepage string using a UConverter.
+   */
+  void
+  doCodepageCreate(const char *codepageData,
+                   int32_t dataLength,
+                   UConverter *converter,
+                   UErrorCode &status);
+
+#endif
+
+  /*
+   * This function is called when write access to the array
+   * is necessary.
+   *
+   * We need to make a copy of the array if
+   * the buffer is read-only, or
+   * the buffer is refCounted (shared), and refCount>1, or
+   * the buffer is too small.
+   *
+   * Return FALSE if memory could not be allocated.
+   */
+  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
+                            int32_t growCapacity = -1,
+                            UBool doCopyArray = TRUE,
+                            int32_t **pBufferToDelete = 0,
+                            UBool forceClone = FALSE);
+
+  // common function for case mappings
+  UnicodeString &
+  caseMap(BreakIterator *titleIter,
+          const char *locale,
+          uint32_t options,
+          int32_t toWhichCase);
+
+  // ref counting
+  void addRef(void);
+  int32_t removeRef(void);
+  int32_t refCount(void) const;
+
+  // constants
+  enum {
+    // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):
+    // 32-bit pointers: 4+1+1+13*2 = 32 bytes
+    // 64-bit pointers: 8+1+1+15*2 = 40 bytes
+    US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings
+    kInvalidUChar=0xffff, // invalid UChar index
+    kGrowSize=128, // grow size for this buffer
+    kInvalidHashCode=0, // invalid hash code
+    kEmptyHashCode=1, // hash code for empty string
+
+    // bit flag values for fFlags
+    kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
+    kUsingStackBuffer=2,// fArray==fStackBuffer
+    kRefCounted=4,      // there is a refCount field before the characters in fArray
+    kBufferIsReadonly=8,// do not write to this buffer
+    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
+                        // and releaseBuffer(newLength) must be called
+
+    // combined values for convenience
+    kShortString=kUsingStackBuffer,
+    kLongString=kRefCounted,
+    kReadonlyAlias=kBufferIsReadonly,
+    kWritableAlias=0
+  };
+
+  friend class StringThreadTest;
+
+  union StackBufferOrFields;        // forward declaration necessary before friend declaration
+  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
+
+  /*
+   * The following are all the class fields that are stored
+   * in each UnicodeString object.
+   * Note that UnicodeString has virtual functions,
+   * therefore there is an implicit vtable pointer
+   * as the first real field.
+   * The fields should be aligned such that no padding is
+   * necessary, mostly by having larger types first.
+   * On 32-bit machines, the size should be 32 bytes,
+   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
+   */
+  // (implicit) *vtable;
+  int8_t    fShortLength;   // 0..127: length  <0: real length is in fUnion.fFields.fLength
+  uint8_t   fFlags;         // bit flags: see constants above
+  union StackBufferOrFields {
+    // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
+    // else fFields is used
+    UChar     fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
+    struct {
+      uint16_t  fPadding;   // align the following field at 8B (32b pointers) or 12B (64b)
+      int32_t   fLength;    // number of characters in fArray if >127; else undefined
+      UChar     *fArray;    // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
+      int32_t   fCapacity;  // sizeof fArray
+    } fFields;
+  } fUnion;
+};
+
+/**
+ * Create a new UnicodeString with the concatenation of two others.
+ *
+ * @param s1 The first string to be copied to the new one.
+ * @param s2 The second string to be copied to the new one, after s1.
+ * @return UnicodeString(s1).append(s2)
+ * @stable ICU 2.8
+ */
+U_COMMON_API UnicodeString U_EXPORT2
+operator+ (const UnicodeString &s1, const UnicodeString &s2);
+
+//========================================
+// Inline members
+//========================================
+
+//========================================
+// Privates
+//========================================
+
+inline void
+UnicodeString::pinIndex(int32_t& start) const
+{
+  // pin index
+  if(start < 0) {
+    start = 0;
+  } else if(start > length()) {
+    start = length();
+  }
+}
+
+inline void
+UnicodeString::pinIndices(int32_t& start,
+                          int32_t& _length) const
+{
+  // pin indices
+  int32_t len = length();
+  if(start < 0) {
+    start = 0;
+  } else if(start > len) {
+    start = len;
+  }
+  if(_length < 0) {
+    _length = 0;
+  } else if(_length > (len - start)) {
+    _length = (len - start);
+  }
+}
+
+inline UChar*
+UnicodeString::getArrayStart()
+{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
+
+inline const UChar*
+UnicodeString::getArrayStart() const
+{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
+
+//========================================
+// Read-only implementation methods
+//========================================
+inline int32_t
+UnicodeString::length() const
+{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
+
+inline int32_t
+UnicodeString::getCapacity() const
+{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
+
+inline int32_t
+UnicodeString::hashCode() const
+{ return doHashCode(); }
+
+inline UBool
+UnicodeString::isBogus() const
+{ return (UBool)(fFlags & kIsBogus); }
+
+inline UBool
+UnicodeString::isWritable() const
+{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
+
+inline UBool
+UnicodeString::isBufferWritable() const
+{
+  return (UBool)(
+      !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
+      (!(fFlags&kRefCounted) || refCount()==1));
+}
+
+inline const UChar *
+UnicodeString::getBuffer() const {
+  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
+    return 0;
+  } else if(fFlags&kUsingStackBuffer) {
+    return fUnion.fStackBuffer;
+  } else {
+    return fUnion.fFields.fArray;
+  }
+}
+
+//========================================
+// Read-only alias methods
+//========================================
+inline int8_t
+UnicodeString::doCompare(int32_t start,
+              int32_t thisLength,
+              const UnicodeString& srcText,
+              int32_t srcStart,
+              int32_t srcLength) const
+{
+  if(srcText.isBogus()) {
+    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+  } else {
+    srcText.pinIndices(srcStart, srcLength);
+    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
+  }
+}
+
+inline UBool
+UnicodeString::operator== (const UnicodeString& text) const
+{
+  if(isBogus()) {
+    return text.isBogus();
+  } else {
+    int32_t len = length(), textLength = text.length();
+    return
+      !text.isBogus() &&
+      len == textLength &&
+      doCompare(0, len, text, 0, textLength) == 0;
+  }
+}
+
+inline UBool
+UnicodeString::operator!= (const UnicodeString& text) const
+{ return (! operator==(text)); }
+
+inline UBool
+UnicodeString::operator> (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) == 1; }
+
+inline UBool
+UnicodeString::operator< (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) == -1; }
+
+inline UBool
+UnicodeString::operator>= (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) != -1; }
+
+inline UBool
+UnicodeString::operator<= (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) != 1; }
+
+inline int8_t
+UnicodeString::compare(const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+               int32_t _length,
+               const UnicodeString& srcText) const
+{ return doCompare(start, _length, srcText, 0, srcText.length()); }
+
+inline int8_t
+UnicodeString::compare(const UChar *srcChars,
+               int32_t srcLength) const
+{ return doCompare(0, length(), srcChars, 0, srcLength); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+               int32_t _length,
+               const UnicodeString& srcText,
+               int32_t srcStart,
+               int32_t srcLength) const
+{ return doCompare(start, _length, srcText, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+               int32_t _length,
+               const UChar *srcChars) const
+{ return doCompare(start, _length, srcChars, 0, _length); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+               int32_t _length,
+               const UChar *srcChars,
+               int32_t srcStart,
+               int32_t srcLength) const
+{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compareBetween(int32_t start,
+                  int32_t limit,
+                  const UnicodeString& srcText,
+                  int32_t srcStart,
+                  int32_t srcLimit) const
+{ return doCompare(start, limit - start,
+           srcText, srcStart, srcLimit - srcStart); }
+
+inline int8_t
+UnicodeString::doCompareCodePointOrder(int32_t start,
+                                       int32_t thisLength,
+                                       const UnicodeString& srcText,
+                                       int32_t srcStart,
+                                       int32_t srcLength) const
+{
+  if(srcText.isBogus()) {
+    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+  } else {
+    srcText.pinIndices(srcStart, srcLength);
+    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
+  }
+}
+
+inline int8_t
+UnicodeString::compareCodePointOrder(const UnicodeString& text) const
+{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UnicodeString& srcText) const
+{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(const UChar *srcChars,
+                                     int32_t srcLength) const
+{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UnicodeString& srcText,
+                                     int32_t srcStart,
+                                     int32_t srcLength) const
+{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UChar *srcChars) const
+{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+                                     int32_t _length,
+                                     const UChar *srcChars,
+                                     int32_t srcStart,
+                                     int32_t srcLength) const
+{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compareCodePointOrderBetween(int32_t start,
+                                            int32_t limit,
+                                            const UnicodeString& srcText,
+                                            int32_t srcStart,
+                                            int32_t srcLimit) const
+{ return doCompareCodePointOrder(start, limit - start,
+           srcText, srcStart, srcLimit - srcStart); }
+
+inline int8_t
+UnicodeString::doCaseCompare(int32_t start,
+                             int32_t thisLength,
+                             const UnicodeString &srcText,
+                             int32_t srcStart,
+                             int32_t srcLength,
+                             uint32_t options) const
+{
+  if(srcText.isBogus()) {
+    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+  } else {
+    srcText.pinIndices(srcStart, srcLength);
+    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
+  }
+}
+
+inline int8_t
+UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
+  return doCaseCompare(0, length(), text, 0, text.length(), options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UnicodeString &srcText,
+                           uint32_t options) const {
+  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(const UChar *srcChars,
+                           int32_t srcLength,
+                           uint32_t options) const {
+  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UnicodeString &srcText,
+                           int32_t srcStart,
+                           int32_t srcLength,
+                           uint32_t options) const {
+  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UChar *srcChars,
+                           uint32_t options) const {
+  return doCaseCompare(start, _length, srcChars, 0, _length, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+                           int32_t _length,
+                           const UChar *srcChars,
+                           int32_t srcStart,
+                           int32_t srcLength,
+                           uint32_t options) const {
+  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompareBetween(int32_t start,
+                                  int32_t limit,
+                                  const UnicodeString &srcText,
+                                  int32_t srcStart,
+                                  int32_t srcLimit,
+                                  uint32_t options) const {
+  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
+}
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& srcText,
+               int32_t srcStart,
+               int32_t srcLength,
+               int32_t start,
+               int32_t _length) const
+{
+  if(!srcText.isBogus()) {
+    srcText.pinIndices(srcStart, srcLength);
+    if(srcLength > 0) {
+      return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+    }
+  }
+  return -1;
+}
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text) const
+{ return indexOf(text, 0, text.length(), 0, length()); }
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text,
+               int32_t start) const {
+  pinIndex(start);
+  return indexOf(text, 0, text.length(), start, length() - start);
+}
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text,
+               int32_t start,
+               int32_t _length) const
+{ return indexOf(text, 0, text.length(), start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(const UChar *srcChars,
+               int32_t srcLength,
+               int32_t start) const {
+  pinIndex(start);
+  return indexOf(srcChars, 0, srcLength, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::indexOf(const UChar *srcChars,
+               int32_t srcLength,
+               int32_t start,
+               int32_t _length) const
+{ return indexOf(srcChars, 0, srcLength, start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(UChar c,
+               int32_t start,
+               int32_t _length) const
+{ return doIndexOf(c, start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(UChar32 c,
+               int32_t start,
+               int32_t _length) const
+{ return doIndexOf(c, start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(UChar c) const
+{ return doIndexOf(c, 0, length()); }
+
+inline int32_t
+UnicodeString::indexOf(UChar32 c) const
+{ return indexOf(c, 0, length()); }
+
+inline int32_t
+UnicodeString::indexOf(UChar c,
+               int32_t start) const {
+  pinIndex(start);
+  return doIndexOf(c, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::indexOf(UChar32 c,
+               int32_t start) const {
+  pinIndex(start);
+  return indexOf(c, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UChar *srcChars,
+               int32_t srcLength,
+               int32_t start,
+               int32_t _length) const
+{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
+
+inline int32_t
+UnicodeString::lastIndexOf(const UChar *srcChars,
+               int32_t srcLength,
+               int32_t start) const {
+  pinIndex(start);
+  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& srcText,
+               int32_t srcStart,
+               int32_t srcLength,
+               int32_t start,
+               int32_t _length) const
+{
+  if(!srcText.isBogus()) {
+    srcText.pinIndices(srcStart, srcLength);
+    if(srcLength > 0) {
+      return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+    }
+  }
+  return -1;
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text,
+               int32_t start,
+               int32_t _length) const
+{ return lastIndexOf(text, 0, text.length(), start, _length); }
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text,
+               int32_t start) const {
+  pinIndex(start);
+  return lastIndexOf(text, 0, text.length(), start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text) const
+{ return lastIndexOf(text, 0, text.length(), 0, length()); }
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar c,
+               int32_t start,
+               int32_t _length) const
+{ return doLastIndexOf(c, start, _length); }
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c,
+               int32_t start,
+               int32_t _length) const {
+  return doLastIndexOf(c, start, _length);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar c) const
+{ return doLastIndexOf(c, 0, length()); }
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c) const {
+  return lastIndexOf(c, 0, length());
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar c,
+               int32_t start) const {
+  pinIndex(start);
+  return doLastIndexOf(c, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c,
+               int32_t start) const {
+  pinIndex(start);
+  return lastIndexOf(c, start, length() - start);
+}
+
+inline UBool
+UnicodeString::startsWith(const UnicodeString& text) const
+{ return compare(0, text.length(), text, 0, text.length()) == 0; }
+
+inline UBool
+UnicodeString::startsWith(const UnicodeString& srcText,
+              int32_t srcStart,
+              int32_t srcLength) const
+{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
+
+inline UBool
+UnicodeString::startsWith(const UChar *srcChars,
+              int32_t srcLength) const
+{ return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
+
+inline UBool
+UnicodeString::startsWith(const UChar *srcChars,
+              int32_t srcStart,
+              int32_t srcLength) const
+{ return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
+
+inline UBool
+UnicodeString::endsWith(const UnicodeString& text) const
+{ return doCompare(length() - text.length(), text.length(),
+           text, 0, text.length()) == 0; }
+
+inline UBool
+UnicodeString::endsWith(const UnicodeString& srcText,
+            int32_t srcStart,
+            int32_t srcLength) const {
+  srcText.pinIndices(srcStart, srcLength);
+  return doCompare(length() - srcLength, srcLength,
+                   srcText, srcStart, srcLength) == 0;
+}
+
+inline UBool
+UnicodeString::endsWith(const UChar *srcChars,
+            int32_t srcLength) const {
+  if(srcLength < 0) {
+    srcLength = u_strlen(srcChars);
+  }
+  return doCompare(length() - srcLength, srcLength,
+                   srcChars, 0, srcLength) == 0;
+}
+
+inline UBool
+UnicodeString::endsWith(const UChar *srcChars,
+            int32_t srcStart,
+            int32_t srcLength) const {
+  if(srcLength < 0) {
+    srcLength = u_strlen(srcChars + srcStart);
+  }
+  return doCompare(length() - srcLength, srcLength,
+                   srcChars, srcStart, srcLength) == 0;
+}
+
+//========================================
+// replace
+//========================================
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               const UnicodeString& srcText)
+{ return doReplace(start, _length, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               const UnicodeString& srcText,
+               int32_t srcStart,
+               int32_t srcLength)
+{ return doReplace(start, _length, srcText, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               const UChar *srcChars,
+               int32_t srcLength)
+{ return doReplace(start, _length, srcChars, 0, srcLength); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               const UChar *srcChars,
+               int32_t srcStart,
+               int32_t srcLength)
+{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               UChar srcChar)
+{ return doReplace(start, _length, &srcChar, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               UChar32 srcChar) {
+  UChar buffer[U16_MAX_LENGTH];
+  int32_t count = 0;
+  UBool isError = FALSE;
+  U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
+  return doReplace(start, _length, buffer, 0, count);
+}
+
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+                  int32_t limit,
+                  const UnicodeString& srcText)
+{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+                  int32_t limit,
+                  const UnicodeString& srcText,
+                  int32_t srcStart,
+                  int32_t srcLimit)
+{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
+
+inline UnicodeString&
+UnicodeString::findAndReplace(const UnicodeString& oldText,
+                  const UnicodeString& newText)
+{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
+            newText, 0, newText.length()); }
+
+inline UnicodeString&
+UnicodeString::findAndReplace(int32_t start,
+                  int32_t _length,
+                  const UnicodeString& oldText,
+                  const UnicodeString& newText)
+{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
+            newText, 0, newText.length()); }
+
+// ============================
+// extract
+// ============================
+inline void
+UnicodeString::doExtract(int32_t start,
+             int32_t _length,
+             UnicodeString& target) const
+{ target.replace(0, target.length(), *this, start, _length); }
+
+inline void
+UnicodeString::extract(int32_t start,
+               int32_t _length,
+               UChar *target,
+               int32_t targetStart) const
+{ doExtract(start, _length, target, targetStart); }
+
+inline void
+UnicodeString::extract(int32_t start,
+               int32_t _length,
+               UnicodeString& target) const
+{ doExtract(start, _length, target); }
+
+#if !UCONFIG_NO_CONVERSION
+
+inline int32_t
+UnicodeString::extract(int32_t start,
+               int32_t _length,
+               char *dst,
+               const char *codepage) const
+
+{
+  // This dstSize value will be checked explicitly
+  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
+}
+
+#endif
+
+inline void
+UnicodeString::extractBetween(int32_t start,
+                  int32_t limit,
+                  UChar *dst,
+                  int32_t dstStart) const {
+  pinIndex(start);
+  pinIndex(limit);
+  doExtract(start, limit - start, dst, dstStart);
+}
+
+inline UChar
+UnicodeString::doCharAt(int32_t offset) const
+{
+  if((uint32_t)offset < (uint32_t)length()) {
+    return getArrayStart()[offset];
+  } else {
+    return kInvalidUChar;
+  }
+}
+
+inline UChar
+UnicodeString::charAt(int32_t offset) const
+{ return doCharAt(offset); }
+
+inline UChar
+UnicodeString::operator[] (int32_t offset) const
+{ return doCharAt(offset); }
+
+inline UChar32
+UnicodeString::char32At(int32_t offset) const
+{
+  int32_t len = length();
+  if((uint32_t)offset < (uint32_t)len) {
+    const UChar *array = getArrayStart();
+    UChar32 c;
+    U16_GET(array, 0, offset, len, c);
+    return c;
+  } else {
+    return kInvalidUChar;
+  }
+}
+
+inline int32_t
+UnicodeString::getChar32Start(int32_t offset) const {
+  if((uint32_t)offset < (uint32_t)length()) {
+    const UChar *array = getArrayStart();
+    U16_SET_CP_START(array, 0, offset);
+    return offset;
+  } else {
+    return 0;
+  }
+}
+
+inline int32_t
+UnicodeString::getChar32Limit(int32_t offset) const {
+  int32_t len = length();
+  if((uint32_t)offset < (uint32_t)len) {
+    const UChar *array = getArrayStart();
+    U16_SET_CP_LIMIT(array, 0, offset, len);
+    return offset;
+  } else {
+    return len;
+  }
+}
+
+inline UBool
+UnicodeString::isEmpty() const {
+  return fShortLength == 0;
+}
+
+//========================================
+// Write implementation methods
+//========================================
+inline void
+UnicodeString::setLength(int32_t len) {
+  if(len <= 127) {
+    fShortLength = (int8_t)len;
+  } else {
+    fShortLength = (int8_t)-1;
+    fUnion.fFields.fLength = len;
+  }
+}
+
+inline void
+UnicodeString::setToEmpty() {
+  fShortLength = 0;
+  fFlags = kShortString;
+}
+
+inline void
+UnicodeString::setToStackBuffer(int32_t len) {
+  fShortLength = (int8_t)len;
+  fFlags = kShortString;
+}
+
+inline void
+UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
+  setLength(len);
+  fUnion.fFields.fArray = array;
+  fUnion.fFields.fCapacity = capacity;
+}
+
+inline const UChar *
+UnicodeString::getTerminatedBuffer() {
+  if(!isWritable()) {
+    return 0;
+  } else {
+    UChar *array = getArrayStart();
+    int32_t len = length();
+    if(len < getCapacity() && array[len] == 0) {
+      return array;
+    } else if(cloneArrayIfNeeded(len+1)) {
+      array = getArrayStart();
+      array[len] = 0;
+      return array;
+    } else {
+      return 0;
+    }
+  }
+}
+
+inline UnicodeString&
+UnicodeString::operator= (UChar ch)
+{ return doReplace(0, length(), &ch, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator= (UChar32 ch)
+{ return replace(0, length(), ch); }
+
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText,
+             int32_t srcStart,
+             int32_t srcLength)
+{
+  unBogus();
+  return doReplace(0, length(), srcText, srcStart, srcLength);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText,
+             int32_t srcStart)
+{
+  unBogus();
+  srcText.pinIndex(srcStart);
+  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText)
+{
+  unBogus();
+  return doReplace(0, length(), srcText, 0, srcText.length());
+}
+
+inline UnicodeString&
+UnicodeString::setTo(const UChar *srcChars,
+             int32_t srcLength)
+{
+  unBogus();
+  return doReplace(0, length(), srcChars, 0, srcLength);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(UChar srcChar)
+{
+  unBogus();
+  return doReplace(0, length(), &srcChar, 0, 1);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(UChar32 srcChar)
+{
+  unBogus();
+  return replace(0, length(), srcChar);
+}
+
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText,
+              int32_t srcStart,
+              int32_t srcLength)
+{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText)
+{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::append(const UChar *srcChars,
+              int32_t srcStart,
+              int32_t srcLength)
+{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::append(const UChar *srcChars,
+              int32_t srcLength)
+{ return doReplace(length(), 0, srcChars, 0, srcLength); }
+
+inline UnicodeString&
+UnicodeString::append(UChar srcChar)
+{ return doReplace(length(), 0, &srcChar, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::append(UChar32 srcChar) {
+  UChar buffer[U16_MAX_LENGTH];
+  int32_t _length = 0;
+  UBool isError = FALSE;
+  U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
+  return doReplace(length(), 0, buffer, 0, _length);
+}
+
+inline UnicodeString&
+UnicodeString::operator+= (UChar ch)
+{ return doReplace(length(), 0, &ch, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator+= (UChar32 ch) {
+  return append(ch);
+}
+
+inline UnicodeString&
+UnicodeString::operator+= (const UnicodeString& srcText)
+{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              const UnicodeString& srcText,
+              int32_t srcStart,
+              int32_t srcLength)
+{ return doReplace(start, 0, srcText, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              const UnicodeString& srcText)
+{ return doReplace(start, 0, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              const UChar *srcChars,
+              int32_t srcStart,
+              int32_t srcLength)
+{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              const UChar *srcChars,
+              int32_t srcLength)
+{ return doReplace(start, 0, srcChars, 0, srcLength); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              UChar srcChar)
+{ return doReplace(start, 0, &srcChar, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              UChar32 srcChar)
+{ return replace(start, 0, srcChar); }
+
+
+inline UnicodeString&
+UnicodeString::remove()
+{
+  // remove() of a bogus string makes the string empty and non-bogus
+  if(isBogus()) {
+    unBogus();
+  } else {
+    setLength(0);
+  }
+  return *this;
+}
+
+inline UnicodeString&
+UnicodeString::remove(int32_t start,
+             int32_t _length)
+{
+    if(start <= 0 && _length == INT32_MAX) {
+        // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
+        return remove();
+    }
+    return doReplace(start, _length, NULL, 0, 0);
+}
+
+inline UnicodeString&
+UnicodeString::removeBetween(int32_t start,
+                int32_t limit)
+{ return doReplace(start, limit - start, NULL, 0, 0); }
+
+inline UBool
+UnicodeString::truncate(int32_t targetLength)
+{
+  if(isBogus() && targetLength == 0) {
+    // truncate(0) of a bogus string makes the string empty and non-bogus
+    unBogus();
+    return FALSE;
+  } else if((uint32_t)targetLength < (uint32_t)length()) {
+    setLength(targetLength);
+    return TRUE;
+  } else {
+    return FALSE;
+  }
+}
+
+inline UnicodeString&
+UnicodeString::reverse()
+{ return doReverse(0, length()); }
+
+inline UnicodeString&
+UnicodeString::reverse(int32_t start,
+               int32_t _length)
+{ return doReverse(start, _length); }
+
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/unorm.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/unorm.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/unorm.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,576 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (c) 1996-2007, International Business Machines Corporation
-*               and others. All Rights Reserved.
-*******************************************************************************
-* File unorm.h
-*
-* Created by: Vladimir Weinstein 12052000
-*
-* Modification history :
-*
-* Date        Name        Description
-* 02/01/01    synwee      Added normalization quickcheck enum and method.
-*/
-#ifndef UNORM_H
-#define UNORM_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/uiter.h"
-
-/**
- * \file
- * \brief C API: Unicode Normalization 
- *
- * <h2>Unicode normalization API</h2>
- *
- * <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or
- * decomposed form, allowing for easier sorting and searching of text.
- * <code>unorm_normalize</code> supports the standard normalization forms described in
- * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
- * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
- *
- * Characters with accents or other adornments can be encoded in
- * several different ways in Unicode.  For example, take the character A-acute.
- * In Unicode, this can be encoded as a single character (the
- * "composed" form):
- *
- * \code
- *      00C1    LATIN CAPITAL LETTER A WITH ACUTE
- * \endcode
- *
- * or as two separate characters (the "decomposed" form):
- *
- * \code
- *      0041    LATIN CAPITAL LETTER A
- *      0301    COMBINING ACUTE ACCENT
- * \endcode
- *
- * To a user of your program, however, both of these sequences should be
- * treated as the same "user-level" character "A with acute accent".  When you are searching or
- * comparing text, you must ensure that these two sequences are treated 
- * equivalently.  In addition, you must handle characters with more than one
- * accent.  Sometimes the order of a character's combining accents is
- * significant, while in other cases accent sequences in different orders are
- * really equivalent.
- *
- * Similarly, the string "ffi" can be encoded as three separate letters:
- *
- * \code
- *      0066    LATIN SMALL LETTER F
- *      0066    LATIN SMALL LETTER F
- *      0069    LATIN SMALL LETTER I
- * \endcode
- *
- * or as the single character
- *
- * \code
- *      FB03    LATIN SMALL LIGATURE FFI
- * \endcode
- *
- * The ffi ligature is not a distinct semantic character, and strictly speaking
- * it shouldn't be in Unicode at all, but it was included for compatibility
- * with existing character sets that already provided it.  The Unicode standard
- * identifies such characters by giving them "compatibility" decompositions
- * into the corresponding semantic characters.  When sorting and searching, you
- * will often want to use these mappings.
- *
- * <code>unorm_normalize</code> helps solve these problems by transforming text into the
- * canonical composed and decomposed forms as shown in the first example above.  
- * In addition, you can have it perform compatibility decompositions so that 
- * you can treat compatibility characters the same as their equivalents.
- * Finally, <code>unorm_normalize</code> rearranges accents into the proper canonical
- * order, so that you do not have to worry about accent rearrangement on your
- * own.
- *
- * Form FCD, "Fast C or D", is also designed for collation.
- * It allows to work on strings that are not necessarily normalized
- * with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed
- * characters and their decomposed equivalents the same.
- *
- * It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings
- * may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical
- * themselves.
- *
- * The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character,
- * results in a string that is canonically ordered. This means that precomposed characters are allowed for as long
- * as their decompositions do not need canonical reordering.
- *
- * Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts -
- * already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will
- * return UNORM_YES for most strings in practice.
- *
- * unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD.
- *
- * For more details on FCD see the collation design document:
- * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
- *
- * ICU collation performs either NFD or FCD normalization automatically if normalization
- * is turned on for the collator object.
- * Beyond collation and string search, normalized strings may be useful for string equivalence comparisons,
- * transliteration/transcription, unique representations, etc.
- *
- * The W3C generally recommends to exchange texts in NFC.
- * Note also that most legacy character encodings use only precomposed forms and often do not
- * encode any combining marks by themselves. For conversion to such character encodings the
- * Unicode text needs to be normalized to NFC.
- * For more usage examples, see the Unicode Standard Annex.
- */
-
-/**
- * Constants for normalization modes.
- * @stable ICU 2.0
- */
-typedef enum {
-  /** No decomposition/composition. @stable ICU 2.0 */
-  UNORM_NONE = 1, 
-  /** Canonical decomposition. @stable ICU 2.0 */
-  UNORM_NFD = 2,
-  /** Compatibility decomposition. @stable ICU 2.0 */
-  UNORM_NFKD = 3,
-  /** Canonical decomposition followed by canonical composition. @stable ICU 2.0 */
-  UNORM_NFC = 4,
-  /** Default normalization. @stable ICU 2.0 */
-  UNORM_DEFAULT = UNORM_NFC, 
-  /** Compatibility decomposition followed by canonical composition. @stable ICU 2.0 */
-  UNORM_NFKC =5,
-  /** "Fast C or D" form. @stable ICU 2.0 */
-  UNORM_FCD = 6,
-
-  /** One more than the highest normalization mode constant. @stable ICU 2.0 */
-  UNORM_MODE_COUNT
-} UNormalizationMode;
-
-/**
- * Constants for options flags for normalization.
- * Use 0 for default options,
- * including normalization according to the Unicode version
- * that is currently supported by ICU (see u_getUnicodeVersion).
- * @stable ICU 2.6
- */
-enum {
-    /**
-     * Options bit set value to select Unicode 3.2 normalization
-     * (except NormalizationCorrections).
-     * At most one Unicode version can be selected at a time.
-     * @stable ICU 2.6
-     */
-    UNORM_UNICODE_3_2=0x20
-};
-
-/**
- * Lowest-order bit number of unorm_compare() options bits corresponding to
- * normalization options bits.
- *
- * The options parameter for unorm_compare() uses most bits for
- * itself and for various comparison and folding flags.
- * The most significant bits, however, are shifted down and passed on
- * to the normalization implementation.
- * (That is, from unorm_compare(..., options, ...),
- * options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the
- * internal normalization functions.)
- *
- * @see unorm_compare
- * @stable ICU 2.6
- */
-#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
-
-/**
- * Normalize a string.
- * The string will be normalized according the specified normalization mode
- * and options.
- * The source and result buffers must not be the same, nor overlap.
- *
- * @param source The string to normalize.
- * @param sourceLength The length of source, or -1 if NUL-terminated.
- * @param mode The normalization mode; one of UNORM_NONE, 
- *             UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT.
- * @param options The normalization options, ORed together (0 for no options).
- * @param result A pointer to a buffer to receive the result string.
- *               The result string is NUL-terminated if possible.
- * @param resultLength The maximum size of result.
- * @param status A pointer to a UErrorCode to receive any errors.
- * @return The total buffer size needed; if greater than resultLength,
- *         the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-unorm_normalize(const UChar *source, int32_t sourceLength,
-                UNormalizationMode mode, int32_t options,
-                UChar *result, int32_t resultLength,
-                UErrorCode *status);
-#endif
-/**
- * Result values for unorm_quickCheck().
- * For details see Unicode Technical Report 15.
- * @stable ICU 2.0
- */
-typedef enum UNormalizationCheckResult {
-  /** 
-   * Indicates that string is not in the normalized format
-   */
-  UNORM_NO,
-  /** 
-   * Indicates that string is in the normalized format
-   */
-  UNORM_YES,
-  /** 
-   * Indicates that string cannot be determined if it is in the normalized 
-   * format without further thorough checks.
-   */
-  UNORM_MAYBE
-} UNormalizationCheckResult;
-#if !UCONFIG_NO_NORMALIZATION
-/**
- * Performing quick check on a string, to quickly determine if the string is 
- * in a particular normalization format.
- * Three types of result can be returned UNORM_YES, UNORM_NO or
- * UNORM_MAYBE. Result UNORM_YES indicates that the argument
- * string is in the desired normalized format, UNORM_NO determines that
- * argument string is not in the desired normalized format. A 
- * UNORM_MAYBE result indicates that a more thorough check is required, 
- * the user may have to put the string in its normalized form and compare the 
- * results.
- *
- * @param source       string for determining if it is in a normalized format
- * @param sourcelength length of source to test, or -1 if NUL-terminated
- * @param mode         which normalization form to test for
- * @param status       a pointer to a UErrorCode to receive any errors
- * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
- *
- * @see unorm_isNormalized
- * @stable ICU 2.0
- */
-U_STABLE UNormalizationCheckResult U_EXPORT2
-unorm_quickCheck(const UChar *source, int32_t sourcelength,
-                 UNormalizationMode mode,
-                 UErrorCode *status);
-
-/**
- * Performing quick check on a string; same as unorm_quickCheck but
- * takes an extra options parameter like most normalization functions.
- *
- * @param src        String that is to be tested if it is in a normalization format.
- * @param srcLength  Length of source to test, or -1 if NUL-terminated.
- * @param mode       Which normalization form to test for.
- * @param options    The normalization options, ORed together (0 for no options).
- * @param pErrorCode ICU error code in/out parameter.
- *                   Must fulfill U_SUCCESS before the function call.
- * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
- *
- * @see unorm_quickCheck
- * @see unorm_isNormalized
- * @stable ICU 2.6
- */
-U_STABLE UNormalizationCheckResult U_EXPORT2
-unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, 
-                            UNormalizationMode mode, int32_t options,
-                            UErrorCode *pErrorCode);
-
-/**
- * Test if a string is in a given normalization form.
- * This is semantically equivalent to source.equals(normalize(source, mode)) .
- *
- * Unlike unorm_quickCheck(), this function returns a definitive result,
- * never a "maybe".
- * For NFD, NFKD, and FCD, both functions work exactly the same.
- * For NFC and NFKC where quickCheck may return "maybe", this function will
- * perform further tests to arrive at a TRUE/FALSE result.
- *
- * @param src        String that is to be tested if it is in a normalization format.
- * @param srcLength  Length of source to test, or -1 if NUL-terminated.
- * @param mode       Which normalization form to test for.
- * @param pErrorCode ICU error code in/out parameter.
- *                   Must fulfill U_SUCCESS before the function call.
- * @return Boolean value indicating whether the source string is in the
- *         "mode" normalization form.
- *
- * @see unorm_quickCheck
- * @stable ICU 2.2
- */
-U_STABLE UBool U_EXPORT2
-unorm_isNormalized(const UChar *src, int32_t srcLength,
-                   UNormalizationMode mode,
-                   UErrorCode *pErrorCode);
-
-/**
- * Test if a string is in a given normalization form; same as unorm_isNormalized but
- * takes an extra options parameter like most normalization functions.
- *
- * @param src        String that is to be tested if it is in a normalization format.
- * @param srcLength  Length of source to test, or -1 if NUL-terminated.
- * @param mode       Which normalization form to test for.
- * @param options    The normalization options, ORed together (0 for no options).
- * @param pErrorCode ICU error code in/out parameter.
- *                   Must fulfill U_SUCCESS before the function call.
- * @return Boolean value indicating whether the source string is in the
- *         "mode/options" normalization form.
- *
- * @see unorm_quickCheck
- * @see unorm_isNormalized
- * @stable ICU 2.6
- */
-U_STABLE UBool U_EXPORT2
-unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
-                              UNormalizationMode mode, int32_t options,
-                              UErrorCode *pErrorCode);
-
-/**
- * Iterative normalization forward.
- * This function (together with unorm_previous) is somewhat
- * similar to the C++ Normalizer class (see its non-static functions).
- *
- * Iterative normalization is useful when only a small portion of a longer
- * string/text needs to be processed.
- *
- * For example, the likelihood may be high that processing the first 10% of some
- * text will be sufficient to find certain data.
- * Another example: When one wants to concatenate two normalized strings and get a
- * normalized result, it is much more efficient to normalize just a small part of
- * the result around the concatenation place instead of re-normalizing everything.
- *
- * The input text is an instance of the C character iteration API UCharIterator.
- * It may wrap around a simple string, a CharacterIterator, a Replaceable, or any
- * other kind of text object.
- *
- * If a buffer overflow occurs, then the caller needs to reset the iterator to the
- * old index and call the function again with a larger buffer - if the caller cares
- * for the actual output.
- * Regardless of the output buffer, the iterator will always be moved to the next
- * normalization boundary.
- *
- * This function (like unorm_previous) serves two purposes:
- *
- * 1) To find the next boundary so that the normalization of the part of the text
- * from the current position to that boundary does not affect and is not affected
- * by the part of the text beyond that boundary.
- *
- * 2) To normalize the text up to the boundary.
- *
- * The second step is optional, per the doNormalize parameter.
- * It is omitted for operations like string concatenation, where the two adjacent
- * string ends need to be normalized together.
- * In such a case, the output buffer will just contain a copy of the text up to the
- * boundary.
- *
- * pNeededToNormalize is an output-only parameter. Its output value is only defined
- * if normalization was requested (doNormalize) and successful (especially, no
- * buffer overflow).
- * It is useful for operations like a normalizing transliterator, where one would
- * not want to replace a piece of text if it is not modified.
- *
- * If doNormalize==TRUE and pNeededToNormalize!=NULL then *pNeeded... is set TRUE
- * if the normalization was necessary.
- *
- * If doNormalize==FALSE then *pNeededToNormalize will be set to FALSE.
- *
- * If the buffer overflows, then *pNeededToNormalize will be undefined;
- * essentially, whenever U_FAILURE is true (like in buffer overflows), this result
- * will be undefined.
- *
- * @param src The input text in the form of a C character iterator.
- * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
- * @param destCapacity The number of UChars that fit into dest.
- * @param mode The normalization mode.
- * @param options The normalization options, ORed together (0 for no options).
- * @param doNormalize Indicates if the source text up to the next boundary
- *                    is to be normalized (TRUE) or just copied (FALSE).
- * @param pNeededToNormalize Output flag indicating if the normalization resulted in
- *                           different text from the input.
- *                           Not defined if an error occurs including buffer overflow.
- *                           Always FALSE if !doNormalize.
- * @param pErrorCode ICU error code in/out parameter.
- *                   Must fulfill U_SUCCESS before the function call.
- * @return Length of output (number of UChars) when successful or buffer overflow.
- *
- * @see unorm_previous
- * @see unorm_normalize
- *
- * @stable ICU 2.1
- */
-U_STABLE int32_t U_EXPORT2
-unorm_next(UCharIterator *src,
-           UChar *dest, int32_t destCapacity,
-           UNormalizationMode mode, int32_t options,
-           UBool doNormalize, UBool *pNeededToNormalize,
-           UErrorCode *pErrorCode);
-
-/**
- * Iterative normalization backward.
- * This function (together with unorm_next) is somewhat
- * similar to the C++ Normalizer class (see its non-static functions).
- * For all details see unorm_next.
- *
- * @param src The input text in the form of a C character iterator.
- * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
- * @param destCapacity The number of UChars that fit into dest.
- * @param mode The normalization mode.
- * @param options The normalization options, ORed together (0 for no options).
- * @param doNormalize Indicates if the source text up to the next boundary
- *                    is to be normalized (TRUE) or just copied (FALSE).
- * @param pNeededToNormalize Output flag indicating if the normalization resulted in
- *                           different text from the input.
- *                           Not defined if an error occurs including buffer overflow.
- *                           Always FALSE if !doNormalize.
- * @param pErrorCode ICU error code in/out parameter.
- *                   Must fulfill U_SUCCESS before the function call.
- * @return Length of output (number of UChars) when successful or buffer overflow.
- *
- * @see unorm_next
- * @see unorm_normalize
- *
- * @stable ICU 2.1
- */
-U_STABLE int32_t U_EXPORT2
-unorm_previous(UCharIterator *src,
-               UChar *dest, int32_t destCapacity,
-               UNormalizationMode mode, int32_t options,
-               UBool doNormalize, UBool *pNeededToNormalize,
-               UErrorCode *pErrorCode);
-
-/**
- * Concatenate normalized strings, making sure that the result is normalized as well.
- *
- * If both the left and the right strings are in
- * the normalization form according to "mode/options",
- * then the result will be
- *
- * \code
- *     dest=normalize(left+right, mode, options)
- * \endcode
- *
- * With the input strings already being normalized,
- * this function will use unorm_next() and unorm_previous()
- * to find the adjacent end pieces of the input strings.
- * Only the concatenation of these end pieces will be normalized and
- * then concatenated with the remaining parts of the input strings.
- *
- * It is allowed to have dest==left to avoid copying the entire left string.
- *
- * @param left Left source string, may be same as dest.
- * @param leftLength Length of left source string, or -1 if NUL-terminated.
- * @param right Right source string. Must not be the same as dest, nor overlap.
- * @param rightLength Length of right source string, or -1 if NUL-terminated.
- * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
- * @param destCapacity The number of UChars that fit into dest.
- * @param mode The normalization mode.
- * @param options The normalization options, ORed together (0 for no options).
- * @param pErrorCode ICU error code in/out parameter.
- *                   Must fulfill U_SUCCESS before the function call.
- * @return Length of output (number of UChars) when successful or buffer overflow.
- *
- * @see unorm_normalize
- * @see unorm_next
- * @see unorm_previous
- *
- * @stable ICU 2.1
- */
-U_STABLE int32_t U_EXPORT2
-unorm_concatenate(const UChar *left, int32_t leftLength,
-                  const UChar *right, int32_t rightLength,
-                  UChar *dest, int32_t destCapacity,
-                  UNormalizationMode mode, int32_t options,
-                  UErrorCode *pErrorCode);
-
-/**
- * Option bit for unorm_compare:
- * Both input strings are assumed to fulfill FCD conditions.
- * @stable ICU 2.2
- */
-#define UNORM_INPUT_IS_FCD          0x20000
-
-/**
- * Option bit for unorm_compare:
- * Perform case-insensitive comparison.
- * @stable ICU 2.2
- */
-#define U_COMPARE_IGNORE_CASE       0x10000
-
-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also unistr.h and ustring.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER  0x8000
-#endif
-
-/**
- * Compare two strings for canonical equivalence.
- * Further options include case-insensitive comparison and
- * code point order (as opposed to code unit order).
- *
- * Canonical equivalence between two strings is defined as their normalized
- * forms (NFD or NFC) being identical.
- * This function compares strings incrementally instead of normalizing
- * (and optionally case-folding) both strings entirely,
- * improving performance significantly.
- *
- * Bulk normalization is only necessary if the strings do not fulfill the FCD
- * conditions. Only in this case, and only if the strings are relatively long,
- * is memory allocated temporarily.
- * For FCD strings and short non-FCD strings there is no memory allocation.
- *
- * Semantically, this is equivalent to
- *   strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
- * where code point order and foldCase are all optional.
- *
- * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
- * the case folding must be performed first, then the normalization.
- *
- * @param s1 First source string.
- * @param length1 Length of first source string, or -1 if NUL-terminated.
- *
- * @param s2 Second source string.
- * @param length2 Length of second source string, or -1 if NUL-terminated.
- *
- * @param options A bit set of options:
- *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
- *     Case-sensitive comparison in code unit order, and the input strings
- *     are quick-checked for FCD.
- *
- *   - UNORM_INPUT_IS_FCD
- *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
- *     If not set, the function will quickCheck for FCD
- *     and normalize if necessary.
- *
- *   - U_COMPARE_CODE_POINT_ORDER
- *     Set to choose code point order instead of code unit order
- *     (see u_strCompare for details).
- *
- *   - U_COMPARE_IGNORE_CASE
- *     Set to compare strings case-insensitively using case folding,
- *     instead of case-sensitively.
- *     If set, then the following case folding options are used.
- *
- *   - Options as used with case-insensitive comparisons, currently:
- *
- *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
- *    (see u_strCaseCompare for details)
- *
- *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
- *
- * @param pErrorCode ICU error code in/out parameter.
- *                   Must fulfill U_SUCCESS before the function call.
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @see unorm_normalize
- * @see UNORM_FCD
- * @see u_strCompare
- * @see u_strCaseCompare
- *
- * @stable ICU 2.2
- */
-U_STABLE int32_t U_EXPORT2
-unorm_compare(const UChar *s1, int32_t length1,
-              const UChar *s2, int32_t length2,
-              uint32_t options,
-              UErrorCode *pErrorCode);
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/unorm.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/unorm.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/unorm.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/unorm.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,576 @@
+/*
+*******************************************************************************
+* Copyright (c) 1996-2007, International Business Machines Corporation
+*               and others. All Rights Reserved.
+*******************************************************************************
+* File unorm.h
+*
+* Created by: Vladimir Weinstein 12052000
+*
+* Modification history :
+*
+* Date        Name        Description
+* 02/01/01    synwee      Added normalization quickcheck enum and method.
+*/
+#ifndef UNORM_H
+#define UNORM_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/uiter.h"
+
+/**
+ * \file
+ * \brief C API: Unicode Normalization 
+ *
+ * <h2>Unicode normalization API</h2>
+ *
+ * <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or
+ * decomposed form, allowing for easier sorting and searching of text.
+ * <code>unorm_normalize</code> supports the standard normalization forms described in
+ * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
+ * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
+ *
+ * Characters with accents or other adornments can be encoded in
+ * several different ways in Unicode.  For example, take the character A-acute.
+ * In Unicode, this can be encoded as a single character (the
+ * "composed" form):
+ *
+ * \code
+ *      00C1    LATIN CAPITAL LETTER A WITH ACUTE
+ * \endcode
+ *
+ * or as two separate characters (the "decomposed" form):
+ *
+ * \code
+ *      0041    LATIN CAPITAL LETTER A
+ *      0301    COMBINING ACUTE ACCENT
+ * \endcode
+ *
+ * To a user of your program, however, both of these sequences should be
+ * treated as the same "user-level" character "A with acute accent".  When you are searching or
+ * comparing text, you must ensure that these two sequences are treated 
+ * equivalently.  In addition, you must handle characters with more than one
+ * accent.  Sometimes the order of a character's combining accents is
+ * significant, while in other cases accent sequences in different orders are
+ * really equivalent.
+ *
+ * Similarly, the string "ffi" can be encoded as three separate letters:
+ *
+ * \code
+ *      0066    LATIN SMALL LETTER F
+ *      0066    LATIN SMALL LETTER F
+ *      0069    LATIN SMALL LETTER I
+ * \endcode
+ *
+ * or as the single character
+ *
+ * \code
+ *      FB03    LATIN SMALL LIGATURE FFI
+ * \endcode
+ *
+ * The ffi ligature is not a distinct semantic character, and strictly speaking
+ * it shouldn't be in Unicode at all, but it was included for compatibility
+ * with existing character sets that already provided it.  The Unicode standard
+ * identifies such characters by giving them "compatibility" decompositions
+ * into the corresponding semantic characters.  When sorting and searching, you
+ * will often want to use these mappings.
+ *
+ * <code>unorm_normalize</code> helps solve these problems by transforming text into the
+ * canonical composed and decomposed forms as shown in the first example above.  
+ * In addition, you can have it perform compatibility decompositions so that 
+ * you can treat compatibility characters the same as their equivalents.
+ * Finally, <code>unorm_normalize</code> rearranges accents into the proper canonical
+ * order, so that you do not have to worry about accent rearrangement on your
+ * own.
+ *
+ * Form FCD, "Fast C or D", is also designed for collation.
+ * It allows to work on strings that are not necessarily normalized
+ * with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed
+ * characters and their decomposed equivalents the same.
+ *
+ * It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings
+ * may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical
+ * themselves.
+ *
+ * The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character,
+ * results in a string that is canonically ordered. This means that precomposed characters are allowed for as long
+ * as their decompositions do not need canonical reordering.
+ *
+ * Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts -
+ * already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will
+ * return UNORM_YES for most strings in practice.
+ *
+ * unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD.
+ *
+ * For more details on FCD see the collation design document:
+ * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
+ *
+ * ICU collation performs either NFD or FCD normalization automatically if normalization
+ * is turned on for the collator object.
+ * Beyond collation and string search, normalized strings may be useful for string equivalence comparisons,
+ * transliteration/transcription, unique representations, etc.
+ *
+ * The W3C generally recommends to exchange texts in NFC.
+ * Note also that most legacy character encodings use only precomposed forms and often do not
+ * encode any combining marks by themselves. For conversion to such character encodings the
+ * Unicode text needs to be normalized to NFC.
+ * For more usage examples, see the Unicode Standard Annex.
+ */
+
+/**
+ * Constants for normalization modes.
+ * @stable ICU 2.0
+ */
+typedef enum {
+  /** No decomposition/composition. @stable ICU 2.0 */
+  UNORM_NONE = 1, 
+  /** Canonical decomposition. @stable ICU 2.0 */
+  UNORM_NFD = 2,
+  /** Compatibility decomposition. @stable ICU 2.0 */
+  UNORM_NFKD = 3,
+  /** Canonical decomposition followed by canonical composition. @stable ICU 2.0 */
+  UNORM_NFC = 4,
+  /** Default normalization. @stable ICU 2.0 */
+  UNORM_DEFAULT = UNORM_NFC, 
+  /** Compatibility decomposition followed by canonical composition. @stable ICU 2.0 */
+  UNORM_NFKC =5,
+  /** "Fast C or D" form. @stable ICU 2.0 */
+  UNORM_FCD = 6,
+
+  /** One more than the highest normalization mode constant. @stable ICU 2.0 */
+  UNORM_MODE_COUNT
+} UNormalizationMode;
+
+/**
+ * Constants for options flags for normalization.
+ * Use 0 for default options,
+ * including normalization according to the Unicode version
+ * that is currently supported by ICU (see u_getUnicodeVersion).
+ * @stable ICU 2.6
+ */
+enum {
+    /**
+     * Options bit set value to select Unicode 3.2 normalization
+     * (except NormalizationCorrections).
+     * At most one Unicode version can be selected at a time.
+     * @stable ICU 2.6
+     */
+    UNORM_UNICODE_3_2=0x20
+};
+
+/**
+ * Lowest-order bit number of unorm_compare() options bits corresponding to
+ * normalization options bits.
+ *
+ * The options parameter for unorm_compare() uses most bits for
+ * itself and for various comparison and folding flags.
+ * The most significant bits, however, are shifted down and passed on
+ * to the normalization implementation.
+ * (That is, from unorm_compare(..., options, ...),
+ * options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the
+ * internal normalization functions.)
+ *
+ * @see unorm_compare
+ * @stable ICU 2.6
+ */
+#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+
+/**
+ * Normalize a string.
+ * The string will be normalized according the specified normalization mode
+ * and options.
+ * The source and result buffers must not be the same, nor overlap.
+ *
+ * @param source The string to normalize.
+ * @param sourceLength The length of source, or -1 if NUL-terminated.
+ * @param mode The normalization mode; one of UNORM_NONE, 
+ *             UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param result A pointer to a buffer to receive the result string.
+ *               The result string is NUL-terminated if possible.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to a UErrorCode to receive any errors.
+ * @return The total buffer size needed; if greater than resultLength,
+ *         the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+unorm_normalize(const UChar *source, int32_t sourceLength,
+                UNormalizationMode mode, int32_t options,
+                UChar *result, int32_t resultLength,
+                UErrorCode *status);
+#endif
+/**
+ * Result values for unorm_quickCheck().
+ * For details see Unicode Technical Report 15.
+ * @stable ICU 2.0
+ */
+typedef enum UNormalizationCheckResult {
+  /** 
+   * Indicates that string is not in the normalized format
+   */
+  UNORM_NO,
+  /** 
+   * Indicates that string is in the normalized format
+   */
+  UNORM_YES,
+  /** 
+   * Indicates that string cannot be determined if it is in the normalized 
+   * format without further thorough checks.
+   */
+  UNORM_MAYBE
+} UNormalizationCheckResult;
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Performing quick check on a string, to quickly determine if the string is 
+ * in a particular normalization format.
+ * Three types of result can be returned UNORM_YES, UNORM_NO or
+ * UNORM_MAYBE. Result UNORM_YES indicates that the argument
+ * string is in the desired normalized format, UNORM_NO determines that
+ * argument string is not in the desired normalized format. A 
+ * UNORM_MAYBE result indicates that a more thorough check is required, 
+ * the user may have to put the string in its normalized form and compare the 
+ * results.
+ *
+ * @param source       string for determining if it is in a normalized format
+ * @param sourcelength length of source to test, or -1 if NUL-terminated
+ * @param mode         which normalization form to test for
+ * @param status       a pointer to a UErrorCode to receive any errors
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_isNormalized
+ * @stable ICU 2.0
+ */
+U_STABLE UNormalizationCheckResult U_EXPORT2
+unorm_quickCheck(const UChar *source, int32_t sourcelength,
+                 UNormalizationMode mode,
+                 UErrorCode *status);
+
+/**
+ * Performing quick check on a string; same as unorm_quickCheck but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src        String that is to be tested if it is in a normalization format.
+ * @param srcLength  Length of source to test, or -1 if NUL-terminated.
+ * @param mode       Which normalization form to test for.
+ * @param options    The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ *                   Must fulfill U_SUCCESS before the function call.
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @stable ICU 2.6
+ */
+U_STABLE UNormalizationCheckResult U_EXPORT2
+unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, 
+                            UNormalizationMode mode, int32_t options,
+                            UErrorCode *pErrorCode);
+
+/**
+ * Test if a string is in a given normalization form.
+ * This is semantically equivalent to source.equals(normalize(source, mode)) .
+ *
+ * Unlike unorm_quickCheck(), this function returns a definitive result,
+ * never a "maybe".
+ * For NFD, NFKD, and FCD, both functions work exactly the same.
+ * For NFC and NFKC where quickCheck may return "maybe", this function will
+ * perform further tests to arrive at a TRUE/FALSE result.
+ *
+ * @param src        String that is to be tested if it is in a normalization format.
+ * @param srcLength  Length of source to test, or -1 if NUL-terminated.
+ * @param mode       Which normalization form to test for.
+ * @param pErrorCode ICU error code in/out parameter.
+ *                   Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ *         "mode" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @stable ICU 2.2
+ */
+U_STABLE UBool U_EXPORT2
+unorm_isNormalized(const UChar *src, int32_t srcLength,
+                   UNormalizationMode mode,
+                   UErrorCode *pErrorCode);
+
+/**
+ * Test if a string is in a given normalization form; same as unorm_isNormalized but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src        String that is to be tested if it is in a normalization format.
+ * @param srcLength  Length of source to test, or -1 if NUL-terminated.
+ * @param mode       Which normalization form to test for.
+ * @param options    The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ *                   Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ *         "mode/options" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
+                              UNormalizationMode mode, int32_t options,
+                              UErrorCode *pErrorCode);
+
+/**
+ * Iterative normalization forward.
+ * This function (together with unorm_previous) is somewhat
+ * similar to the C++ Normalizer class (see its non-static functions).
+ *
+ * Iterative normalization is useful when only a small portion of a longer
+ * string/text needs to be processed.
+ *
+ * For example, the likelihood may be high that processing the first 10% of some
+ * text will be sufficient to find certain data.
+ * Another example: When one wants to concatenate two normalized strings and get a
+ * normalized result, it is much more efficient to normalize just a small part of
+ * the result around the concatenation place instead of re-normalizing everything.
+ *
+ * The input text is an instance of the C character iteration API UCharIterator.
+ * It may wrap around a simple string, a CharacterIterator, a Replaceable, or any
+ * other kind of text object.
+ *
+ * If a buffer overflow occurs, then the caller needs to reset the iterator to the
+ * old index and call the function again with a larger buffer - if the caller cares
+ * for the actual output.
+ * Regardless of the output buffer, the iterator will always be moved to the next
+ * normalization boundary.
+ *
+ * This function (like unorm_previous) serves two purposes:
+ *
+ * 1) To find the next boundary so that the normalization of the part of the text
+ * from the current position to that boundary does not affect and is not affected
+ * by the part of the text beyond that boundary.
+ *
+ * 2) To normalize the text up to the boundary.
+ *
+ * The second step is optional, per the doNormalize parameter.
+ * It is omitted for operations like string concatenation, where the two adjacent
+ * string ends need to be normalized together.
+ * In such a case, the output buffer will just contain a copy of the text up to the
+ * boundary.
+ *
+ * pNeededToNormalize is an output-only parameter. Its output value is only defined
+ * if normalization was requested (doNormalize) and successful (especially, no
+ * buffer overflow).
+ * It is useful for operations like a normalizing transliterator, where one would
+ * not want to replace a piece of text if it is not modified.
+ *
+ * If doNormalize==TRUE and pNeededToNormalize!=NULL then *pNeeded... is set TRUE
+ * if the normalization was necessary.
+ *
+ * If doNormalize==FALSE then *pNeededToNormalize will be set to FALSE.
+ *
+ * If the buffer overflows, then *pNeededToNormalize will be undefined;
+ * essentially, whenever U_FAILURE is true (like in buffer overflows), this result
+ * will be undefined.
+ *
+ * @param src The input text in the form of a C character iterator.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param doNormalize Indicates if the source text up to the next boundary
+ *                    is to be normalized (TRUE) or just copied (FALSE).
+ * @param pNeededToNormalize Output flag indicating if the normalization resulted in
+ *                           different text from the input.
+ *                           Not defined if an error occurs including buffer overflow.
+ *                           Always FALSE if !doNormalize.
+ * @param pErrorCode ICU error code in/out parameter.
+ *                   Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_previous
+ * @see unorm_normalize
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_next(UCharIterator *src,
+           UChar *dest, int32_t destCapacity,
+           UNormalizationMode mode, int32_t options,
+           UBool doNormalize, UBool *pNeededToNormalize,
+           UErrorCode *pErrorCode);
+
+/**
+ * Iterative normalization backward.
+ * This function (together with unorm_next) is somewhat
+ * similar to the C++ Normalizer class (see its non-static functions).
+ * For all details see unorm_next.
+ *
+ * @param src The input text in the form of a C character iterator.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param doNormalize Indicates if the source text up to the next boundary
+ *                    is to be normalized (TRUE) or just copied (FALSE).
+ * @param pNeededToNormalize Output flag indicating if the normalization resulted in
+ *                           different text from the input.
+ *                           Not defined if an error occurs including buffer overflow.
+ *                           Always FALSE if !doNormalize.
+ * @param pErrorCode ICU error code in/out parameter.
+ *                   Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_next
+ * @see unorm_normalize
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_previous(UCharIterator *src,
+               UChar *dest, int32_t destCapacity,
+               UNormalizationMode mode, int32_t options,
+               UBool doNormalize, UBool *pNeededToNormalize,
+               UErrorCode *pErrorCode);
+
+/**
+ * Concatenate normalized strings, making sure that the result is normalized as well.
+ *
+ * If both the left and the right strings are in
+ * the normalization form according to "mode/options",
+ * then the result will be
+ *
+ * \code
+ *     dest=normalize(left+right, mode, options)
+ * \endcode
+ *
+ * With the input strings already being normalized,
+ * this function will use unorm_next() and unorm_previous()
+ * to find the adjacent end pieces of the input strings.
+ * Only the concatenation of these end pieces will be normalized and
+ * then concatenated with the remaining parts of the input strings.
+ *
+ * It is allowed to have dest==left to avoid copying the entire left string.
+ *
+ * @param left Left source string, may be same as dest.
+ * @param leftLength Length of left source string, or -1 if NUL-terminated.
+ * @param right Right source string. Must not be the same as dest, nor overlap.
+ * @param rightLength Length of right source string, or -1 if NUL-terminated.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ *                   Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_normalize
+ * @see unorm_next
+ * @see unorm_previous
+ *
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_concatenate(const UChar *left, int32_t leftLength,
+                  const UChar *right, int32_t rightLength,
+                  UChar *dest, int32_t destCapacity,
+                  UNormalizationMode mode, int32_t options,
+                  UErrorCode *pErrorCode);
+
+/**
+ * Option bit for unorm_compare:
+ * Both input strings are assumed to fulfill FCD conditions.
+ * @stable ICU 2.2
+ */
+#define UNORM_INPUT_IS_FCD          0x20000
+
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_IGNORE_CASE       0x10000
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also unistr.h and ustring.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER  0x8000
+#endif
+
+/**
+ * Compare two strings for canonical equivalence.
+ * Further options include case-insensitive comparison and
+ * code point order (as opposed to code unit order).
+ *
+ * Canonical equivalence between two strings is defined as their normalized
+ * forms (NFD or NFC) being identical.
+ * This function compares strings incrementally instead of normalizing
+ * (and optionally case-folding) both strings entirely,
+ * improving performance significantly.
+ *
+ * Bulk normalization is only necessary if the strings do not fulfill the FCD
+ * conditions. Only in this case, and only if the strings are relatively long,
+ * is memory allocated temporarily.
+ * For FCD strings and short non-FCD strings there is no memory allocation.
+ *
+ * Semantically, this is equivalent to
+ *   strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
+ * where code point order and foldCase are all optional.
+ *
+ * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
+ * the case folding must be performed first, then the normalization.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ *     Case-sensitive comparison in code unit order, and the input strings
+ *     are quick-checked for FCD.
+ *
+ *   - UNORM_INPUT_IS_FCD
+ *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
+ *     If not set, the function will quickCheck for FCD
+ *     and normalize if necessary.
+ *
+ *   - U_COMPARE_CODE_POINT_ORDER
+ *     Set to choose code point order instead of code unit order
+ *     (see u_strCompare for details).
+ *
+ *   - U_COMPARE_IGNORE_CASE
+ *     Set to compare strings case-insensitively using case folding,
+ *     instead of case-sensitively.
+ *     If set, then the following case folding options are used.
+ *
+ *   - Options as used with case-insensitive comparisons, currently:
+ *
+ *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *    (see u_strCaseCompare for details)
+ *
+ *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
+ *
+ * @param pErrorCode ICU error code in/out parameter.
+ *                   Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see unorm_normalize
+ * @see UNORM_FCD
+ * @see u_strCompare
+ * @see u_strCaseCompare
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+unorm_compare(const UChar *s1, int32_t length1,
+              const UChar *s2, int32_t length2,
+              uint32_t options,
+              UErrorCode *pErrorCode);
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/unum.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/unum.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/unum.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,869 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 1997-2008, International Business Machines Corporation and others.
-* All Rights Reserved.
-* Modification History:
-*
-*   Date        Name        Description
-*   06/24/99    helena      Integrated Alan's NF enhancements and Java2 bug fixes
-*******************************************************************************
-*/
-
-#ifndef _UNUM
-#define _UNUM
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/uloc.h"
-#include "unicode/umisc.h"
-#include "unicode/parseerr.h"
-/**
- * \file
- * \brief C API: NumberFormat
- *
- * <h2> Number Format C API </h2>
- *
- * Number Format C API  Provides functions for
- * formatting and parsing a number.  Also provides methods for
- * determining which locales have number formats, and what their names
- * are.
- * <P>
- * UNumberFormat helps you to format and parse numbers for any locale.
- * Your code can be completely independent of the locale conventions
- * for decimal points, thousands-separators, or even the particular
- * decimal digits used, or whether the number format is even decimal.
- * There are different number format styles like decimal, currency,
- * percent and spellout.
- * <P>
- * To format a number for the current Locale, use one of the static
- * factory methods:
- * <pre>
- * \code
- *    UChar myString[20];
- *    double myNumber = 7.0;
- *    UErrorCode status = U_ZERO_ERROR;
- *    UNumberFormat* nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status);
- *    unum_formatDouble(nf, myNumber, myString, 20, NULL, &status);
- *    printf(" Example 1: %s\n", austrdup(myString) ); //austrdup( a function used to convert UChar* to char*)
- * \endcode
- * </pre>
- * If you are formatting multiple numbers, it is more efficient to get
- * the format and use it multiple times so that the system doesn't
- * have to fetch the information about the local language and country
- * conventions multiple times.
- * <pre>
- * \code
- * uint32_t i, resultlength, reslenneeded;
- * UErrorCode status = U_ZERO_ERROR;
- * UFieldPosition pos;
- * uint32_t a[] = { 123, 3333, -1234567 };
- * const uint32_t a_len = sizeof(a) / sizeof(a[0]);
- * UNumberFormat* nf;
- * UChar* result = NULL;
- *
- * nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status);
- * for (i = 0; i < a_len; i++) {
- *    resultlength=0;
- *    reslenneeded=unum_format(nf, a[i], NULL, resultlength, &pos, &status);
- *    result = NULL;
- *    if(status==U_BUFFER_OVERFLOW_ERROR){
- *       status=U_ZERO_ERROR;
- *       resultlength=reslenneeded+1;
- *       result=(UChar*)malloc(sizeof(UChar) * resultlength);
- *       unum_format(nf, a[i], result, resultlength, &pos, &status);
- *    }
- *    printf( " Example 2: %s\n", austrdup(result));
- *    free(result);
- * }
- * \endcode
- * </pre>
- * To format a number for a different Locale, specify it in the
- * call to unum_open().
- * <pre>
- * \code
- *     UNumberFormat* nf = unum_open(UNUM_DEFAULT, NULL, -1, "fr_FR", NULL, &success)
- * \endcode
- * </pre>
- * You can use a NumberFormat API unum_parse() to parse.
- * <pre>
- * \code
- *    UErrorCode status = U_ZERO_ERROR;
- *    int32_t pos=0;
- *    int32_t num;
- *    num = unum_parse(nf, str, u_strlen(str), &pos, &status);
- * \endcode
- * </pre>
- * Use UNUM_DECIMAL to get the normal number format for that country.
- * There are other static options available.  Use UNUM_CURRENCY
- * to get the currency number format for that country.  Use UNUM_PERCENT
- * to get a format for displaying percentages. With this format, a
- * fraction from 0.53 is displayed as 53%.
- * <P>
- * Use a pattern to create either a DecimalFormat or a RuleBasedNumberFormat
- * formatter.  The pattern must conform to the syntax defined for those
- * formatters.
- * <P>
- * You can also control the display of numbers with such function as
- * unum_getAttribues() and unum_setAtributes(), which let you set the
- * miminum fraction digits, grouping, etc.
- * @see UNumberFormatAttributes for more details
- * <P>
- * You can also use forms of the parse and format methods with
- * ParsePosition and UFieldPosition to allow you to:
- * <ul type=round>
- *   <li>(a) progressively parse through pieces of a string.
- *   <li>(b) align the decimal point and other areas.
- * </ul>
- * <p>
- * It is also possible to change or set the symbols used for a particular
- * locale like the currency symbol, the grouping seperator , monetary seperator
- * etc by making use of functions unum_setSymbols() and unum_getSymbols().
- */
-
-/** A number formatter.
- *  For usage in C programs.
- *  @stable ICU 2.0
- */
-typedef void* UNumberFormat;
-
-/** The possible number format styles. 
- *  @stable ICU 2.0
- */
-typedef enum UNumberFormatStyle {
-    /**
-     * Decimal format defined by pattern 
-     * @stable ICU 3.0
-     */
-    UNUM_PATTERN_DECIMAL=0,
-    /** Decimal format */
-    UNUM_DECIMAL=1,
-    /** Currency format */
-    UNUM_CURRENCY,
-    /** Percent format */
-    UNUM_PERCENT,
-    /** Scientific format */
-    UNUM_SCIENTIFIC,
-    /** Spellout rule-based format */
-    UNUM_SPELLOUT,
-    /** 
-     * Ordinal rule-based format 
-     * @stable ICU 3.0
-     */
-    UNUM_ORDINAL,
-    /** 
-     * Duration rule-based format 
-     * @stable ICU 3.0
-     */
-    UNUM_DURATION,
-    /** 
-     * Rule-based format defined by pattern 
-     * @stable ICU 3.0
-     */
-    UNUM_PATTERN_RULEBASED,
-    /** Default format */
-    UNUM_DEFAULT = UNUM_DECIMAL,
-    /** (Alias for UNUM_PATTERN_DECIMAL) */
-    UNUM_IGNORE = UNUM_PATTERN_DECIMAL
-} UNumberFormatStyle;
-
-/** The possible number format rounding modes. 
- *  @stable ICU 2.0
- */
-typedef enum UNumberFormatRoundingMode {
-    UNUM_ROUND_CEILING,
-    UNUM_ROUND_FLOOR,
-    UNUM_ROUND_DOWN,
-    UNUM_ROUND_UP,
-    /**
-     * Half-even rounding, misspelled name
-     * @deprecated, ICU 3.8
-     */
-    UNUM_FOUND_HALFEVEN,
-    UNUM_ROUND_HALFDOWN,
-    UNUM_ROUND_HALFUP,
-    /**
-     * Half-even rounding
-     * @stable, ICU 3.8
-     */
-    UNUM_ROUND_HALFEVEN = UNUM_FOUND_HALFEVEN
-} UNumberFormatRoundingMode;
-
-/** The possible number format pad positions. 
- *  @stable ICU 2.0
- */
-typedef enum UNumberFormatPadPosition {
-    UNUM_PAD_BEFORE_PREFIX,
-    UNUM_PAD_AFTER_PREFIX,
-    UNUM_PAD_BEFORE_SUFFIX,
-    UNUM_PAD_AFTER_SUFFIX
-} UNumberFormatPadPosition;
-
-/**
- * Create and return a new UNumberFormat for formatting and parsing
- * numbers.  A UNumberFormat may be used to format numbers by calling
- * {@link #unum_format }, and to parse numbers by calling {@link #unum_parse }.
- * The caller must call {@link #unum_close } when done to release resources
- * used by this object.
- * @param style The type of number format to open: one of
- * UNUM_DECIMAL, UNUM_CURRENCY, UNUM_PERCENT, UNUM_SCIENTIFIC, UNUM_SPELLOUT,
- * UNUM_PATTERN_DECIMAL, UNUM_PATTERN_RULEBASED, or UNUM_DEFAULT.
- * If UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED is passed then the
- * number format is opened using the given pattern, which must conform
- * to the syntax described in DecimalFormat or RuleBasedNumberFormat,
- * respectively.
- * @param pattern A pattern specifying the format to use. 
- * This parameter is ignored unless the style is
- * UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED.
- * @param patternLength The number of characters in the pattern, or -1
- * if null-terminated. This parameter is ignored unless the style is
- * UNUM_PATTERN.
- * @param locale A locale identifier to use to determine formatting
- * and parsing conventions, or NULL to use the default locale.
- * @param parseErr A pointer to a UParseError struct to receive the
- * details of any parsing errors, or NULL if no parsing error details
- * are desired.
- * @param status A pointer to an input-output UErrorCode.
- * @return A pointer to a newly created UNumberFormat, or NULL if an
- * error occurred.
- * @see unum_close
- * @see DecimalFormat
- * @stable ICU 2.0
- */
-U_STABLE UNumberFormat* U_EXPORT2 
-unum_open(  UNumberFormatStyle    style,
-            const    UChar*    pattern,
-            int32_t            patternLength,
-            const    char*     locale,
-            UParseError*       parseErr,
-            UErrorCode*        status);
-
-
-/**
-* Close a UNumberFormat.
-* Once closed, a UNumberFormat may no longer be used.
-* @param fmt The formatter to close.
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2 
-unum_close(UNumberFormat* fmt);
-
-/**
- * Open a copy of a UNumberFormat.
- * This function performs a deep copy.
- * @param fmt The format to copy
- * @param status A pointer to an UErrorCode to receive any errors.
- * @return A pointer to a UNumberFormat identical to fmt.
- * @stable ICU 2.0
- */
-U_STABLE UNumberFormat* U_EXPORT2 
-unum_clone(const UNumberFormat *fmt,
-       UErrorCode *status);
-
-/**
-* Format an integer using a UNumberFormat.
-* The integer will be formatted according to the UNumberFormat's locale.
-* @param fmt The formatter to use.
-* @param number The number to format.
-* @param result A pointer to a buffer to receive the formatted number.
-* @param resultLength The maximum size of result.
-* @param pos    A pointer to a UFieldPosition.  On input, position->field
-* is read.  On output, position->beginIndex and position->endIndex indicate
-* the beginning and ending indices of field number position->field, if such
-* a field exists.  This parameter may be NULL, in which case no field
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The total buffer size needed; if greater than resultLength, the output was truncated.
-* @see unum_formatInt64
-* @see unum_formatDouble
-* @see unum_parse
-* @see unum_parseInt64
-* @see unum_parseDouble
-* @see UFieldPosition
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2 
-unum_format(    const    UNumberFormat*    fmt,
-        int32_t            number,
-        UChar*            result,
-        int32_t            resultLength,
-        UFieldPosition    *pos,
-        UErrorCode*        status);
-
-/**
-* Format an int64 using a UNumberFormat.
-* The int64 will be formatted according to the UNumberFormat's locale.
-* @param fmt The formatter to use.
-* @param number The number to format.
-* @param result A pointer to a buffer to receive the formatted number.
-* @param resultLength The maximum size of result.
-* @param pos    A pointer to a UFieldPosition.  On input, position->field
-* is read.  On output, position->beginIndex and position->endIndex indicate
-* the beginning and ending indices of field number position->field, if such
-* a field exists.  This parameter may be NULL, in which case no field
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The total buffer size needed; if greater than resultLength, the output was truncated.
-* @see unum_format
-* @see unum_formatDouble
-* @see unum_parse
-* @see unum_parseInt64
-* @see unum_parseDouble
-* @see UFieldPosition
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2 
-unum_formatInt64(const UNumberFormat *fmt,
-        int64_t         number,
-        UChar*          result,
-        int32_t         resultLength,
-        UFieldPosition *pos,
-        UErrorCode*     status);
-
-/**
-* Format a double using a UNumberFormat.
-* The double will be formatted according to the UNumberFormat's locale.
-* @param fmt The formatter to use.
-* @param number The number to format.
-* @param result A pointer to a buffer to receive the formatted number.
-* @param resultLength The maximum size of result.
-* @param pos    A pointer to a UFieldPosition.  On input, position->field
-* is read.  On output, position->beginIndex and position->endIndex indicate
-* the beginning and ending indices of field number position->field, if such
-* a field exists.  This parameter may be NULL, in which case no field
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The total buffer size needed; if greater than resultLength, the output was truncated.
-* @see unum_format
-* @see unum_formatInt64
-* @see unum_parse
-* @see unum_parseInt64
-* @see unum_parseDouble
-* @see UFieldPosition
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2 
-unum_formatDouble(    const    UNumberFormat*  fmt,
-            double          number,
-            UChar*          result,
-            int32_t         resultLength,
-            UFieldPosition  *pos, /* 0 if ignore */
-            UErrorCode*     status);
-
-/**
- * Format a double currency amount using a UNumberFormat.
- * The double will be formatted according to the UNumberFormat's locale.
- * @param fmt the formatter to use
- * @param number the number to format
- * @param currency the 3-letter null-terminated ISO 4217 currency code
- * @param result a pointer to the buffer to receive the formatted number
- * @param resultLength the maximum number of UChars to write to result
- * @param pos a pointer to a UFieldPosition.  On input,
- * position->field is read.  On output, position->beginIndex and
- * position->endIndex indicate the beginning and ending indices of
- * field number position->field, if such a field exists.  This
- * parameter may be NULL, in which case it is ignored.
- * @param status a pointer to an input-output UErrorCode
- * @return the total buffer size needed; if greater than resultLength,
- * the output was truncated.
- * @see unum_formatDouble
- * @see unum_parseDoubleCurrency
- * @see UFieldPosition
- * @stable ICU 3.0
- */
-U_STABLE int32_t U_EXPORT2 
-unum_formatDoubleCurrency(const UNumberFormat* fmt,
-                          double number,
-                          UChar* currency,
-                          UChar* result,
-                          int32_t resultLength,
-                          UFieldPosition* pos, /* ignored if 0 */
-                          UErrorCode* status);
-
-/**
-* Parse a string into an integer using a UNumberFormat.
-* The string will be parsed according to the UNumberFormat's locale.
-* @param fmt The formatter to use.
-* @param text The text to parse.
-* @param textLength The length of text, or -1 if null-terminated.
-* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
-* to begin parsing.  If not 0, on output the offset at which parsing ended.
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The value of the parsed integer
-* @see unum_parseInt64
-* @see unum_parseDouble
-* @see unum_format
-* @see unum_formatInt64
-* @see unum_formatDouble
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2 
-unum_parse(    const   UNumberFormat*  fmt,
-        const   UChar*          text,
-        int32_t         textLength,
-        int32_t         *parsePos /* 0 = start */,
-        UErrorCode      *status);
-
-/**
-* Parse a string into an int64 using a UNumberFormat.
-* The string will be parsed according to the UNumberFormat's locale.
-* @param fmt The formatter to use.
-* @param text The text to parse.
-* @param textLength The length of text, or -1 if null-terminated.
-* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
-* to begin parsing.  If not 0, on output the offset at which parsing ended.
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The value of the parsed integer
-* @see unum_parse
-* @see unum_parseDouble
-* @see unum_format
-* @see unum_formatInt64
-* @see unum_formatDouble
-* @stable ICU 2.8
-*/
-U_STABLE int64_t U_EXPORT2 
-unum_parseInt64(const UNumberFormat*  fmt,
-        const UChar*  text,
-        int32_t       textLength,
-        int32_t       *parsePos /* 0 = start */,
-        UErrorCode    *status);
-
-/**
-* Parse a string into a double using a UNumberFormat.
-* The string will be parsed according to the UNumberFormat's locale.
-* @param fmt The formatter to use.
-* @param text The text to parse.
-* @param textLength The length of text, or -1 if null-terminated.
-* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
-* to begin parsing.  If not 0, on output the offset at which parsing ended.
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The value of the parsed double
-* @see unum_parse
-* @see unum_parseInt64
-* @see unum_format
-* @see unum_formatInt64
-* @see unum_formatDouble
-* @stable ICU 2.0
-*/
-U_STABLE double U_EXPORT2 
-unum_parseDouble(    const   UNumberFormat*  fmt,
-            const   UChar*          text,
-            int32_t         textLength,
-            int32_t         *parsePos /* 0 = start */,
-            UErrorCode      *status);
-
-/**
- * Parse a string into a double and a currency using a UNumberFormat.
- * The string will be parsed according to the UNumberFormat's locale.
- * @param fmt the formatter to use
- * @param text the text to parse
- * @param textLength the length of text, or -1 if null-terminated
- * @param parsePos a pointer to an offset index into text at which to
- * begin parsing. On output, *parsePos will point after the last
- * parsed character.  This parameter may be 0, in which case parsing
- * begins at offset 0.
- * @param currency a pointer to the buffer to receive the parsed null-
- * terminated currency.  This buffer must have a capacity of at least
- * 4 UChars.
- * @param status a pointer to an input-output UErrorCode
- * @return the parsed double
- * @see unum_parseDouble
- * @see unum_formatDoubleCurrency
- * @stable ICU 3.0
- */
-U_STABLE double U_EXPORT2
-unum_parseDoubleCurrency(const UNumberFormat* fmt,
-                         const UChar* text,
-                         int32_t textLength,
-                         int32_t* parsePos, /* 0 = start */
-                         UChar* currency,
-                         UErrorCode* status);
-
-/**
- * Set the pattern used by a UNumberFormat.  This can only be used
- * on a DecimalFormat, other formats return U_ILLEGAL_ARGUMENT_ERROR
- * in the status.
- * @param format The formatter to set.
- * @param localized TRUE if the pattern is localized, FALSE otherwise.
- * @param pattern The new pattern
- * @param patternLength The length of pattern, or -1 if null-terminated.
- * @param parseError A pointer to UParseError to recieve information
- * about errors occurred during parsing, or NULL if no parse error
- * information is desired.
- * @param status A pointer to an input-output UErrorCode.
- * @see unum_toPattern
- * @see DecimalFormat
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-unum_applyPattern(          UNumberFormat  *format,
-                            UBool          localized,
-                    const   UChar          *pattern,
-                            int32_t         patternLength,
-                            UParseError    *parseError,
-                            UErrorCode     *status
-                                    );
-
-/**
-* Get a locale for which decimal formatting patterns are available.
-* A UNumberFormat in a locale returned by this function will perform the correct
-* formatting and parsing for the locale.  The results of this call are not
-* valid for rule-based number formats.
-* @param index The index of the desired locale.
-* @return A locale for which number formatting patterns are available, or 0 if none.
-* @see unum_countAvailable
-* @stable ICU 2.0
-*/
-U_STABLE const char* U_EXPORT2 
-unum_getAvailable(int32_t index);
-
-/**
-* Determine how many locales have decimal formatting patterns available.  The
-* results of this call are not valid for rule-based number formats.
-* This function is useful for determining the loop ending condition for
-* calls to {@link #unum_getAvailable }.
-* @return The number of locales for which decimal formatting patterns are available.
-* @see unum_getAvailable
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2 
-unum_countAvailable(void);
-
-/** The possible UNumberFormat numeric attributes @stable ICU 2.0 */
-typedef enum UNumberFormatAttribute {
-  /** Parse integers only */
-  UNUM_PARSE_INT_ONLY,
-  /** Use grouping separator */
-  UNUM_GROUPING_USED,
-  /** Always show decimal point */
-  UNUM_DECIMAL_ALWAYS_SHOWN,
-  /** Maximum integer digits */
-  UNUM_MAX_INTEGER_DIGITS,
-  /** Minimum integer digits */
-  UNUM_MIN_INTEGER_DIGITS,
-  /** Integer digits */
-  UNUM_INTEGER_DIGITS,
-  /** Maximum fraction digits */
-  UNUM_MAX_FRACTION_DIGITS,
-  /** Minimum fraction digits */
-  UNUM_MIN_FRACTION_DIGITS,
-  /** Fraction digits */
-  UNUM_FRACTION_DIGITS,
-  /** Multiplier */
-  UNUM_MULTIPLIER,
-  /** Grouping size */
-  UNUM_GROUPING_SIZE,
-  /** Rounding Mode */
-  UNUM_ROUNDING_MODE,
-  /** Rounding increment */
-  UNUM_ROUNDING_INCREMENT,
-  /** The width to which the output of <code>format()</code> is padded. */
-  UNUM_FORMAT_WIDTH,
-  /** The position at which padding will take place. */
-  UNUM_PADDING_POSITION,
-  /** Secondary grouping size */
-  UNUM_SECONDARY_GROUPING_SIZE,
-  /** Use significant digits
-   * @stable ICU 3.0 */
-  UNUM_SIGNIFICANT_DIGITS_USED,
-  /** Minimum significant digits
-   * @stable ICU 3.0 */
-  UNUM_MIN_SIGNIFICANT_DIGITS,
-  /** Maximum significant digits
-   * @stable ICU 3.0 */
-  UNUM_MAX_SIGNIFICANT_DIGITS,
-  /** Lenient parse mode used by rule-based formats.
-   * @stable ICU 3.0
-   */
-  UNUM_LENIENT_PARSE
-} UNumberFormatAttribute;
-
-/**
-* Get a numeric attribute associated with a UNumberFormat.
-* An example of a numeric attribute is the number of integer digits a formatter will produce.
-* @param fmt The formatter to query.
-* @param attr The attribute to query; one of UNUM_PARSE_INT_ONLY, UNUM_GROUPING_USED,
-* UNUM_DECIMAL_ALWAYS_SHOWN, UNUM_MAX_INTEGER_DIGITS, UNUM_MIN_INTEGER_DIGITS, UNUM_INTEGER_DIGITS,
-* UNUM_MAX_FRACTION_DIGITS, UNUM_MIN_FRACTION_DIGITS, UNUM_FRACTION_DIGITS, UNUM_MULTIPLIER,
-* UNUM_GROUPING_SIZE, UNUM_ROUNDING_MODE, UNUM_FORMAT_WIDTH, UNUM_PADDING_POSITION, UNUM_SECONDARY_GROUPING_SIZE.
-* @return The value of attr.
-* @see unum_setAttribute
-* @see unum_getDoubleAttribute
-* @see unum_setDoubleAttribute
-* @see unum_getTextAttribute
-* @see unum_setTextAttribute
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2 
-unum_getAttribute(const UNumberFormat*          fmt,
-          UNumberFormatAttribute  attr);
-
-/**
-* Set a numeric attribute associated with a UNumberFormat.
-* An example of a numeric attribute is the number of integer digits a formatter will produce.  If the
-* formatter does not understand the attribute, the call is ignored.  Rule-based formatters only understand
-* the lenient-parse attribute.
-* @param fmt The formatter to set.
-* @param attr The attribute to set; one of UNUM_PARSE_INT_ONLY, UNUM_GROUPING_USED,
-* UNUM_DECIMAL_ALWAYS_SHOWN, UNUM_MAX_INTEGER_DIGITS, UNUM_MIN_INTEGER_DIGITS, UNUM_INTEGER_DIGITS,
-* UNUM_MAX_FRACTION_DIGITS, UNUM_MIN_FRACTION_DIGITS, UNUM_FRACTION_DIGITS, UNUM_MULTIPLIER,
-* UNUM_GROUPING_SIZE, UNUM_ROUNDING_MODE, UNUM_FORMAT_WIDTH, UNUM_PADDING_POSITION, UNUM_SECONDARY_GROUPING_SIZE,
-* or UNUM_LENIENT_PARSE.
-* @param newValue The new value of attr.
-* @see unum_getAttribute
-* @see unum_getDoubleAttribute
-* @see unum_setDoubleAttribute
-* @see unum_getTextAttribute
-* @see unum_setTextAttribute
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2 
-unum_setAttribute(    UNumberFormat*          fmt,
-            UNumberFormatAttribute  attr,
-            int32_t                 newValue);
-
-
-/**
-* Get a numeric attribute associated with a UNumberFormat.
-* An example of a numeric attribute is the number of integer digits a formatter will produce.
-* If the formatter does not understand the attribute, -1 is returned.
-* @param fmt The formatter to query.
-* @param attr The attribute to query; e.g. UNUM_ROUNDING_INCREMENT.
-* @return The value of attr.
-* @see unum_getAttribute
-* @see unum_setAttribute
-* @see unum_setDoubleAttribute
-* @see unum_getTextAttribute
-* @see unum_setTextAttribute
-* @stable ICU 2.0
-*/
-U_STABLE double U_EXPORT2 
-unum_getDoubleAttribute(const UNumberFormat*          fmt,
-          UNumberFormatAttribute  attr);
-
-/**
-* Set a numeric attribute associated with a UNumberFormat.
-* An example of a numeric attribute is the number of integer digits a formatter will produce.
-* If the formatter does not understand the attribute, this call is ignored.
-* @param fmt The formatter to set.
-* @param attr The attribute to set; e.g. UNUM_ROUNDING_INCREMENT.
-* @param newValue The new value of attr.
-* @see unum_getAttribute
-* @see unum_setAttribute
-* @see unum_getDoubleAttribute
-* @see unum_getTextAttribute
-* @see unum_setTextAttribute
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2 
-unum_setDoubleAttribute(    UNumberFormat*          fmt,
-            UNumberFormatAttribute  attr,
-            double                 newValue);
-
-/** The possible UNumberFormat text attributes @stable ICU 2.0*/
-typedef enum UNumberFormatTextAttribute {
-  /** Positive prefix */
-  UNUM_POSITIVE_PREFIX,
-  /** Positive suffix */
-  UNUM_POSITIVE_SUFFIX,
-  /** Negative prefix */
-  UNUM_NEGATIVE_PREFIX,
-  /** Negative suffix */
-  UNUM_NEGATIVE_SUFFIX,
-  /** The character used to pad to the format width. */
-  UNUM_PADDING_CHARACTER,
-  /** The ISO currency code */
-  UNUM_CURRENCY_CODE,
-  /**
-   * The default rule set.  This is only available with rule-based formatters.
-   * @stable ICU 3.0
-   */
-  UNUM_DEFAULT_RULESET,
-  /**
-   * The public rule sets.  This is only available with rule-based formatters.
-   * This is a read-only attribute.  The public rulesets are returned as a
-   * single string, with each ruleset name delimited by ';' (semicolon).
-   * @stable ICU 3.0
-   */
-  UNUM_PUBLIC_RULESETS
-} UNumberFormatTextAttribute;
-
-/**
-* Get a text attribute associated with a UNumberFormat.
-* An example of a text attribute is the suffix for positive numbers.  If the formatter
-* does not understand the attributre, U_UNSUPPORTED_ERROR is returned as the status.
-* Rule-based formatters only understand UNUM_DEFAULT_RULESET and UNUM_PUBLIC_RULESETS.
-* @param fmt The formatter to query.
-* @param tag The attribute to query; one of UNUM_POSITIVE_PREFIX, UNUM_POSITIVE_SUFFIX,
-* UNUM_NEGATIVE_PREFIX, UNUM_NEGATIVE_SUFFIX, UNUM_PADDING_CHARACTER, UNUM_CURRENCY_CODE,
-* UNUM_DEFAULT_RULESET, or UNUM_PUBLIC_RULESETS.
-* @param result A pointer to a buffer to receive the attribute.
-* @param resultLength The maximum size of result.
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The total buffer size needed; if greater than resultLength, the output was truncated.
-* @see unum_setTextAttribute
-* @see unum_getAttribute
-* @see unum_setAttribute
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2 
-unum_getTextAttribute(    const    UNumberFormat*                    fmt,
-            UNumberFormatTextAttribute      tag,
-            UChar*                            result,
-            int32_t                            resultLength,
-            UErrorCode*                        status);
-
-/**
-* Set a text attribute associated with a UNumberFormat.
-* An example of a text attribute is the suffix for positive numbers.  Rule-based formatters
-* only understand UNUM_DEFAULT_RULESET.
-* @param fmt The formatter to set.
-* @param tag The attribute to set; one of UNUM_POSITIVE_PREFIX, UNUM_POSITIVE_SUFFIX,
-* UNUM_NEGATIVE_PREFIX, UNUM_NEGATIVE_SUFFIX, UNUM_PADDING_CHARACTER, UNUM_CURRENCY_CODE,
-* or UNUM_DEFAULT_RULESET.
-* @param newValue The new value of attr.
-* @param newValueLength The length of newValue, or -1 if null-terminated.
-* @param status A pointer to an UErrorCode to receive any errors
-* @see unum_getTextAttribute
-* @see unum_getAttribute
-* @see unum_setAttribute
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2 
-unum_setTextAttribute(    UNumberFormat*                    fmt,
-            UNumberFormatTextAttribute      tag,
-            const    UChar*                            newValue,
-            int32_t                            newValueLength,
-            UErrorCode                        *status);
-
-/**
- * Extract the pattern from a UNumberFormat.  The pattern will follow
- * the DecimalFormat pattern syntax.
- * @param fmt The formatter to query.
- * @param isPatternLocalized TRUE if the pattern should be localized,
- * FALSE otherwise.  This is ignored if the formatter is a rule-based
- * formatter.
- * @param result A pointer to a buffer to receive the pattern.
- * @param resultLength The maximum size of result.
- * @param status A pointer to an input-output UErrorCode.
- * @return The total buffer size needed; if greater than resultLength,
- * the output was truncated.
- * @see unum_applyPattern
- * @see DecimalFormat
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-unum_toPattern(    const    UNumberFormat*          fmt,
-        UBool                  isPatternLocalized,
-        UChar*                  result,
-        int32_t                 resultLength,
-        UErrorCode*             status);
-
-
-/**
- * Constants for specifying a number format symbol.
- * @stable ICU 2.0
- */
-typedef enum UNumberFormatSymbol {
-  /** The decimal separator */
-  UNUM_DECIMAL_SEPARATOR_SYMBOL = 0,
-  /** The grouping separator */
-  UNUM_GROUPING_SEPARATOR_SYMBOL = 1,
-  /** The pattern separator */
-  UNUM_PATTERN_SEPARATOR_SYMBOL = 2,
-  /** The percent sign */
-  UNUM_PERCENT_SYMBOL = 3,
-  /** Zero*/
-  UNUM_ZERO_DIGIT_SYMBOL = 4,
-  /** Character representing a digit in the pattern */
-  UNUM_DIGIT_SYMBOL = 5,
-  /** The minus sign */
-  UNUM_MINUS_SIGN_SYMBOL = 6,
-  /** The plus sign */
-  UNUM_PLUS_SIGN_SYMBOL = 7,
-  /** The currency symbol */
-  UNUM_CURRENCY_SYMBOL = 8,
-  /** The international currency symbol */
-  UNUM_INTL_CURRENCY_SYMBOL = 9,
-  /** The monetary separator */
-  UNUM_MONETARY_SEPARATOR_SYMBOL = 10,
-  /** The exponential symbol */
-  UNUM_EXPONENTIAL_SYMBOL = 11,
-  /** Per mill symbol */
-  UNUM_PERMILL_SYMBOL = 12,
-  /** Escape padding character */
-  UNUM_PAD_ESCAPE_SYMBOL = 13,
-  /** Infinity symbol */
-  UNUM_INFINITY_SYMBOL = 14,
-  /** Nan symbol */
-  UNUM_NAN_SYMBOL = 15,
-  /** Significant digit symbol
-   * @stable ICU 3.0 */
-  UNUM_SIGNIFICANT_DIGIT_SYMBOL = 16,
-  /** The monetary grouping separator 
-   * @stable ICU 3.6
-   */
-  UNUM_MONETARY_GROUPING_SEPARATOR_SYMBOL = 17,  
-  /** count symbol constants */
-  UNUM_FORMAT_SYMBOL_COUNT = 18
-} UNumberFormatSymbol;
-
-/**
-* Get a symbol associated with a UNumberFormat.
-* A UNumberFormat uses symbols to represent the special locale-dependent
-* characters in a number, for example the percent sign. This API is not
-* supported for rule-based formatters.
-* @param fmt The formatter to query.
-* @param symbol The UNumberFormatSymbol constant for the symbol to get
-* @param buffer The string buffer that will receive the symbol string;
-*               if it is NULL, then only the length of the symbol is returned
-* @param size The size of the string buffer
-* @param status A pointer to an UErrorCode to receive any errors
-* @return The length of the symbol; the buffer is not modified if
-*         <code>length&gt;=size</code>
-* @see unum_setSymbol
-* @stable ICU 2.0
-*/
-U_STABLE int32_t U_EXPORT2
-unum_getSymbol(const UNumberFormat *fmt,
-               UNumberFormatSymbol symbol,
-               UChar *buffer,
-               int32_t size,
-               UErrorCode *status);
-
-/**
-* Set a symbol associated with a UNumberFormat.
-* A UNumberFormat uses symbols to represent the special locale-dependent
-* characters in a number, for example the percent sign.  This API is not
-* supported for rule-based formatters.
-* @param fmt The formatter to set.
-* @param symbol The UNumberFormatSymbol constant for the symbol to set
-* @param value The string to set the symbol to
-* @param length The length of the string, or -1 for a zero-terminated string
-* @param status A pointer to an UErrorCode to receive any errors.
-* @see unum_getSymbol
-* @stable ICU 2.0
-*/
-U_STABLE void U_EXPORT2
-unum_setSymbol(UNumberFormat *fmt,
-               UNumberFormatSymbol symbol,
-               const UChar *value,
-               int32_t length,
-               UErrorCode *status);
-
-
-/**
- * Get the locale for this number format object.
- * You can choose between valid and actual locale.
- * @param fmt The formatter to get the locale from
- * @param type type of the locale we're looking for (valid or actual) 
- * @param status error code for the operation
- * @return the locale name
- * @stable ICU 2.8
- */
-U_STABLE const char* U_EXPORT2
-unum_getLocaleByType(const UNumberFormat *fmt,
-                     ULocDataLocaleType type,
-                     UErrorCode* status); 
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/unum.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/unum.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/unum.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/unum.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,869 @@
+/*
+*******************************************************************************
+* Copyright (C) 1997-2008, International Business Machines Corporation and others.
+* All Rights Reserved.
+* Modification History:
+*
+*   Date        Name        Description
+*   06/24/99    helena      Integrated Alan's NF enhancements and Java2 bug fixes
+*******************************************************************************
+*/
+
+#ifndef _UNUM
+#define _UNUM
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/uloc.h"
+#include "unicode/umisc.h"
+#include "unicode/parseerr.h"
+/**
+ * \file
+ * \brief C API: NumberFormat
+ *
+ * <h2> Number Format C API </h2>
+ *
+ * Number Format C API  Provides functions for
+ * formatting and parsing a number.  Also provides methods for
+ * determining which locales have number formats, and what their names
+ * are.
+ * <P>
+ * UNumberFormat helps you to format and parse numbers for any locale.
+ * Your code can be completely independent of the locale conventions
+ * for decimal points, thousands-separators, or even the particular
+ * decimal digits used, or whether the number format is even decimal.
+ * There are different number format styles like decimal, currency,
+ * percent and spellout.
+ * <P>
+ * To format a number for the current Locale, use one of the static
+ * factory methods:
+ * <pre>
+ * \code
+ *    UChar myString[20];
+ *    double myNumber = 7.0;
+ *    UErrorCode status = U_ZERO_ERROR;
+ *    UNumberFormat* nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status);
+ *    unum_formatDouble(nf, myNumber, myString, 20, NULL, &status);
+ *    printf(" Example 1: %s\n", austrdup(myString) ); //austrdup( a function used to convert UChar* to char*)
+ * \endcode
+ * </pre>
+ * If you are formatting multiple numbers, it is more efficient to get
+ * the format and use it multiple times so that the system doesn't
+ * have to fetch the information about the local language and country
+ * conventions multiple times.
+ * <pre>
+ * \code
+ * uint32_t i, resultlength, reslenneeded;
+ * UErrorCode status = U_ZERO_ERROR;
+ * UFieldPosition pos;
+ * uint32_t a[] = { 123, 3333, -1234567 };
+ * const uint32_t a_len = sizeof(a) / sizeof(a[0]);
+ * UNumberFormat* nf;
+ * UChar* result = NULL;
+ *
+ * nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status);
+ * for (i = 0; i < a_len; i++) {
+ *    resultlength=0;
+ *    reslenneeded=unum_format(nf, a[i], NULL, resultlength, &pos, &status);
+ *    result = NULL;
+ *    if(status==U_BUFFER_OVERFLOW_ERROR){
+ *       status=U_ZERO_ERROR;
+ *       resultlength=reslenneeded+1;
+ *       result=(UChar*)malloc(sizeof(UChar) * resultlength);
+ *       unum_format(nf, a[i], result, resultlength, &pos, &status);
+ *    }
+ *    printf( " Example 2: %s\n", austrdup(result));
+ *    free(result);
+ * }
+ * \endcode
+ * </pre>
+ * To format a number for a different Locale, specify it in the
+ * call to unum_open().
+ * <pre>
+ * \code
+ *     UNumberFormat* nf = unum_open(UNUM_DEFAULT, NULL, -1, "fr_FR", NULL, &success)
+ * \endcode
+ * </pre>
+ * You can use a NumberFormat API unum_parse() to parse.
+ * <pre>
+ * \code
+ *    UErrorCode status = U_ZERO_ERROR;
+ *    int32_t pos=0;
+ *    int32_t num;
+ *    num = unum_parse(nf, str, u_strlen(str), &pos, &status);
+ * \endcode
+ * </pre>
+ * Use UNUM_DECIMAL to get the normal number format for that country.
+ * There are other static options available.  Use UNUM_CURRENCY
+ * to get the currency number format for that country.  Use UNUM_PERCENT
+ * to get a format for displaying percentages. With this format, a
+ * fraction from 0.53 is displayed as 53%.
+ * <P>
+ * Use a pattern to create either a DecimalFormat or a RuleBasedNumberFormat
+ * formatter.  The pattern must conform to the syntax defined for those
+ * formatters.
+ * <P>
+ * You can also control the display of numbers with such function as
+ * unum_getAttribues() and unum_setAtributes(), which let you set the
+ * miminum fraction digits, grouping, etc.
+ * @see UNumberFormatAttributes for more details
+ * <P>
+ * You can also use forms of the parse and format methods with
+ * ParsePosition and UFieldPosition to allow you to:
+ * <ul type=round>
+ *   <li>(a) progressively parse through pieces of a string.
+ *   <li>(b) align the decimal point and other areas.
+ * </ul>
+ * <p>
+ * It is also possible to change or set the symbols used for a particular
+ * locale like the currency symbol, the grouping seperator , monetary seperator
+ * etc by making use of functions unum_setSymbols() and unum_getSymbols().
+ */
+
+/** A number formatter.
+ *  For usage in C programs.
+ *  @stable ICU 2.0
+ */
+typedef void* UNumberFormat;
+
+/** The possible number format styles. 
+ *  @stable ICU 2.0
+ */
+typedef enum UNumberFormatStyle {
+    /**
+     * Decimal format defined by pattern 
+     * @stable ICU 3.0
+     */
+    UNUM_PATTERN_DECIMAL=0,
+    /** Decimal format */
+    UNUM_DECIMAL=1,
+    /** Currency format */
+    UNUM_CURRENCY,
+    /** Percent format */
+    UNUM_PERCENT,
+    /** Scientific format */
+    UNUM_SCIENTIFIC,
+    /** Spellout rule-based format */
+    UNUM_SPELLOUT,
+    /** 
+     * Ordinal rule-based format 
+     * @stable ICU 3.0
+     */
+    UNUM_ORDINAL,
+    /** 
+     * Duration rule-based format 
+     * @stable ICU 3.0
+     */
+    UNUM_DURATION,
+    /** 
+     * Rule-based format defined by pattern 
+     * @stable ICU 3.0
+     */
+    UNUM_PATTERN_RULEBASED,
+    /** Default format */
+    UNUM_DEFAULT = UNUM_DECIMAL,
+    /** (Alias for UNUM_PATTERN_DECIMAL) */
+    UNUM_IGNORE = UNUM_PATTERN_DECIMAL
+} UNumberFormatStyle;
+
+/** The possible number format rounding modes. 
+ *  @stable ICU 2.0
+ */
+typedef enum UNumberFormatRoundingMode {
+    UNUM_ROUND_CEILING,
+    UNUM_ROUND_FLOOR,
+    UNUM_ROUND_DOWN,
+    UNUM_ROUND_UP,
+    /**
+     * Half-even rounding, misspelled name
+     * @deprecated, ICU 3.8
+     */
+    UNUM_FOUND_HALFEVEN,
+    UNUM_ROUND_HALFDOWN,
+    UNUM_ROUND_HALFUP,
+    /**
+     * Half-even rounding
+     * @stable, ICU 3.8
+     */
+    UNUM_ROUND_HALFEVEN = UNUM_FOUND_HALFEVEN
+} UNumberFormatRoundingMode;
+
+/** The possible number format pad positions. 
+ *  @stable ICU 2.0
+ */
+typedef enum UNumberFormatPadPosition {
+    UNUM_PAD_BEFORE_PREFIX,
+    UNUM_PAD_AFTER_PREFIX,
+    UNUM_PAD_BEFORE_SUFFIX,
+    UNUM_PAD_AFTER_SUFFIX
+} UNumberFormatPadPosition;
+
+/**
+ * Create and return a new UNumberFormat for formatting and parsing
+ * numbers.  A UNumberFormat may be used to format numbers by calling
+ * {@link #unum_format }, and to parse numbers by calling {@link #unum_parse }.
+ * The caller must call {@link #unum_close } when done to release resources
+ * used by this object.
+ * @param style The type of number format to open: one of
+ * UNUM_DECIMAL, UNUM_CURRENCY, UNUM_PERCENT, UNUM_SCIENTIFIC, UNUM_SPELLOUT,
+ * UNUM_PATTERN_DECIMAL, UNUM_PATTERN_RULEBASED, or UNUM_DEFAULT.
+ * If UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED is passed then the
+ * number format is opened using the given pattern, which must conform
+ * to the syntax described in DecimalFormat or RuleBasedNumberFormat,
+ * respectively.
+ * @param pattern A pattern specifying the format to use. 
+ * This parameter is ignored unless the style is
+ * UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED.
+ * @param patternLength The number of characters in the pattern, or -1
+ * if null-terminated. This parameter is ignored unless the style is
+ * UNUM_PATTERN.
+ * @param locale A locale identifier to use to determine formatting
+ * and parsing conventions, or NULL to use the default locale.
+ * @param parseErr A pointer to a UParseError struct to receive the
+ * details of any parsing errors, or NULL if no parsing error details
+ * are desired.
+ * @param status A pointer to an input-output UErrorCode.
+ * @return A pointer to a newly created UNumberFormat, or NULL if an
+ * error occurred.
+ * @see unum_close
+ * @see DecimalFormat
+ * @stable ICU 2.0
+ */
+U_STABLE UNumberFormat* U_EXPORT2 
+unum_open(  UNumberFormatStyle    style,
+            const    UChar*    pattern,
+            int32_t            patternLength,
+            const    char*     locale,
+            UParseError*       parseErr,
+            UErrorCode*        status);
+
+
+/**
+* Close a UNumberFormat.
+* Once closed, a UNumberFormat may no longer be used.
+* @param fmt The formatter to close.
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2 
+unum_close(UNumberFormat* fmt);
+
+/**
+ * Open a copy of a UNumberFormat.
+ * This function performs a deep copy.
+ * @param fmt The format to copy
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UNumberFormat identical to fmt.
+ * @stable ICU 2.0
+ */
+U_STABLE UNumberFormat* U_EXPORT2 
+unum_clone(const UNumberFormat *fmt,
+       UErrorCode *status);
+
+/**
+* Format an integer using a UNumberFormat.
+* The integer will be formatted according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param number The number to format.
+* @param result A pointer to a buffer to receive the formatted number.
+* @param resultLength The maximum size of result.
+* @param pos    A pointer to a UFieldPosition.  On input, position->field
+* is read.  On output, position->beginIndex and position->endIndex indicate
+* the beginning and ending indices of field number position->field, if such
+* a field exists.  This parameter may be NULL, in which case no field
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see UFieldPosition
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2 
+unum_format(    const    UNumberFormat*    fmt,
+        int32_t            number,
+        UChar*            result,
+        int32_t            resultLength,
+        UFieldPosition    *pos,
+        UErrorCode*        status);
+
+/**
+* Format an int64 using a UNumberFormat.
+* The int64 will be formatted according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param number The number to format.
+* @param result A pointer to a buffer to receive the formatted number.
+* @param resultLength The maximum size of result.
+* @param pos    A pointer to a UFieldPosition.  On input, position->field
+* is read.  On output, position->beginIndex and position->endIndex indicate
+* the beginning and ending indices of field number position->field, if such
+* a field exists.  This parameter may be NULL, in which case no field
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_format
+* @see unum_formatDouble
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see UFieldPosition
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2 
+unum_formatInt64(const UNumberFormat *fmt,
+        int64_t         number,
+        UChar*          result,
+        int32_t         resultLength,
+        UFieldPosition *pos,
+        UErrorCode*     status);
+
+/**
+* Format a double using a UNumberFormat.
+* The double will be formatted according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param number The number to format.
+* @param result A pointer to a buffer to receive the formatted number.
+* @param resultLength The maximum size of result.
+* @param pos    A pointer to a UFieldPosition.  On input, position->field
+* is read.  On output, position->beginIndex and position->endIndex indicate
+* the beginning and ending indices of field number position->field, if such
+* a field exists.  This parameter may be NULL, in which case no field
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see UFieldPosition
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2 
+unum_formatDouble(    const    UNumberFormat*  fmt,
+            double          number,
+            UChar*          result,
+            int32_t         resultLength,
+            UFieldPosition  *pos, /* 0 if ignore */
+            UErrorCode*     status);
+
+/**
+ * Format a double currency amount using a UNumberFormat.
+ * The double will be formatted according to the UNumberFormat's locale.
+ * @param fmt the formatter to use
+ * @param number the number to format
+ * @param currency the 3-letter null-terminated ISO 4217 currency code
+ * @param result a pointer to the buffer to receive the formatted number
+ * @param resultLength the maximum number of UChars to write to result
+ * @param pos a pointer to a UFieldPosition.  On input,
+ * position->field is read.  On output, position->beginIndex and
+ * position->endIndex indicate the beginning and ending indices of
+ * field number position->field, if such a field exists.  This
+ * parameter may be NULL, in which case it is ignored.
+ * @param status a pointer to an input-output UErrorCode
+ * @return the total buffer size needed; if greater than resultLength,
+ * the output was truncated.
+ * @see unum_formatDouble
+ * @see unum_parseDoubleCurrency
+ * @see UFieldPosition
+ * @stable ICU 3.0
+ */
+U_STABLE int32_t U_EXPORT2 
+unum_formatDoubleCurrency(const UNumberFormat* fmt,
+                          double number,
+                          UChar* currency,
+                          UChar* result,
+                          int32_t resultLength,
+                          UFieldPosition* pos, /* ignored if 0 */
+                          UErrorCode* status);
+
+/**
+* Parse a string into an integer using a UNumberFormat.
+* The string will be parsed according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
+* to begin parsing.  If not 0, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The value of the parsed integer
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2 
+unum_parse(    const   UNumberFormat*  fmt,
+        const   UChar*          text,
+        int32_t         textLength,
+        int32_t         *parsePos /* 0 = start */,
+        UErrorCode      *status);
+
+/**
+* Parse a string into an int64 using a UNumberFormat.
+* The string will be parsed according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
+* to begin parsing.  If not 0, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The value of the parsed integer
+* @see unum_parse
+* @see unum_parseDouble
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @stable ICU 2.8
+*/
+U_STABLE int64_t U_EXPORT2 
+unum_parseInt64(const UNumberFormat*  fmt,
+        const UChar*  text,
+        int32_t       textLength,
+        int32_t       *parsePos /* 0 = start */,
+        UErrorCode    *status);
+
+/**
+* Parse a string into a double using a UNumberFormat.
+* The string will be parsed according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
+* to begin parsing.  If not 0, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The value of the parsed double
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @stable ICU 2.0
+*/
+U_STABLE double U_EXPORT2 
+unum_parseDouble(    const   UNumberFormat*  fmt,
+            const   UChar*          text,
+            int32_t         textLength,
+            int32_t         *parsePos /* 0 = start */,
+            UErrorCode      *status);
+
+/**
+ * Parse a string into a double and a currency using a UNumberFormat.
+ * The string will be parsed according to the UNumberFormat's locale.
+ * @param fmt the formatter to use
+ * @param text the text to parse
+ * @param textLength the length of text, or -1 if null-terminated
+ * @param parsePos a pointer to an offset index into text at which to
+ * begin parsing. On output, *parsePos will point after the last
+ * parsed character.  This parameter may be 0, in which case parsing
+ * begins at offset 0.
+ * @param currency a pointer to the buffer to receive the parsed null-
+ * terminated currency.  This buffer must have a capacity of at least
+ * 4 UChars.
+ * @param status a pointer to an input-output UErrorCode
+ * @return the parsed double
+ * @see unum_parseDouble
+ * @see unum_formatDoubleCurrency
+ * @stable ICU 3.0
+ */
+U_STABLE double U_EXPORT2
+unum_parseDoubleCurrency(const UNumberFormat* fmt,
+                         const UChar* text,
+                         int32_t textLength,
+                         int32_t* parsePos, /* 0 = start */
+                         UChar* currency,
+                         UErrorCode* status);
+
+/**
+ * Set the pattern used by a UNumberFormat.  This can only be used
+ * on a DecimalFormat, other formats return U_ILLEGAL_ARGUMENT_ERROR
+ * in the status.
+ * @param format The formatter to set.
+ * @param localized TRUE if the pattern is localized, FALSE otherwise.
+ * @param pattern The new pattern
+ * @param patternLength The length of pattern, or -1 if null-terminated.
+ * @param parseError A pointer to UParseError to recieve information
+ * about errors occurred during parsing, or NULL if no parse error
+ * information is desired.
+ * @param status A pointer to an input-output UErrorCode.
+ * @see unum_toPattern
+ * @see DecimalFormat
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+unum_applyPattern(          UNumberFormat  *format,
+                            UBool          localized,
+                    const   UChar          *pattern,
+                            int32_t         patternLength,
+                            UParseError    *parseError,
+                            UErrorCode     *status
+                                    );
+
+/**
+* Get a locale for which decimal formatting patterns are available.
+* A UNumberFormat in a locale returned by this function will perform the correct
+* formatting and parsing for the locale.  The results of this call are not
+* valid for rule-based number formats.
+* @param index The index of the desired locale.
+* @return A locale for which number formatting patterns are available, or 0 if none.
+* @see unum_countAvailable
+* @stable ICU 2.0
+*/
+U_STABLE const char* U_EXPORT2 
+unum_getAvailable(int32_t index);
+
+/**
+* Determine how many locales have decimal formatting patterns available.  The
+* results of this call are not valid for rule-based number formats.
+* This function is useful for determining the loop ending condition for
+* calls to {@link #unum_getAvailable }.
+* @return The number of locales for which decimal formatting patterns are available.
+* @see unum_getAvailable
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2 
+unum_countAvailable(void);
+
+/** The possible UNumberFormat numeric attributes @stable ICU 2.0 */
+typedef enum UNumberFormatAttribute {
+  /** Parse integers only */
+  UNUM_PARSE_INT_ONLY,
+  /** Use grouping separator */
+  UNUM_GROUPING_USED,
+  /** Always show decimal point */
+  UNUM_DECIMAL_ALWAYS_SHOWN,
+  /** Maximum integer digits */
+  UNUM_MAX_INTEGER_DIGITS,
+  /** Minimum integer digits */
+  UNUM_MIN_INTEGER_DIGITS,
+  /** Integer digits */
+  UNUM_INTEGER_DIGITS,
+  /** Maximum fraction digits */
+  UNUM_MAX_FRACTION_DIGITS,
+  /** Minimum fraction digits */
+  UNUM_MIN_FRACTION_DIGITS,
+  /** Fraction digits */
+  UNUM_FRACTION_DIGITS,
+  /** Multiplier */
+  UNUM_MULTIPLIER,
+  /** Grouping size */
+  UNUM_GROUPING_SIZE,
+  /** Rounding Mode */
+  UNUM_ROUNDING_MODE,
+  /** Rounding increment */
+  UNUM_ROUNDING_INCREMENT,
+  /** The width to which the output of <code>format()</code> is padded. */
+  UNUM_FORMAT_WIDTH,
+  /** The position at which padding will take place. */
+  UNUM_PADDING_POSITION,
+  /** Secondary grouping size */
+  UNUM_SECONDARY_GROUPING_SIZE,
+  /** Use significant digits
+   * @stable ICU 3.0 */
+  UNUM_SIGNIFICANT_DIGITS_USED,
+  /** Minimum significant digits
+   * @stable ICU 3.0 */
+  UNUM_MIN_SIGNIFICANT_DIGITS,
+  /** Maximum significant digits
+   * @stable ICU 3.0 */
+  UNUM_MAX_SIGNIFICANT_DIGITS,
+  /** Lenient parse mode used by rule-based formats.
+   * @stable ICU 3.0
+   */
+  UNUM_LENIENT_PARSE
+} UNumberFormatAttribute;
+
+/**
+* Get a numeric attribute associated with a UNumberFormat.
+* An example of a numeric attribute is the number of integer digits a formatter will produce.
+* @param fmt The formatter to query.
+* @param attr The attribute to query; one of UNUM_PARSE_INT_ONLY, UNUM_GROUPING_USED,
+* UNUM_DECIMAL_ALWAYS_SHOWN, UNUM_MAX_INTEGER_DIGITS, UNUM_MIN_INTEGER_DIGITS, UNUM_INTEGER_DIGITS,
+* UNUM_MAX_FRACTION_DIGITS, UNUM_MIN_FRACTION_DIGITS, UNUM_FRACTION_DIGITS, UNUM_MULTIPLIER,
+* UNUM_GROUPING_SIZE, UNUM_ROUNDING_MODE, UNUM_FORMAT_WIDTH, UNUM_PADDING_POSITION, UNUM_SECONDARY_GROUPING_SIZE.
+* @return The value of attr.
+* @see unum_setAttribute
+* @see unum_getDoubleAttribute
+* @see unum_setDoubleAttribute
+* @see unum_getTextAttribute
+* @see unum_setTextAttribute
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2 
+unum_getAttribute(const UNumberFormat*          fmt,
+          UNumberFormatAttribute  attr);
+
+/**
+* Set a numeric attribute associated with a UNumberFormat.
+* An example of a numeric attribute is the number of integer digits a formatter will produce.  If the
+* formatter does not understand the attribute, the call is ignored.  Rule-based formatters only understand
+* the lenient-parse attribute.
+* @param fmt The formatter to set.
+* @param attr The attribute to set; one of UNUM_PARSE_INT_ONLY, UNUM_GROUPING_USED,
+* UNUM_DECIMAL_ALWAYS_SHOWN, UNUM_MAX_INTEGER_DIGITS, UNUM_MIN_INTEGER_DIGITS, UNUM_INTEGER_DIGITS,
+* UNUM_MAX_FRACTION_DIGITS, UNUM_MIN_FRACTION_DIGITS, UNUM_FRACTION_DIGITS, UNUM_MULTIPLIER,
+* UNUM_GROUPING_SIZE, UNUM_ROUNDING_MODE, UNUM_FORMAT_WIDTH, UNUM_PADDING_POSITION, UNUM_SECONDARY_GROUPING_SIZE,
+* or UNUM_LENIENT_PARSE.
+* @param newValue The new value of attr.
+* @see unum_getAttribute
+* @see unum_getDoubleAttribute
+* @see unum_setDoubleAttribute
+* @see unum_getTextAttribute
+* @see unum_setTextAttribute
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2 
+unum_setAttribute(    UNumberFormat*          fmt,
+            UNumberFormatAttribute  attr,
+            int32_t                 newValue);
+
+
+/**
+* Get a numeric attribute associated with a UNumberFormat.
+* An example of a numeric attribute is the number of integer digits a formatter will produce.
+* If the formatter does not understand the attribute, -1 is returned.
+* @param fmt The formatter to query.
+* @param attr The attribute to query; e.g. UNUM_ROUNDING_INCREMENT.
+* @return The value of attr.
+* @see unum_getAttribute
+* @see unum_setAttribute
+* @see unum_setDoubleAttribute
+* @see unum_getTextAttribute
+* @see unum_setTextAttribute
+* @stable ICU 2.0
+*/
+U_STABLE double U_EXPORT2 
+unum_getDoubleAttribute(const UNumberFormat*          fmt,
+          UNumberFormatAttribute  attr);
+
+/**
+* Set a numeric attribute associated with a UNumberFormat.
+* An example of a numeric attribute is the number of integer digits a formatter will produce.
+* If the formatter does not understand the attribute, this call is ignored.
+* @param fmt The formatter to set.
+* @param attr The attribute to set; e.g. UNUM_ROUNDING_INCREMENT.
+* @param newValue The new value of attr.
+* @see unum_getAttribute
+* @see unum_setAttribute
+* @see unum_getDoubleAttribute
+* @see unum_getTextAttribute
+* @see unum_setTextAttribute
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2 
+unum_setDoubleAttribute(    UNumberFormat*          fmt,
+            UNumberFormatAttribute  attr,
+            double                 newValue);
+
+/** The possible UNumberFormat text attributes @stable ICU 2.0*/
+typedef enum UNumberFormatTextAttribute {
+  /** Positive prefix */
+  UNUM_POSITIVE_PREFIX,
+  /** Positive suffix */
+  UNUM_POSITIVE_SUFFIX,
+  /** Negative prefix */
+  UNUM_NEGATIVE_PREFIX,
+  /** Negative suffix */
+  UNUM_NEGATIVE_SUFFIX,
+  /** The character used to pad to the format width. */
+  UNUM_PADDING_CHARACTER,
+  /** The ISO currency code */
+  UNUM_CURRENCY_CODE,
+  /**
+   * The default rule set.  This is only available with rule-based formatters.
+   * @stable ICU 3.0
+   */
+  UNUM_DEFAULT_RULESET,
+  /**
+   * The public rule sets.  This is only available with rule-based formatters.
+   * This is a read-only attribute.  The public rulesets are returned as a
+   * single string, with each ruleset name delimited by ';' (semicolon).
+   * @stable ICU 3.0
+   */
+  UNUM_PUBLIC_RULESETS
+} UNumberFormatTextAttribute;
+
+/**
+* Get a text attribute associated with a UNumberFormat.
+* An example of a text attribute is the suffix for positive numbers.  If the formatter
+* does not understand the attributre, U_UNSUPPORTED_ERROR is returned as the status.
+* Rule-based formatters only understand UNUM_DEFAULT_RULESET and UNUM_PUBLIC_RULESETS.
+* @param fmt The formatter to query.
+* @param tag The attribute to query; one of UNUM_POSITIVE_PREFIX, UNUM_POSITIVE_SUFFIX,
+* UNUM_NEGATIVE_PREFIX, UNUM_NEGATIVE_SUFFIX, UNUM_PADDING_CHARACTER, UNUM_CURRENCY_CODE,
+* UNUM_DEFAULT_RULESET, or UNUM_PUBLIC_RULESETS.
+* @param result A pointer to a buffer to receive the attribute.
+* @param resultLength The maximum size of result.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_setTextAttribute
+* @see unum_getAttribute
+* @see unum_setAttribute
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2 
+unum_getTextAttribute(    const    UNumberFormat*                    fmt,
+            UNumberFormatTextAttribute      tag,
+            UChar*                            result,
+            int32_t                            resultLength,
+            UErrorCode*                        status);
+
+/**
+* Set a text attribute associated with a UNumberFormat.
+* An example of a text attribute is the suffix for positive numbers.  Rule-based formatters
+* only understand UNUM_DEFAULT_RULESET.
+* @param fmt The formatter to set.
+* @param tag The attribute to set; one of UNUM_POSITIVE_PREFIX, UNUM_POSITIVE_SUFFIX,
+* UNUM_NEGATIVE_PREFIX, UNUM_NEGATIVE_SUFFIX, UNUM_PADDING_CHARACTER, UNUM_CURRENCY_CODE,
+* or UNUM_DEFAULT_RULESET.
+* @param newValue The new value of attr.
+* @param newValueLength The length of newValue, or -1 if null-terminated.
+* @param status A pointer to an UErrorCode to receive any errors
+* @see unum_getTextAttribute
+* @see unum_getAttribute
+* @see unum_setAttribute
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2 
+unum_setTextAttribute(    UNumberFormat*                    fmt,
+            UNumberFormatTextAttribute      tag,
+            const    UChar*                            newValue,
+            int32_t                            newValueLength,
+            UErrorCode                        *status);
+
+/**
+ * Extract the pattern from a UNumberFormat.  The pattern will follow
+ * the DecimalFormat pattern syntax.
+ * @param fmt The formatter to query.
+ * @param isPatternLocalized TRUE if the pattern should be localized,
+ * FALSE otherwise.  This is ignored if the formatter is a rule-based
+ * formatter.
+ * @param result A pointer to a buffer to receive the pattern.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to an input-output UErrorCode.
+ * @return The total buffer size needed; if greater than resultLength,
+ * the output was truncated.
+ * @see unum_applyPattern
+ * @see DecimalFormat
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+unum_toPattern(    const    UNumberFormat*          fmt,
+        UBool                  isPatternLocalized,
+        UChar*                  result,
+        int32_t                 resultLength,
+        UErrorCode*             status);
+
+
+/**
+ * Constants for specifying a number format symbol.
+ * @stable ICU 2.0
+ */
+typedef enum UNumberFormatSymbol {
+  /** The decimal separator */
+  UNUM_DECIMAL_SEPARATOR_SYMBOL = 0,
+  /** The grouping separator */
+  UNUM_GROUPING_SEPARATOR_SYMBOL = 1,
+  /** The pattern separator */
+  UNUM_PATTERN_SEPARATOR_SYMBOL = 2,
+  /** The percent sign */
+  UNUM_PERCENT_SYMBOL = 3,
+  /** Zero*/
+  UNUM_ZERO_DIGIT_SYMBOL = 4,
+  /** Character representing a digit in the pattern */
+  UNUM_DIGIT_SYMBOL = 5,
+  /** The minus sign */
+  UNUM_MINUS_SIGN_SYMBOL = 6,
+  /** The plus sign */
+  UNUM_PLUS_SIGN_SYMBOL = 7,
+  /** The currency symbol */
+  UNUM_CURRENCY_SYMBOL = 8,
+  /** The international currency symbol */
+  UNUM_INTL_CURRENCY_SYMBOL = 9,
+  /** The monetary separator */
+  UNUM_MONETARY_SEPARATOR_SYMBOL = 10,
+  /** The exponential symbol */
+  UNUM_EXPONENTIAL_SYMBOL = 11,
+  /** Per mill symbol */
+  UNUM_PERMILL_SYMBOL = 12,
+  /** Escape padding character */
+  UNUM_PAD_ESCAPE_SYMBOL = 13,
+  /** Infinity symbol */
+  UNUM_INFINITY_SYMBOL = 14,
+  /** Nan symbol */
+  UNUM_NAN_SYMBOL = 15,
+  /** Significant digit symbol
+   * @stable ICU 3.0 */
+  UNUM_SIGNIFICANT_DIGIT_SYMBOL = 16,
+  /** The monetary grouping separator 
+   * @stable ICU 3.6
+   */
+  UNUM_MONETARY_GROUPING_SEPARATOR_SYMBOL = 17,  
+  /** count symbol constants */
+  UNUM_FORMAT_SYMBOL_COUNT = 18
+} UNumberFormatSymbol;
+
+/**
+* Get a symbol associated with a UNumberFormat.
+* A UNumberFormat uses symbols to represent the special locale-dependent
+* characters in a number, for example the percent sign. This API is not
+* supported for rule-based formatters.
+* @param fmt The formatter to query.
+* @param symbol The UNumberFormatSymbol constant for the symbol to get
+* @param buffer The string buffer that will receive the symbol string;
+*               if it is NULL, then only the length of the symbol is returned
+* @param size The size of the string buffer
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The length of the symbol; the buffer is not modified if
+*         <code>length&gt;=size</code>
+* @see unum_setSymbol
+* @stable ICU 2.0
+*/
+U_STABLE int32_t U_EXPORT2
+unum_getSymbol(const UNumberFormat *fmt,
+               UNumberFormatSymbol symbol,
+               UChar *buffer,
+               int32_t size,
+               UErrorCode *status);
+
+/**
+* Set a symbol associated with a UNumberFormat.
+* A UNumberFormat uses symbols to represent the special locale-dependent
+* characters in a number, for example the percent sign.  This API is not
+* supported for rule-based formatters.
+* @param fmt The formatter to set.
+* @param symbol The UNumberFormatSymbol constant for the symbol to set
+* @param value The string to set the symbol to
+* @param length The length of the string, or -1 for a zero-terminated string
+* @param status A pointer to an UErrorCode to receive any errors.
+* @see unum_getSymbol
+* @stable ICU 2.0
+*/
+U_STABLE void U_EXPORT2
+unum_setSymbol(UNumberFormat *fmt,
+               UNumberFormatSymbol symbol,
+               const UChar *value,
+               int32_t length,
+               UErrorCode *status);
+
+
+/**
+ * Get the locale for this number format object.
+ * You can choose between valid and actual locale.
+ * @param fmt The formatter to get the locale from
+ * @param type type of the locale we're looking for (valid or actual) 
+ * @param status error code for the operation
+ * @return the locale name
+ * @stable ICU 2.8
+ */
+U_STABLE const char* U_EXPORT2
+unum_getLocaleByType(const UNumberFormat *fmt,
+                     ULocDataLocaleType type,
+                     UErrorCode* status); 
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/uobject.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uobject.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uobject.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,308 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 2002-2007, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-******************************************************************************
-*   file name:  uobject.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2002jun26
-*   created by: Markus W. Scherer
-*/
-
-#ifndef __UOBJECT_H__
-#define __UOBJECT_H__
-
-#include "unicode/utypes.h"
-
-U_NAMESPACE_BEGIN
-
-/**
- * \file
- * \brief C++ API: Common ICU base class UObject.
- */
-
-/**  U_OVERRIDE_CXX_ALLOCATION - Define this to override operator new and
- *                               delete in UMemory. Enabled by default for ICU.
- *
- *         Enabling forces all allocation of ICU object types to use ICU's
- *         memory allocation. On Windows, this allows the ICU DLL to be used by
- *         applications that statically link the C Runtime library, meaning that
- *         the app and ICU will be using different heaps.
- *
- * @stable ICU 2.2
- */                              
-#ifndef U_OVERRIDE_CXX_ALLOCATION
-#define U_OVERRIDE_CXX_ALLOCATION 1
-#endif
-
-/**  U_HAVE_PLACEMENT_NEW - Define this to define the placement new and
- *                          delete in UMemory for STL.
- *
- * @stable ICU 2.6
- */                              
-#ifndef U_HAVE_PLACEMENT_NEW
-#define U_HAVE_PLACEMENT_NEW 1
-#endif
-
-
-/**  U_HAVE_DEBUG_LOCATION_NEW - Define this to define the MFC debug
- * version of the operator new.
- *
- * @stable ICU 3.4
- */                              
-#ifndef U_HAVE_DEBUG_LOCATION_NEW
-#define U_HAVE_DEBUG_LOCATION_NEW 0
-#endif
-
-/**
- * UMemory is the common ICU base class.
- * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
- *
- * This is primarily to make it possible and simple to override the
- * C++ memory management by adding new/delete operators to this base class.
- *
- * To override ALL ICU memory management, including that from plain C code,
- * replace the allocation functions declared in cmemory.h
- *
- * UMemory does not contain any virtual functions.
- * Common "boilerplate" functions are defined in UObject.
- *
- * @stable ICU 2.4
- */
-class U_COMMON_API UMemory {
-public:
-
-#if U_OVERRIDE_CXX_ALLOCATION
-    /**
-     * Override for ICU4C C++ memory management.
-     * simple, non-class types are allocated using the macros in common/cmemory.h
-     * (uprv_malloc(), uprv_free(), uprv_realloc());
-     * they or something else could be used here to implement C++ new/delete
-     * for ICU4C C++ classes
-     * @stable ICU 2.4
-     */
-    static void * U_EXPORT2 operator new(size_t size);
-
-    /**
-     * Override for ICU4C C++ memory management.
-     * See new().
-     * @stable ICU 2.4
-     */
-    static void * U_EXPORT2 operator new[](size_t size);
-
-    /**
-     * Override for ICU4C C++ memory management.
-     * simple, non-class types are allocated using the macros in common/cmemory.h
-     * (uprv_malloc(), uprv_free(), uprv_realloc());
-     * they or something else could be used here to implement C++ new/delete
-     * for ICU4C C++ classes
-     * @stable ICU 2.4
-     */
-    static void U_EXPORT2 operator delete(void *p);
-
-    /**
-     * Override for ICU4C C++ memory management.
-     * See delete().
-     * @stable ICU 2.4
-     */
-    static void U_EXPORT2 operator delete[](void *p);
-
-#if U_HAVE_PLACEMENT_NEW
-    /**
-     * Override for ICU4C C++ memory management for STL.
-     * See new().
-     * @stable ICU 2.6
-     */
-    static inline void * U_EXPORT2 operator new(size_t, void *ptr) { return ptr; }
-
-    /**
-     * Override for ICU4C C++ memory management for STL.
-     * See delete().
-     * @stable ICU 2.6
-     */
-    static inline void U_EXPORT2 operator delete(void *, void *) {}
-#endif /* U_HAVE_PLACEMENT_NEW */
-#if U_HAVE_DEBUG_LOCATION_NEW
-    /**
-      * This method overrides the MFC debug version of the operator new
-      * 
-      * @param size   The requested memory size
-      * @param file   The file where the allocation was requested
-      * @param line   The line where the allocation was requested 
-      */ 
-    static void * U_EXPORT2 operator new(size_t size, const char* file, int line);
-    /**
-      * This method provides a matching delete for the MFC debug new
-      * 
-      * @param p      The pointer to the allocated memory
-      * @param file   The file where the allocation was requested
-      * @param line   The line where the allocation was requested 
-      */ 
-    static void U_EXPORT2 operator delete(void* p, const char* file, int line);
-#endif /* U_HAVE_DEBUG_LOCATION_NEW */
-#endif /* U_OVERRIDE_CXX_ALLOCATION */
-
-    /*
-     * Assignment operator not declared. The compiler will provide one
-     * which does nothing since this class does not contain any data members.
-     * API/code coverage may show the assignment operator as present and
-     * untested - ignore.
-     * Subclasses need this assignment operator if they use compiler-provided
-     * assignment operators of their own. An alternative to not declaring one
-     * here would be to declare and empty-implement a protected or public one.
-    UMemory &UMemory::operator=(const UMemory &);
-     */
-};
-
-/**
- * UObject is the common ICU "boilerplate" class.
- * UObject inherits UMemory (starting with ICU 2.4),
- * and all other public ICU C++ classes
- * are derived from UObject (starting with ICU 2.2).
- *
- * UObject contains common virtual functions like for ICU's "poor man's RTTI".
- * It does not contain default implementations of virtual methods
- * like getDynamicClassID to allow derived classes such as Format
- * to declare these as pure virtual.
- *
- * The clone() function is not available in UObject because it is not
- * implemented by all ICU classes.
- * Many ICU services provide a clone() function for their class trees,
- * defined on the service's C++ base class, and all subclasses within that
- * service class tree return a pointer to the service base class
- * (which itself is a subclass of UObject).
- * This is because some compilers do not support covariant (same-as-this)
- * return types; cast to the appropriate subclass if necessary.
- *
- * @stable ICU 2.2
- */
-class U_COMMON_API UObject : public UMemory {
-public:
-    /**
-     * Destructor.
-     *
-     * @stable ICU 2.2
-     */
-    virtual ~UObject();
-
-    /**
-     * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
-     *
-     * @stable ICU 2.2
-     */
-    virtual UClassID getDynamicClassID() const = 0;
-
-protected:
-    // the following functions are protected to prevent instantiation and
-    // direct use of UObject itself
-
-    // default constructor
-    // commented out because UObject is abstract (see getDynamicClassID)
-    // inline UObject() {}
-
-    // copy constructor
-    // commented out because UObject is abstract (see getDynamicClassID)
-    // inline UObject(const UObject &other) {}
-
-#if 0
-    // TODO Sometime in the future. Implement operator==().
-    // (This comment inserted in 2.2)
-    // some or all of the following "boilerplate" functions may be made public
-    // in a future ICU4C release when all subclasses implement them
-
-    // assignment operator
-    // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
-    // commented out because the implementation is the same as a compiler's default
-    // UObject &operator=(const UObject &other) { return *this; }
-
-    // comparison operators
-    virtual inline UBool operator==(const UObject &other) const { return this==&other; }
-    inline UBool operator!=(const UObject &other) const { return !operator==(other); }
-
-    // clone() commented out from the base class:
-    // some compilers do not support co-variant return types
-    // (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
-    // see also UObject class documentation.
-    // virtual UObject *clone() const;
-#endif
-
-    /*
-     * Assignment operator not declared. The compiler will provide one
-     * which does nothing since this class does not contain any data members.
-     * API/code coverage may show the assignment operator as present and
-     * untested - ignore.
-     * Subclasses need this assignment operator if they use compiler-provided
-     * assignment operators of their own. An alternative to not declaring one
-     * here would be to declare and empty-implement a protected or public one.
-    UObject &UObject::operator=(const UObject &);
-     */
-
-// Future implementation for RTTI that support subtyping. [alan]
-// 
-//  public:
-//     /**
-//      * @internal
-//      */
-//     static UClassID getStaticClassID();
-// 
-//     /**
-//      * @internal
-//      */
-//     UBool instanceOf(UClassID type) const;
-};
-
-/**
- * This is a simple macro to add ICU RTTI to an ICU object implementation.
- * This does not go into the header. This should only be used in *.cpp files.
- *
- * @param myClass The name of the class that needs RTTI defined.
- * @internal
- */
-#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
-    UClassID U_EXPORT2 myClass::getStaticClassID() { \
-        static char classID = 0; \
-        return (UClassID)&classID; \
-    } \
-    UClassID myClass::getDynamicClassID() const \
-    { return myClass::getStaticClassID(); }
-
-
-/**
- * This macro adds ICU RTTI to an ICU abstract class implementation.
- * This macro should be invoked in *.cpp files.  The corresponding
- * header should declare getStaticClassID.
- *
- * @param myClass The name of the class that needs RTTI defined.
- * @internal
- */
-#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
-    UClassID U_EXPORT2 myClass::getStaticClassID() { \
-        static char classID = 0; \
-        return (UClassID)&classID; \
-    }
-
-// /**
-//  * This macro adds ICU RTTI to an ICU concrete class implementation.
-//  * This macro should be invoked in *.cpp files.  The corresponding
-//  * header should declare getDynamicClassID and getStaticClassID.
-//  *
-//  * @param myClass The name of the class that needs RTTI defined.
-//  * @param myParent The name of the myClass's parent.
-//  * @internal
-//  */
-/*#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass, myParent) \
-    UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass, myParent) \
-    UClassID myClass::getDynamicClassID() const { \
-        return myClass::getStaticClassID(); \
-    }
-*/
-
-
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/uobject.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uobject.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uobject.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uobject.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,308 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2002-2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  uobject.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002jun26
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UOBJECT_H__
+#define __UOBJECT_H__
+
+#include "unicode/utypes.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \file
+ * \brief C++ API: Common ICU base class UObject.
+ */
+
+/**  U_OVERRIDE_CXX_ALLOCATION - Define this to override operator new and
+ *                               delete in UMemory. Enabled by default for ICU.
+ *
+ *         Enabling forces all allocation of ICU object types to use ICU's
+ *         memory allocation. On Windows, this allows the ICU DLL to be used by
+ *         applications that statically link the C Runtime library, meaning that
+ *         the app and ICU will be using different heaps.
+ *
+ * @stable ICU 2.2
+ */                              
+#ifndef U_OVERRIDE_CXX_ALLOCATION
+#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+
+/**  U_HAVE_PLACEMENT_NEW - Define this to define the placement new and
+ *                          delete in UMemory for STL.
+ *
+ * @stable ICU 2.6
+ */                              
+#ifndef U_HAVE_PLACEMENT_NEW
+#define U_HAVE_PLACEMENT_NEW 1
+#endif
+
+
+/**  U_HAVE_DEBUG_LOCATION_NEW - Define this to define the MFC debug
+ * version of the operator new.
+ *
+ * @stable ICU 3.4
+ */                              
+#ifndef U_HAVE_DEBUG_LOCATION_NEW
+#define U_HAVE_DEBUG_LOCATION_NEW 0
+#endif
+
+/**
+ * UMemory is the common ICU base class.
+ * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
+ *
+ * This is primarily to make it possible and simple to override the
+ * C++ memory management by adding new/delete operators to this base class.
+ *
+ * To override ALL ICU memory management, including that from plain C code,
+ * replace the allocation functions declared in cmemory.h
+ *
+ * UMemory does not contain any virtual functions.
+ * Common "boilerplate" functions are defined in UObject.
+ *
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UMemory {
+public:
+
+#if U_OVERRIDE_CXX_ALLOCATION
+    /**
+     * Override for ICU4C C++ memory management.
+     * simple, non-class types are allocated using the macros in common/cmemory.h
+     * (uprv_malloc(), uprv_free(), uprv_realloc());
+     * they or something else could be used here to implement C++ new/delete
+     * for ICU4C C++ classes
+     * @stable ICU 2.4
+     */
+    static void * U_EXPORT2 operator new(size_t size);
+
+    /**
+     * Override for ICU4C C++ memory management.
+     * See new().
+     * @stable ICU 2.4
+     */
+    static void * U_EXPORT2 operator new[](size_t size);
+
+    /**
+     * Override for ICU4C C++ memory management.
+     * simple, non-class types are allocated using the macros in common/cmemory.h
+     * (uprv_malloc(), uprv_free(), uprv_realloc());
+     * they or something else could be used here to implement C++ new/delete
+     * for ICU4C C++ classes
+     * @stable ICU 2.4
+     */
+    static void U_EXPORT2 operator delete(void *p);
+
+    /**
+     * Override for ICU4C C++ memory management.
+     * See delete().
+     * @stable ICU 2.4
+     */
+    static void U_EXPORT2 operator delete[](void *p);
+
+#if U_HAVE_PLACEMENT_NEW
+    /**
+     * Override for ICU4C C++ memory management for STL.
+     * See new().
+     * @stable ICU 2.6
+     */
+    static inline void * U_EXPORT2 operator new(size_t, void *ptr) { return ptr; }
+
+    /**
+     * Override for ICU4C C++ memory management for STL.
+     * See delete().
+     * @stable ICU 2.6
+     */
+    static inline void U_EXPORT2 operator delete(void *, void *) {}
+#endif /* U_HAVE_PLACEMENT_NEW */
+#if U_HAVE_DEBUG_LOCATION_NEW
+    /**
+      * This method overrides the MFC debug version of the operator new
+      * 
+      * @param size   The requested memory size
+      * @param file   The file where the allocation was requested
+      * @param line   The line where the allocation was requested 
+      */ 
+    static void * U_EXPORT2 operator new(size_t size, const char* file, int line);
+    /**
+      * This method provides a matching delete for the MFC debug new
+      * 
+      * @param p      The pointer to the allocated memory
+      * @param file   The file where the allocation was requested
+      * @param line   The line where the allocation was requested 
+      */ 
+    static void U_EXPORT2 operator delete(void* p, const char* file, int line);
+#endif /* U_HAVE_DEBUG_LOCATION_NEW */
+#endif /* U_OVERRIDE_CXX_ALLOCATION */
+
+    /*
+     * Assignment operator not declared. The compiler will provide one
+     * which does nothing since this class does not contain any data members.
+     * API/code coverage may show the assignment operator as present and
+     * untested - ignore.
+     * Subclasses need this assignment operator if they use compiler-provided
+     * assignment operators of their own. An alternative to not declaring one
+     * here would be to declare and empty-implement a protected or public one.
+    UMemory &UMemory::operator=(const UMemory &);
+     */
+};
+
+/**
+ * UObject is the common ICU "boilerplate" class.
+ * UObject inherits UMemory (starting with ICU 2.4),
+ * and all other public ICU C++ classes
+ * are derived from UObject (starting with ICU 2.2).
+ *
+ * UObject contains common virtual functions like for ICU's "poor man's RTTI".
+ * It does not contain default implementations of virtual methods
+ * like getDynamicClassID to allow derived classes such as Format
+ * to declare these as pure virtual.
+ *
+ * The clone() function is not available in UObject because it is not
+ * implemented by all ICU classes.
+ * Many ICU services provide a clone() function for their class trees,
+ * defined on the service's C++ base class, and all subclasses within that
+ * service class tree return a pointer to the service base class
+ * (which itself is a subclass of UObject).
+ * This is because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ *
+ * @stable ICU 2.2
+ */
+class U_COMMON_API UObject : public UMemory {
+public:
+    /**
+     * Destructor.
+     *
+     * @stable ICU 2.2
+     */
+    virtual ~UObject();
+
+    /**
+     * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const = 0;
+
+protected:
+    // the following functions are protected to prevent instantiation and
+    // direct use of UObject itself
+
+    // default constructor
+    // commented out because UObject is abstract (see getDynamicClassID)
+    // inline UObject() {}
+
+    // copy constructor
+    // commented out because UObject is abstract (see getDynamicClassID)
+    // inline UObject(const UObject &other) {}
+
+#if 0
+    // TODO Sometime in the future. Implement operator==().
+    // (This comment inserted in 2.2)
+    // some or all of the following "boilerplate" functions may be made public
+    // in a future ICU4C release when all subclasses implement them
+
+    // assignment operator
+    // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
+    // commented out because the implementation is the same as a compiler's default
+    // UObject &operator=(const UObject &other) { return *this; }
+
+    // comparison operators
+    virtual inline UBool operator==(const UObject &other) const { return this==&other; }
+    inline UBool operator!=(const UObject &other) const { return !operator==(other); }
+
+    // clone() commented out from the base class:
+    // some compilers do not support co-variant return types
+    // (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
+    // see also UObject class documentation.
+    // virtual UObject *clone() const;
+#endif
+
+    /*
+     * Assignment operator not declared. The compiler will provide one
+     * which does nothing since this class does not contain any data members.
+     * API/code coverage may show the assignment operator as present and
+     * untested - ignore.
+     * Subclasses need this assignment operator if they use compiler-provided
+     * assignment operators of their own. An alternative to not declaring one
+     * here would be to declare and empty-implement a protected or public one.
+    UObject &UObject::operator=(const UObject &);
+     */
+
+// Future implementation for RTTI that support subtyping. [alan]
+// 
+//  public:
+//     /**
+//      * @internal
+//      */
+//     static UClassID getStaticClassID();
+// 
+//     /**
+//      * @internal
+//      */
+//     UBool instanceOf(UClassID type) const;
+};
+
+/**
+ * This is a simple macro to add ICU RTTI to an ICU object implementation.
+ * This does not go into the header. This should only be used in *.cpp files.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
+    UClassID U_EXPORT2 myClass::getStaticClassID() { \
+        static char classID = 0; \
+        return (UClassID)&classID; \
+    } \
+    UClassID myClass::getDynamicClassID() const \
+    { return myClass::getStaticClassID(); }
+
+
+/**
+ * This macro adds ICU RTTI to an ICU abstract class implementation.
+ * This macro should be invoked in *.cpp files.  The corresponding
+ * header should declare getStaticClassID.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
+    UClassID U_EXPORT2 myClass::getStaticClassID() { \
+        static char classID = 0; \
+        return (UClassID)&classID; \
+    }
+
+// /**
+//  * This macro adds ICU RTTI to an ICU concrete class implementation.
+//  * This macro should be invoked in *.cpp files.  The corresponding
+//  * header should declare getDynamicClassID and getStaticClassID.
+//  *
+//  * @param myClass The name of the class that needs RTTI defined.
+//  * @param myParent The name of the myClass's parent.
+//  * @internal
+//  */
+/*#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass, myParent) \
+    UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass, myParent) \
+    UClassID myClass::getDynamicClassID() const { \
+        return myClass::getStaticClassID(); \
+    }
+*/
+
+
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/uobslete.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uobslete.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uobslete.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,32 +0,0 @@
-/*
-*******************************************************************************
-*   Copyright (C) 2004-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*******************************************************************************
-*
-*   file name:  
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   Created by: genheaders.pl, a perl script written by Ram Viswanadha
-*
-*  Contains data for commenting out APIs.
-*  Gets included by umachine.h
-*
-*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
-*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
-*/
-
-#ifndef UOBSLETE_H
-#define UOBSLETE_H
-
-#ifdef U_HIDE_OBSOLETE_API
-
-#    if U_DISABLE_RENAMING
-#    else
-#    endif /* U_DISABLE_RENAMING */
-
-#endif /* U_HIDE_OBSOLETE_API */
-#endif /* UOBSLETE_H */
-

Copied: MacRuby/trunk/icu-1060/unicode/uobslete.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uobslete.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uobslete.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uobslete.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,32 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2004-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*
+*   file name:  
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   Created by: genheaders.pl, a perl script written by Ram Viswanadha
+*
+*  Contains data for commenting out APIs.
+*  Gets included by umachine.h
+*
+*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef UOBSLETE_H
+#define UOBSLETE_H
+
+#ifdef U_HIDE_OBSOLETE_API
+
+#    if U_DISABLE_RENAMING
+#    else
+#    endif /* U_DISABLE_RENAMING */
+
+#endif /* U_HIDE_OBSOLETE_API */
+#endif /* UOBSLETE_H */
+

Deleted: MacRuby/trunk/icu-1060/unicode/urbtok.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/urbtok.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/urbtok.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,126 +0,0 @@
-/*
-******************************************************************************
-* Copyright (C) 2006-2008 Apple Inc. All Rights Reserved.
-******************************************************************************
-*/
-
-#ifndef URBTOK_H
-#define URBTOK_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/utext.h"
-#include "unicode/ubrk.h"
-#include "unicode/parseerr.h"
-
-
-typedef struct RuleBasedTokenRange {
-    signed long location;
-    signed long length;
-} RuleBasedTokenRange;
-
-/**
- * Open a new UBreakIterator for tokenizing text using specified breaking rules.
- * The rule syntax is ... (TBD)
- * @param rules A set of rules specifying the text breaking conventions.
- * @param rulesLength The number of characters in rules, or -1 if null-terminated.
- * @param parseErr   Receives position and context information for any syntax errors
- *                   detected while parsing the rules.
- * @param status A UErrorCode to receive any errors.
- * @return A UBreakIterator for the specified rules.
- * @see ubrk_open
- * @internal
- */
-U_INTERNAL UBreakIterator* U_EXPORT2
-urbtok_openRules(const UChar     *rules,
-               int32_t         rulesLength,
-               UParseError     *parseErr,
-               UErrorCode      *status);
-
-/**
- * Open a new UBreakIterator for tokenizing text using specified breaking rules.
- * @param rules A set of rules specifying the text breaking conventions. The binary rules
- *              must be at least 32-bit aligned. Note: This version makes a copy of the
- *				rules, so after calling this function the caller can close or release
- *				the rules that were passed to this function. The copy created by this
- *				call will be freed when ubrk_close() is called on the UBreakIterator*.
- * @param status A UErrorCode to receive any errors.
- * @return A UBreakIterator for the specified rules.
- * @see ubrk_open
- * @internal
- */
-U_INTERNAL UBreakIterator* U_EXPORT2
-urbtok_openBinaryRules(const uint8_t *rules,
-               UErrorCode      *status);
-
-/**
- * Open a new UBreakIterator for tokenizing text using specified breaking rules.
- * @param rules A set of rules specifying the text breaking conventions. The binary rules
- *              must be at least 32-bit aligned. Note: This version does NOT make a copy
- *				of the rules, so after calling this function the caller must not close or
- *				release the rules passed to this function until after they are finished
- *				with this UBreakIterator* (and any others created using the same rules)
-  *				and have called ubrk_close() to close the UBreakIterator* (and any others
- *				using the same rules).
- * @param status A UErrorCode to receive any errors.
- * @return A UBreakIterator for the specified rules.
- * @see ubrk_open
- * @internal
- */
-U_INTERNAL UBreakIterator* U_EXPORT2
-urbtok_openBinaryRulesNoCopy(const uint8_t *rules,
-               UErrorCode      *status);
-
-/**
- * Get the (native-endian) binary break rules for this tokenizer.
- * @param bi The tokenizer to use.
- * @param buffer The output buffer for the rules. You can pass 0 to get the required size.
- * @param buffSize The size of the output buffer.
- * @param status A UErrorCode to receive any errors.
- * @return The actual size of the binary rules, whether they fit the buffer or not.
- * @internal
- */
-U_INTERNAL uint32_t U_EXPORT2
-urbtok_getBinaryRules(UBreakIterator      *bi,
-                uint8_t             *buffer,
-                uint32_t            buffSize,
-                UErrorCode          *status);
-
-/**
- * Tokenize text using a rule-based tokenizer.
- * @param bi The tokenizer to use.
- * @param maxTokens The maximum number of tokens to return.
- * @param outTokens An array of RuleBasedTokenRange to fill in with the tokens.
- * @param outTokenFlags An (optional) array of uint32_t to fill in with token flags.
- * @return The number of tokens returned, 0 if done.
- * @internal
- */
-U_INTERNAL int32_t U_EXPORT2
-urbtok_tokenize(UBreakIterator      *bi,
-               int32_t              maxTokens,
-               RuleBasedTokenRange  *outTokens,
-               unsigned long        *outTokenFlags);
-
-/**
- * Swap the endianness of a set of binary break rules.
- * @param rules A set of rules which need swapping.
- * @param buffer The output buffer for the swapped rules, which must be the same
- *               size as the input rules buffer.
- * @param inIsBigEndian UBool indicating whether the input is big-endian
- * @param outIsBigEndian UBool indicating whether the output should be big-endian
- * @param status A UErrorCode to receive any errors.
- * @internal
- */
-U_INTERNAL void U_EXPORT2
-urbtok_swapBinaryRules(const uint8_t *rules,
-               uint8_t          *buffer,
-               UBool            inIsBigEndian,
-               UBool            outIsBigEndian,
-               UErrorCode       *status);
-
-
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/urbtok.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/urbtok.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/urbtok.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/urbtok.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,126 @@
+/*
+******************************************************************************
+* Copyright (C) 2006-2008 Apple Inc. All Rights Reserved.
+******************************************************************************
+*/
+
+#ifndef URBTOK_H
+#define URBTOK_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/utext.h"
+#include "unicode/ubrk.h"
+#include "unicode/parseerr.h"
+
+
+typedef struct RuleBasedTokenRange {
+    signed long location;
+    signed long length;
+} RuleBasedTokenRange;
+
+/**
+ * Open a new UBreakIterator for tokenizing text using specified breaking rules.
+ * The rule syntax is ... (TBD)
+ * @param rules A set of rules specifying the text breaking conventions.
+ * @param rulesLength The number of characters in rules, or -1 if null-terminated.
+ * @param parseErr   Receives position and context information for any syntax errors
+ *                   detected while parsing the rules.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified rules.
+ * @see ubrk_open
+ * @internal
+ */
+U_INTERNAL UBreakIterator* U_EXPORT2
+urbtok_openRules(const UChar     *rules,
+               int32_t         rulesLength,
+               UParseError     *parseErr,
+               UErrorCode      *status);
+
+/**
+ * Open a new UBreakIterator for tokenizing text using specified breaking rules.
+ * @param rules A set of rules specifying the text breaking conventions. The binary rules
+ *              must be at least 32-bit aligned. Note: This version makes a copy of the
+ *				rules, so after calling this function the caller can close or release
+ *				the rules that were passed to this function. The copy created by this
+ *				call will be freed when ubrk_close() is called on the UBreakIterator*.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified rules.
+ * @see ubrk_open
+ * @internal
+ */
+U_INTERNAL UBreakIterator* U_EXPORT2
+urbtok_openBinaryRules(const uint8_t *rules,
+               UErrorCode      *status);
+
+/**
+ * Open a new UBreakIterator for tokenizing text using specified breaking rules.
+ * @param rules A set of rules specifying the text breaking conventions. The binary rules
+ *              must be at least 32-bit aligned. Note: This version does NOT make a copy
+ *				of the rules, so after calling this function the caller must not close or
+ *				release the rules passed to this function until after they are finished
+ *				with this UBreakIterator* (and any others created using the same rules)
+  *				and have called ubrk_close() to close the UBreakIterator* (and any others
+ *				using the same rules).
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified rules.
+ * @see ubrk_open
+ * @internal
+ */
+U_INTERNAL UBreakIterator* U_EXPORT2
+urbtok_openBinaryRulesNoCopy(const uint8_t *rules,
+               UErrorCode      *status);
+
+/**
+ * Get the (native-endian) binary break rules for this tokenizer.
+ * @param bi The tokenizer to use.
+ * @param buffer The output buffer for the rules. You can pass 0 to get the required size.
+ * @param buffSize The size of the output buffer.
+ * @param status A UErrorCode to receive any errors.
+ * @return The actual size of the binary rules, whether they fit the buffer or not.
+ * @internal
+ */
+U_INTERNAL uint32_t U_EXPORT2
+urbtok_getBinaryRules(UBreakIterator      *bi,
+                uint8_t             *buffer,
+                uint32_t            buffSize,
+                UErrorCode          *status);
+
+/**
+ * Tokenize text using a rule-based tokenizer.
+ * @param bi The tokenizer to use.
+ * @param maxTokens The maximum number of tokens to return.
+ * @param outTokens An array of RuleBasedTokenRange to fill in with the tokens.
+ * @param outTokenFlags An (optional) array of uint32_t to fill in with token flags.
+ * @return The number of tokens returned, 0 if done.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2
+urbtok_tokenize(UBreakIterator      *bi,
+               int32_t              maxTokens,
+               RuleBasedTokenRange  *outTokens,
+               unsigned long        *outTokenFlags);
+
+/**
+ * Swap the endianness of a set of binary break rules.
+ * @param rules A set of rules which need swapping.
+ * @param buffer The output buffer for the swapped rules, which must be the same
+ *               size as the input rules buffer.
+ * @param inIsBigEndian UBool indicating whether the input is big-endian
+ * @param outIsBigEndian UBool indicating whether the output should be big-endian
+ * @param status A UErrorCode to receive any errors.
+ * @internal
+ */
+U_INTERNAL void U_EXPORT2
+urbtok_swapBinaryRules(const uint8_t *rules,
+               uint8_t          *buffer,
+               UBool            inIsBigEndian,
+               UBool            outIsBigEndian,
+               UErrorCode       *status);
+
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/uregex.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uregex.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uregex.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,989 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 2004-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*   file name:  regex.h
-*   encoding:   US-ASCII
-*   indentation:4
-*
-*   created on: 2004mar09
-*   created by: Andy Heninger
-*
-*   ICU Regular Expressions, API for C
-*/
-
-/**
- * \file
- * \brief C API: Regular Expressions
- *
- * <p>This is a C wrapper around the C++ RegexPattern and RegexMatcher classes.</p>
- */
-
-#ifndef UREGEX_H
-#define UREGEX_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_REGULAR_EXPRESSIONS
-
-#include "unicode/parseerr.h"
-
-struct URegularExpression;
-/**
-  * Structure represeting a compiled regular rexpression, plus the results
-  *    of a match operation.
-  * @stable ICU 3.0
-  */
-typedef struct URegularExpression URegularExpression;
-
-
-/**
- * Constants for Regular Expression Match Modes.
- * @stable ICU 2.4
- */
-typedef enum URegexpFlag{
-
-#ifndef U_HIDE_DRAFT_API 
-    /** Forces normalization of pattern and strings. 
-    Not implemented yet, just a placeholder, hence draft. 
-    @draft ICU 2.4 */
-    UREGEX_CANON_EQ         = 128,
-#endif
-    /**  Enable case insensitive matching.  @stable ICU 2.4 */
-    UREGEX_CASE_INSENSITIVE = 2,
-
-    /**  Allow white space and comments within patterns  @stable ICU 2.4 */
-    UREGEX_COMMENTS         = 4,
-
-    /**  If set, '.' matches line terminators,  otherwise '.' matching stops at line end.
-      *  @stable ICU 2.4 */
-    UREGEX_DOTALL           = 32,
-    
-    /**  If set, treat the entire pattern as a literal string.  
-      *  Metacharacters or escape sequences in the input sequence will be given 
-      *  no special meaning.
-      *
-      *  The flags CASE_INSENSITIVE and UNICODE_CASE retain their impact
-      *  on matching when used in conjunction with this flag.
-      *  The other flags become superfluous.
-      *  TODO:  say which escapes are still handled; anything Java does
-      *         early (\u) we should still do.
-      * @draft ICU 4.0
-      */
-    UREGEX_LITERAL = 16,
-
-    /**   Control behavior of "$" and "^"
-      *    If set, recognize line terminators within string,
-      *    otherwise, match only at start and end of input string.
-      *   @stable ICU 2.4 */
-    UREGEX_MULTILINE        = 8,
-    
-    /**   Unix-only line endings.
-      *   When this mode is enabled, only \u000a is recognized as a line ending
-      *    in the behavior of ., ^, and $.
-      *   @draft ICU 4.0
-      */
-    UREGEX_UNIX_LINES = 1,
-
-    /**  Unicode word boundaries.
-      *     If set, \b uses the Unicode TR 29 definition of word boundaries.
-      *     Warning: Unicode word boundaries are quite different from
-      *     traditional regular expression word boundaries.  See
-      *     http://unicode.org/reports/tr29/#Word_Boundaries
-      *     @stable ICU 2.8
-      */
-    UREGEX_UWORD            = 256,
-
-     /**  Error on Unrecognized backslash escapes.
-       *     If set, fail with an error on patterns that contain
-       *     backslash-escaped ASCII letters without a known specail
-       *     meaning.  If this flag is not set, these
-       *     escaped letters represent themselves.
-       *     @draft ICU 4.0
-       */
-     UREGEX_ERROR_ON_UNKNOWN_ESCAPES = 512
-
-}  URegexpFlag;
-
-/**
-  *  Open (compile) an ICU regular expression.  Compiles the regular expression in
-  *  string form into an internal representation using the specified match mode flags.
-  *  The resulting regular expression handle can then be used to perform various
-  *   matching operations.
-  *
-  * @param pattern        The Regular Expression pattern to be compiled. 
-  * @param patternLength  The length of the pattern, or -1 if the pattern is
-  *                       NUL termintated.
-  * @param flags          Flags that alter the default matching behavior for
-  *                       the regular expression, UREGEX_CASE_INSENSITIVE, for
-  *                       example.  For default behavior, set this parameter to zero.
-  *                       See <code>enum URegexpFlag</code>.  All desired flags
-  *                       are bitwise-ORed together.
-  * @param pe             Receives the position (line and column nubers) of any syntax
-  *                       error within the source regular expression string.  If this
-  *                       information is not wanted, pass NULL for this parameter.
-  * @param status         Receives error detected by this function.
-  * @stable ICU 3.0
-  *
-  */
-U_STABLE URegularExpression * U_EXPORT2
-uregex_open( const  UChar          *pattern,
-                    int32_t         patternLength,
-                    uint32_t        flags,
-                    UParseError    *pe,
-                    UErrorCode     *status);
-
-/**
-  *  Open (compile) an ICU regular expression.  The resulting regular expression
-  *   handle can then be used to perform various matching operations.
-  *  <p>
-  *   This function is the same as uregex_open, except that the pattern
-  *   is supplied as an 8 bit char * string in the default code page.
-  *
-  * @param pattern        The Regular Expression pattern to be compiled, 
-  *                       NUL termintated.  
-  * @param flags          Flags that alter the default matching behavior for
-  *                       the regular expression, UREGEX_CASE_INSENSITIVE, for
-  *                       example.  For default behavior, set this parameter to zero.
-  *                       See <code>enum URegexpFlag</code>.  All desired flags
-  *                       are bitwise-ORed together.
-  * @param pe             Receives the position (line and column nubers) of any syntax
-  *                       error within the source regular expression string.  If this
-  *                       information is not wanted, pass NULL for this parameter.
-  * @param status         Receives errors detected by this function.
-  * @return               The URegularExpression object representing the compiled
-  *                       pattern.
-  *
-  * @stable ICU 3.0
-  */
-#if !UCONFIG_NO_CONVERSION
-U_STABLE URegularExpression * U_EXPORT2
-uregex_openC( const char           *pattern,
-                    uint32_t        flags,
-                    UParseError    *pe,
-                    UErrorCode     *status);
-#endif
-
-
-
-/**
-  *  Close the regular expression, recovering all resources (memory) it
-  *   was holding.
-  *
-  * @param regexp   The regular expression to be closed.
-  * @stable ICU 3.0
-  */
-U_STABLE void U_EXPORT2 
-uregex_close(URegularExpression *regexp);
-
-/**
- * Make a copy of a compiled regular expression.  Cloning a regular
- * expression is faster than opening a second instance from the source
- * form of the expression, and requires less memory.
- * <p>
- * Note that the current input string and the position of any matched text
- *  within it are not cloned; only the pattern itself and and the
- *  match mode flags are copied.
- * <p>
- * Cloning can be particularly useful to threaded applications that perform
- * multiple match operations in parallel.  Each concurrent RE
- * operation requires its own instance of a URegularExpression.
- *
- * @param regexp   The compiled regular expression to be cloned.
- * @param status   Receives indication of any errors encountered
- * @return the cloned copy of the compiled regular expression.
- * @stable ICU 3.0
- */
-U_STABLE URegularExpression * U_EXPORT2 
-uregex_clone(const URegularExpression *regexp, UErrorCode *status);
-
-/**
- *  Return a pointer to the source form of the pattern for this regular expression.
- *
- * @param regexp     The compiled regular expression.
- * @param patLength  This output parameter will be set to the length of the
- *                   pattern string.  A NULL pointer may be used here if the
- *                   pattern length is not needed, as would be the case if
- *                   the pattern is known in advance to be a NUL terminated
- *                   string.
- * @param status     Receives errors detected by this function.
- * @return a pointer to the pattern string.  The storage for the string is
- *                   owned by the regular expression object, and must not be
- *                   altered or deleted by the application.  The returned string
- *                   will remain valid until the regular expression is closed.
- * @stable ICU 3.0
- */
-U_STABLE const UChar * U_EXPORT2 
-uregex_pattern(const  URegularExpression   *regexp,
-                         int32_t           *patLength,
-                         UErrorCode        *status);
-
-
-/**
-  * Get the match mode flags that were specified when compiling this regular expression.
-  * @param status   Receives errors detected by this function.
-  * @param regexp   The compiled regular expression.
-  * @return         The match mode flags
-  * @see URegexpFlag
-  * @stable ICU 3.0
-  */
-U_STABLE int32_t U_EXPORT2 
-uregex_flags(const  URegularExpression   *regexp,
-                    UErrorCode           *status);
-
-
-/**
-  *  Set the subject text string upon which the regular expression will look for matches.
-  *  This function may be called any number of times, allowing the regular
-  *  expression pattern to be applied to different strings.
-  *  <p>
-  *  Regular expression matching operations work directly on the application's
-  *  string data.  No copy is made.  The subject string data must not be
-  *  altered after calling this function until after all regular expression
-  *  operations involving this string data are completed.  
-  *  <p>
-  *  Zero length strings are permitted.  In this case, no subsequent match
-  *  operation will dereference the text string pointer.
-  *
-  * @param regexp     The compiled regular expression.
-  * @param text       The subject text string.
-  * @param textLength The length of the subject text, or -1 if the string
-  *                   is NUL terminated.
-  * @param status     Receives errors detected by this function.
-  * @stable ICU 3.0
-  */
-U_STABLE void U_EXPORT2 
-uregex_setText(URegularExpression *regexp,
-               const UChar        *text,
-               int32_t             textLength,
-               UErrorCode         *status);
-
-/**
-  *  Get the subject text that is currently associated with this 
-  *   regular expression object.  This simply returns whatever string
-  *   pointer was previously supplied via uregex_setText().
-  *
-  * @param regexp      The compiled regular expression.
-  * @param textLength  The length of the string is returned in this output parameter. 
-  *                    A NULL pointer may be used here if the
-  *                    text length is not needed, as would be the case if
-  *                    the text is known in advance to be a NUL terminated
-  *                    string.
-  * @param status      Receives errors detected by this function.
-  * @return            Poiner to the subject text string currently associated with
-  *                    this regular expression.
-  * @stable ICU 3.0
-  */
-U_STABLE const UChar * U_EXPORT2 
-uregex_getText(URegularExpression *regexp,
-               int32_t            *textLength,
-               UErrorCode         *status);
-
-/**
-  *   Attempts to match the input string against the pattern.
-  *   To succeed, the match must extend to the end of the string,
-  *   or cover the complete match region.
-  *
-  *   If startIndex >= zero the match operation starts at the specified
-  *   index and must extend to the end of the input string.  Any region
-  *   that has been specified is reset.
-  *
-  *   If startIndex == -1 the match must cover the input region, or the entire
-  *   input string if no region has been set.  This directly corresponds to
-  *   Matcher.matches() in Java
-  *
-  *    @param  regexp      The compiled regular expression.
-  *    @param  startIndex  The input string index at which to begin matching, or -1
-  *                        to match the input Region.
-  *    @param  status      Receives errors detected by this function.
-  *    @return             TRUE if there is a match
-  *    @stable ICU 3.0
-  */
-U_STABLE UBool U_EXPORT2 
-uregex_matches(URegularExpression *regexp,
-                int32_t            startIndex,
-                UErrorCode        *status);
-
-/**
-  *   Attempts to match the input string, starting from the specified index, against the pattern.
-  *   The match may be of any length, and is not required to extend to the end
-  *   of the input string.  Contrast with uregex_matches().
-  *
-  *   <p>If startIndex is >= 0 any input region that was set for this
-  *   URegularExpression is reset before the operation begins.
-  *
-  *   <p>If the specified starting index == -1 the match begins at the start of the input 
-  *   region, or at the start of the full string if no region has been specified.
-  *   This corresponds directly with Matcher.lookingAt() in Java.
-  *
-  *   <p>If the match succeeds then more information can be obtained via the
-  *    <code>uregexp_start()</code>, <code>uregexp_end()</code>,
-  *    and <code>uregexp_group()</code> functions.</p>
-  *
-  *    @param   regexp      The compiled regular expression.
-  *    @param   startIndex  The input string index at which to begin matching, or
-  *                         -1 to match the Input Region
-  *    @param   status      A reference to a UErrorCode to receive any errors.
-  *    @return  TRUE if there is a match.
-  *    @stable ICU 3.0
-  */
-U_STABLE UBool U_EXPORT2 
-uregex_lookingAt(URegularExpression *regexp,
-                 int32_t             startIndex,
-                 UErrorCode         *status);
-
-/**
-  *   Find the first matching substring of the input string that matches the pattern.
-  *   If startIndex is >= zero the search for a match begins at the specified index,
-  *          and any match region is reset.  This corresponds directly with
-  *          Matcher.find(startIndex) in Java.
-  *
-  *   If startIndex == -1 the search begins at the start of the input region,
-  *           or at the start of the full string if no region has been specified.
-  *
-  *   If a match is found, <code>uregex_start(), uregex_end()</code>, and
-  *   <code>uregex_group()</code> will provide more information regarding the match.
-  *
-  *   @param   regexp      The compiled regular expression.
-  *   @param   startIndex  The position in the input string to begin the search, or
-  *                        -1 to search within the Input Region.
-  *   @param   status      A reference to a UErrorCode to receive any errors.
-  *   @return              TRUE if a match is found.
-  *   @stable ICU 3.0
-  */
-U_STABLE UBool U_EXPORT2 
-uregex_find(URegularExpression *regexp,
-            int32_t             startIndex, 
-            UErrorCode         *status);
-
-/**
-  *  Find the next pattern match in the input string.  Begin searching 
-  *  the input at the location following the end of he previous match, 
-  *  or at the start of the string (or region) if there is no 
-  *  previous match.  If a match is found, <code>uregex_start(), uregex_end()</code>, and
-  *  <code>uregex_group()</code> will provide more information regarding the match.
-  *
-  *  @param   regexp      The compiled regular expression.
-  *  @param   status      A reference to a UErrorCode to receive any errors.
-  *  @return              TRUE if a match is found.
-  *  @see uregex_reset
-  *  @stable ICU 3.0
-  */
-U_STABLE UBool U_EXPORT2 
-uregex_findNext(URegularExpression *regexp,
-                UErrorCode         *status);
-
-/**
-  *   Get the number of capturing groups in this regular expression's pattern.
-  *   @param   regexp      The compiled regular expression.
-  *   @param   status      A reference to a UErrorCode to receive any errors.
-  *   @return the number of capture groups
-  *   @stable ICU 3.0
-  */
-U_STABLE int32_t U_EXPORT2 
-uregex_groupCount(URegularExpression *regexp,
-                  UErrorCode         *status);
-
-/** Extract the string for the specified matching expression or subexpression.
-  * Group #0 is the complete string of matched text.
-  * Group #1 is the text matched by the first set of capturing parentheses.
-  *
-  *   @param   regexp       The compiled regular expression.
-  *   @param   groupNum     The capture group to extract.  Group 0 is the complete
-  *                         match.  The value of this parameter must be
-  *                         less than or equal to the number of capture groups in
-  *                         the pattern.
-  *   @param   dest         Buffer to receive the matching string data
-  *   @param   destCapacity Capacity of the dest buffer.
-  *   @param   status       A reference to a UErrorCode to receive any errors.
-  *   @return               Length of matching data,
-  *                         or -1 if no applicable match.
-  *   @stable ICU 3.0
-  */
-U_STABLE int32_t U_EXPORT2 
-uregex_group(URegularExpression *regexp,
-             int32_t             groupNum,
-             UChar              *dest,
-             int32_t             destCapacity,
-             UErrorCode          *status);
-
-
-/**
-  *   Returns the index in the input string of the start of the text matched by the
-  *   specified capture group during the previous match operation.  Return -1 if
-  *   the capture group was not part of the last match.
-  *   Group #0 refers to the complete range of matched text.
-  *   Group #1 refers to the text matched by the first set of capturing parentheses.
-  *
-  *    @param   regexp      The compiled regular expression.
-  *    @param   groupNum    The capture group number
-  *    @param   status      A reference to a UErrorCode to receive any errors.
-  *    @return              the starting position in the input of the text matched 
-  *                         by the specified group.
-  *    @stable ICU 3.0
-  */
-U_STABLE int32_t U_EXPORT2 
-uregex_start(URegularExpression *regexp,
-             int32_t             groupNum,
-             UErrorCode          *status);
-
-/**
-  *   Returns the index in the input string of the position following the end
-  *   of the text matched by the specified capture group.
-  *   Return -1 if the capture group was not part of the last match.
-  *   Group #0 refers to the complete range of matched text.
-  *   Group #1 refers to the text matched by the first set of capturing parentheses.
-  *
-  *    @param   regexp      The compiled regular expression.
-  *    @param   groupNum    The capture group number
-  *    @param   status      A reference to a UErrorCode to receive any errors.
-  *    @return              the index of the position following the last matched character.
-  *    @stable ICU 3.0
-  */
-U_STABLE int32_t U_EXPORT2 
-uregex_end(URegularExpression   *regexp,
-           int32_t               groupNum,
-           UErrorCode           *status);
-
-/**
-  *  Reset any saved state from the previous match.  Has the effect of
-  *  causing uregex_findNext to begin at the specified index, and causing
-  *  uregex_start(), uregex_end() and uregex_group() to return an error 
-  *  indicating that there is no match information available.  Clears any
-  *  match region that may have been set.
-  *
-  *    @param   regexp      The compiled regular expression.
-  *    @param   index       The position in the text at which a
-  *                         uregex_findNext() should begin searching.
-  *    @param   status      A reference to a UErrorCode to receive any errors.
-  *    @stable ICU 3.0
-  */
-U_STABLE void U_EXPORT2 
-uregex_reset(URegularExpression    *regexp,
-             int32_t               index,
-             UErrorCode            *status);
-             
-             
-/** Sets the limits of the matching region for this URegularExpression.
-  * The region is the part of the input string that will be considered when matching.
-  * Invoking this method resets any saved state from the previous match, 
-  * then sets the region to start at the index specified by the start parameter
-  * and end at the index specified by the end parameter.
-  *
-  * Depending on the transparency and anchoring being used (see useTransparentBounds
-  * and useAnchoringBounds), certain constructs such as anchors may behave differently
-  * at or around the boundaries of the region
-  *
-  * The function will fail if start is greater than limit, or if either index
-  *  is less than zero or greater than the length of the string being matched.
-  *
-  * @param regexp The compiled regular expression.
-  * @param regionStart  The index to begin searches at.
-  * @param regionLimit  The index to end searches at (exclusive).
-  * @param status A pointer to a UErrorCode to receive any errors.
-  * @draft ICU 4.0
-  */
-U_DRAFT void U_EXPORT2
-uregex_setRegion(URegularExpression   *regexp,
-                 int32_t               regionStart,
-                 int32_t               regionLimit,
-                 UErrorCode           *status);
-
-/**
-  * Reports the start index of the matching region. Any matches found are limited to
-  * to the region bounded by regionStart (inclusive) and regionEnd (exclusive).
-  *
-  * @param regexp The compiled regular expression.
-  * @param status A pointer to a UErrorCode to receive any errors.
-  * @return The starting index of this matcher's region.
-  * @draft ICU 4.0
-  */
-U_DRAFT int32_t U_EXPORT2
-uregex_regionStart(const  URegularExpression   *regexp,
-                          UErrorCode           *status);
-
-
-
-/**
-  * Reports the end index (exclusive) of the matching region for this URegularExpression.
-  * Any matches found are limited to to the region bounded by regionStart (inclusive)
-  * and regionEnd (exclusive).
-  *
-  * @param regexp The compiled regular expression.
-  * @param status A pointer to a UErrorCode to receive any errors.
-  * @return The ending point of this matcher's region.
-  * @draft ICU 4.0
-  */
-U_DRAFT int32_t U_EXPORT2
-uregex_regionEnd(const  URegularExpression   *regexp,
-                        UErrorCode           *status);
-
-/**
-  * Queries the transparency of region bounds for this URegularExpression.
-  * See useTransparentBounds for a description of transparent and opaque bounds.
-  * By default, matching boundaries are opaque.
-  *
-  * @param regexp The compiled regular expression.
-  * @param status A pointer to a UErrorCode to receive any errors.
-  * @return TRUE if this matcher is using opaque bounds, false if it is not.
-  * @draft ICU 4.0
-  */
-U_DRAFT UBool U_EXPORT2
-uregex_hasTransparentBounds(const  URegularExpression   *regexp,
-                                   UErrorCode           *status);
-
-
-/**
-  * Sets the transparency of region bounds for this URegularExpression.
-  * Invoking this function with an argument of TRUE will set matches to use transparent bounds.
-  * If the boolean argument is FALSE, then opaque bounds will be used.
-  *
-  * Using transparent bounds, the boundaries of the matching region are transparent
-  * to lookahead, lookbehind, and boundary matching constructs. Those constructs can
-  * see text beyond the boundaries of the region while checking for a match.
-  *
-  * With opaque bounds, no text outside of the matching region is visible to lookahead,
-  * lookbehind, and boundary matching constructs.
-  *
-  * By default, opaque bounds are used.
-  *
-  * @param   regexp The compiled regular expression.
-  * @param   b      TRUE for transparent bounds; FALSE for opaque bounds
-  * @param   status A pointer to a UErrorCode to receive any errors.
-  * @draft   ICU 4.0
-  **/
-U_DRAFT void U_EXPORT2  
-uregex_useTransparentBounds(URegularExpression   *regexp, 
-                            UBool                b,
-                            UErrorCode           *status);
-
-
-/**
-  * Return true if this URegularExpression is using anchoring bounds.
-  * By default, anchoring region bounds are used.
-  *
-  * @param  regexp The compiled regular expression.
-  * @param  status A pointer to a UErrorCode to receive any errors.
-  * @return TRUE if this matcher is using anchoring bounds.
-  * @draft  ICU 4.0
-  */
-U_DRAFT UBool U_EXPORT2
-uregex_hasAnchoringBounds(const  URegularExpression   *regexp,
-                                 UErrorCode           *status);
-
-
-/**
-  * Set whether this URegularExpression is using Anchoring Bounds for its region.
-  * With anchoring bounds, pattern anchors such as ^ and $ will match at the start
-  * and end of the region.  Without Anchoring Bounds, anchors will only match at
-  * the positions they would in the complete text.
-  *
-  * Anchoring Bounds are the default for regions.
-  *
-  * @param regexp The compiled regular expression.
-  * @param b      TRUE if to enable anchoring bounds; FALSE to disable them.
-  * @param status A pointer to a UErrorCode to receive any errors.
-  * @draft   ICU 4.0
-  */
-U_DRAFT void U_EXPORT2
-uregex_useAnchoringBounds(URegularExpression   *regexp,
-                          UBool                 b,
-                          UErrorCode           *status);
-
-/**
-  * Return TRUE if the most recent matching operation touched the
-  *  end of the text being processed.  In this case, additional input text could
-  *  change the results of that match.
-  *
-  *  @param regexp The compiled regular expression.
-  *  @param status A pointer to a UErrorCode to receive any errors.
-  *  @return  TRUE if the most recent match hit the end of input
-  *  @draft   ICU 4.0
-  */
-U_DRAFT UBool U_EXPORT2
-uregex_hitEnd(const  URegularExpression   *regexp,
-                     UErrorCode           *status);
-
-/**
-  * Return TRUE the most recent match succeeded and additional input could cause
-  * it to fail. If this function returns false and a match was found, then more input
-  * might change the match but the match won't be lost. If a match was not found,
-  * then requireEnd has no meaning.
-  *
-  * @param regexp The compiled regular expression.
-  * @param status A pointer to a UErrorCode to receive any errors.
-  * @return TRUE  if more input could cause the most recent match to no longer match.
-  * @draft  ICU 4.0
-  */
-U_DRAFT UBool U_EXPORT2   
-uregex_requireEnd(const  URegularExpression   *regexp,
-                         UErrorCode           *status);
-
-
-
-
-
-/**
-  *    Replaces every substring of the input that matches the pattern
-  *    with the given replacement string.  This is a convenience function that
-  *    provides a complete find-and-replace-all operation.
-  *
-  *    This method scans the input string looking for matches of the pattern. 
-  *    Input that is not part of any match is copied unchanged to the
-  *    destination buffer.  Matched regions are replaced in the output
-  *    buffer by the replacement string.   The replacement string may contain
-  *    references to capture groups; these take the form of $1, $2, etc.
-  *
-  *    @param   regexp             The compiled regular expression.
-  *    @param   replacementText    A string containing the replacement text.
-  *    @param   replacementLength  The length of the replacement string, or
-  *                                -1 if it is NUL terminated.
-  *    @param   destBuf            A (UChar *) buffer that will receive the result.
-  *    @param   destCapacity       The capacity of the desitnation buffer.
-  *    @param   status             A reference to a UErrorCode to receive any errors.
-  *    @return                     The length of the string resulting from the find
-  *                                and replace operation.  In the event that the
-  *                                destination capacity is inadequate, the return value
-  *                                is still the full length of the untruncated string.
-  *    @stable ICU 3.0
-  */
-U_STABLE int32_t U_EXPORT2 
-uregex_replaceAll(URegularExpression    *regexp,
-                  const UChar           *replacementText,
-                  int32_t                replacementLength,
-                  UChar                 *destBuf,
-                  int32_t                destCapacity,
-                  UErrorCode            *status);
-
-
-/**
-  *    Replaces the first substring of the input that matches the pattern
-  *    with the given replacement string.  This is a convenience function that
-  *    provides a complete find-and-replace operation.
-  *
-  *    This method scans the input string looking for a match of the pattern. 
-  *    All input that is not part of the match is copied unchanged to the
-  *    destination buffer.  The matched region is replaced in the output
-  *    buffer by the replacement string.   The replacement string may contain
-  *    references to capture groups; these take the form of $1, $2, etc.
-  *
-  *    @param   regexp             The compiled regular expression.
-  *    @param   replacementText    A string containing the replacement text.
-  *    @param   replacementLength  The length of the replacement string, or
-  *                                -1 if it is NUL terminated.
-  *    @param   destBuf            A (UChar *) buffer that will receive the result.
-  *    @param   destCapacity       The capacity of the desitnation buffer.
-  *    @param   status             a reference to a UErrorCode to receive any errors.
-  *    @return                     The length of the string resulting from the find
-  *                                and replace operation.  In the event that the
-  *                                destination capacity is inadequate, the return value
-  *                                is still the full length of the untruncated string.
-  *    @stable ICU 3.0
-  */
-U_STABLE int32_t U_EXPORT2 
-uregex_replaceFirst(URegularExpression  *regexp,
-                    const UChar         *replacementText,
-                    int32_t              replacementLength,
-                    UChar               *destBuf,
-                    int32_t              destCapacity,
-                    UErrorCode          *status);
-
-
-/**
-  *   Implements a replace operation intended to be used as part of an
-  *   incremental find-and-replace.
-  *
-  *   <p>The input string, starting from the end of the previous match and ending at
-  *   the start of the current match, is appended to the destination string.  Then the
-  *   replacement string is appended to the output string,
-  *   including handling any substitutions of captured text.</p>
-  *
-  *   <p>A note on preflight computation of buffersize and error handling:
-  *   Calls to uregex_appendReplacement() and uregex_appendTail() are
-  *   designed to be chained, one after another, with the destination
-  *   buffer pointer and buffer capacity updated after each in preparation
-  *   to for the next.  If the destination buffer is exhausted partway through such a
-  *   sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned.  Normal
-  *   ICU conventions are for a function to perform no action if it is
-  *   called with an error status, but for this one case, uregex_appendRepacement()
-  *   will operate normally so that buffer size computations will complete
-  *   correctly.
-  *
-  *   <p>For simple, prepackaged, non-incremental find-and-replace
-  *      operations, see replaceFirst() or replaceAll().</p>
-  *
-  *   @param   regexp      The regular expression object.  
-  *   @param   replacementText The string that will replace the matched portion of the
-  *                        input string as it is copied to the destination buffer.
-  *                        The replacement text may contain references ($1, for
-  *                        example) to capture groups from the match.
-  *   @param   replacementLength  The length of the replacement text string,
-  *                        or -1 if the string is NUL terminated.
-  *   @param   destBuf     The buffer into which the results of the
-  *                        find-and-replace are placed.  On return, this pointer
-  *                        will be updated to refer to the beginning of the
-  *                        unused portion of buffer, leaving it in position for
-  *                        a subsequent call to this function.
-  *   @param   destCapacity The size of the output buffer,  On return, this
-  *                        parameter will be updated to reflect the space remaining
-  *                        unused in the output buffer.
-  *   @param   status      A reference to a UErrorCode to receive any errors. 
-  *   @return              The length of the result string.  In the event that
-  *                        destCapacity is inadequate, the full length of the
-  *                        untruncated output string is returned.
-  *
-  *   @stable ICU 3.0
-  *
-  */
-U_STABLE int32_t U_EXPORT2 
-uregex_appendReplacement(URegularExpression    *regexp,
-                  const UChar           *replacementText,
-                  int32_t                replacementLength,
-                  UChar                **destBuf,
-                  int32_t               *destCapacity,
-                  UErrorCode            *status);
-
-
-/**
-  * As the final step in a find-and-replace operation, append the remainder
-  * of the input string, starting at the position following the last match,
-  * to the destination string. <code>uregex_appendTail()</code> is intended 
-  *  to be invoked after one or more invocations of the
-  *  <code>uregex_appendReplacement()</code> function.
-  *
-  *   @param   regexp      The regular expression object.  This is needed to 
-  *                        obtain the input string and with the position
-  *                        of the last match within it.
-  *   @param   destBuf     The buffer in which the results of the
-  *                        find-and-replace are placed.  On return, the pointer
-  *                        will be updated to refer to the beginning of the
-  *                        unused portion of buffer.
-  *   @param   destCapacity The size of the output buffer,  On return, this
-  *                        value will be updated to reflect the space remaining
-  *                        unused in the output buffer.
-  *   @param   status      A reference to a UErrorCode to receive any errors. 
-  *   @return              The length of the result string.  In the event that
-  *                        destCapacity is inadequate, the full length of the
-  *                        untruncated output string is returned.
-  *
-  *   @stable ICU 3.0
-  */
-U_STABLE int32_t U_EXPORT2 
-uregex_appendTail(URegularExpression    *regexp,
-                  UChar                **destBuf,
-                  int32_t               *destCapacity,
-                  UErrorCode            *status);
-
-
-
-
- /**
-   * Split a string into fields.  Somewhat like split() from Perl.
-   *  The pattern matches identify delimiters that separate the input
-   *  into fields.  The input data between the matches becomes the
-   *  fields themselves.
-   * <p>
-   *  Each of the fields is copied from the input string to the destination
-   *  buffer, and the NUL terminated.  The position of each field within
-   *  the destination buffer is returned in the destFields array.
-   *
-   *  Note:  another choice for the design of this function would be to not
-   *         copy the resulting fields at all, but to return indexes and
-   *         lengths within the source text.  
-   *           Advantages would be
-   *             o  Faster.  No Copying.
-   *             o  Nothing extra needed when field data may contain embedded NUL chars.
-   *             o  Less memory needed if working on large data.
-   *           Disadvantages
-   *             o  Less consistent with C++ split, which copies into an
-   *                array of UnicodeStrings.
-   *             o  No NUL termination, extracted fields would be less convenient
-   *                to use in most cases.
-   *             o  Possible problems in the future, when support Unicode Normalization
-   *                could cause the fields to not correspond exactly to
-   *                a range of the source text.
-   * 
-   *    @param   regexp      The compiled regular expression.
-   *    @param   destBuf     A (UChar *) buffer to receive the fields that
-   *                         are extracted from the input string. These
-   *                         field pointers will refer to positions within the
-   *                         destination buffer supplied by the caller.  Any
-   *                         extra positions within the destFields array will be
-   *                         set to NULL.
-   *    @param   destCapacity The capacity of the destBuf.
-   *    @param   requiredCapacity  The actual capacity required of the destBuf.
-   *                         If destCapacity is too small, requiredCapacity will return 
-   *                         the total capacity required to hold all of the output, and
-   *                         a U_BUFFER_OVERFLOW_ERROR will be returned.
-   *    @param   destFields  An array to be filled with the position of each
-   *                         of the extracted fields within destBuf.
-   *    @param   destFieldsCapacity  The number of elements in the destFields array.
-   *                If the number of fields found is less than destFieldsCapacity,
-   *                the extra destFields elements are set to zero.
-   *                If destFieldsCapacity is too small, the trailing part of the
-   *                input, including any field delimiters, is treated as if it
-   *                were the last field - it is copied to the destBuf, and
-   *                its position is in the destBuf is stored in the last element
-   *                of destFields.  This behavior mimics that of Perl.  It is not
-   *                an error condition, and no error status is returned when all destField
-   *                positions are used.
-   * @param status  A reference to a UErrorCode to receive any errors.
-   * @return        The number of fields into which the input string was split.
-   * @stable ICU 3.0
-   */
-U_STABLE int32_t U_EXPORT2 
-uregex_split(   URegularExpression      *regexp,
-                  UChar                 *destBuf,
-                  int32_t                destCapacity,
-                  int32_t               *requiredCapacity,
-                  UChar                 *destFields[],
-                  int32_t                destFieldsCapacity,
-                  UErrorCode            *status);
-
-
-
-
-/**
- * Set a processing time limit for match operations with this URegularExpression.
- *
- * Some patterns, when matching certain strings, can run in exponential time.
- * For practical purposes, the match operation may appear to be in an
- * infinite loop.
- * When a limit is set a match operation will fail with an error if the
- * limit is exceeded.
- * <p>
- * The units of the limit are steps of the match engine.
- * Correspondence with actual processor time will depend on the speed
- * of the processor and the details of the specific pattern, but will
- * typically be on the order of milliseconds.
- * <p>
- * By default, the matching time is not limited.
- * <p>
- *
- * @param   regexp      The compiled regular expression.
- * @param   limit       The limit value, or 0 for no limit.
- * @param   status      A reference to a UErrorCode to receive any errors.
- * @draft ICU 4.0
- */
-U_DRAFT void U_EXPORT2
-uregex_setTimeLimit(URegularExpression      *regexp,
-                    int32_t                  limit,
-                    UErrorCode              *status);
-
-/**
- * Get the time limit for for matches with this URegularExpression.
- * A return value of zero indicates that there is no limit.
- *
- * @param   regexp      The compiled regular expression.
- * @param   status      A reference to a UErrorCode to receive any errors.
- * @return the maximum allowed time for a match, in units of processing steps.
- * @draft ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-uregex_getTimeLimit(const URegularExpression      *regexp,
-                          UErrorCode              *status);
-
-/**
- * Set the amount of heap storage avaliable for use by the match backtracking stack.
- * <p>
- * ICU uses a backtracking regular expression engine, with the backtrack stack
- * maintained on the heap.  This function sets the limit to the amount of memory
- * that can be used  for this purpose.  A backtracking stack overflow will
- * result in an error from the match operation that caused it.
- * <p>
- * A limit is desirable because a malicious or poorly designed pattern can use
- * excessive memory, potentially crashing the process.  A limit is enabled
- * by default.
- * <p>
- * @param   regexp      The compiled regular expression.
- * @param   limit       The maximum size, in bytes, of the matching backtrack stack.
- *                      A value of -1 means no limit.
- *                      The limit must be greater than zero, or -1.
- * @param   status      A reference to a UErrorCode to receive any errors.
- *
- * @draft ICU 4.0
- */
-U_DRAFT void U_EXPORT2
-uregex_setStackLimit(URegularExpression      *regexp,
-                     int32_t                  limit,
-                     UErrorCode              *status);
-
-/**
- * Get the size of the heap storage available for use by the back tracking stack.
- *
- * @return  the maximum backtracking stack size, in bytes, or zero if the
- *          stack size is unlimited.
- * @draft ICU 4.0
- */
-U_DRAFT int32_t U_EXPORT2
-uregex_getStackLimit(const URegularExpression      *regexp,
-                           UErrorCode              *status);
-
-
-/**
- * Function pointer for a regular expression matching callback function.
- * When set, a callback function will be called periodically during matching
- * operations.  If the call back function returns FALSE, the matching
- * operation will be terminated early.
- *
- * Note:  the callback function must not call other functions on this
- *        URegularExpression.
- *
- * @param context  context pointer.  The callback function will be invoked
- *                 with the context specified at the time that
- *                 uregex_setMatchCallback() is called.
- * @param steps    the accumulated processing time, in match steps, 
- *                 for this matching operation.
- * @return         TRUE to continue the matching operation.
- *                 FALSE to terminate the matching operation.
- * @draft ICU 4.0
- */
-U_CDECL_BEGIN
-typedef UBool U_CALLCONV URegexMatchCallback (
-                   const void *context,
-                   int32_t     steps);
-U_CDECL_END
-
-/**
- * Set a callback function for this URegularExpression.
- * During matching operations the function will be called periodically,
- * giving the application the opportunity to terminate a long-running
- * match.
- *
- * @param   regexp      The compiled regular expression.
- * @param   callback    A pointer to the user-supplied callback function.
- * @param   context     User context pointer.  The value supplied at the
- *                      time the callback function is set will be saved
- *                      and passed to the callback each time that it is called.
- * @param   status      A reference to a UErrorCode to receive any errors.
- * @draft ICU 4.0
- */
-U_DRAFT void U_EXPORT2
-uregex_setMatchCallback(URegularExpression      *regexp,
-                        URegexMatchCallback     *callback,
-                        const void              *context,
-                        UErrorCode              *status);
-
-
-/**
- *  Get the callback function for this URegularExpression.
- *
- * @param   regexp      The compiled regular expression.
- * @param   callback    Out paramater, receives a pointer to the user-supplied 
- *                      callback function.
- * @param   context     Out parameter, receives the user context pointer that
- *                      was set when uregex_setMatchCallback() was called.
- * @param   status      A reference to a UErrorCode to receive any errors.
- * @draft ICU 4.0
- */
-U_DRAFT void U_EXPORT2
-uregex_getMatchCallback(const URegularExpression    *regexp,
-                        URegexMatchCallback        **callback,
-                        const void                 **context,
-                        UErrorCode                  *status);
-
-
-
-#endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS  */
-#endif   /*  UREGEX_H  */

Copied: MacRuby/trunk/icu-1060/unicode/uregex.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uregex.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uregex.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uregex.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,989 @@
+/*
+**********************************************************************
+*   Copyright (C) 2004-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  regex.h
+*   encoding:   US-ASCII
+*   indentation:4
+*
+*   created on: 2004mar09
+*   created by: Andy Heninger
+*
+*   ICU Regular Expressions, API for C
+*/
+
+/**
+ * \file
+ * \brief C API: Regular Expressions
+ *
+ * <p>This is a C wrapper around the C++ RegexPattern and RegexMatcher classes.</p>
+ */
+
+#ifndef UREGEX_H
+#define UREGEX_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
+#include "unicode/parseerr.h"
+
+struct URegularExpression;
+/**
+  * Structure represeting a compiled regular rexpression, plus the results
+  *    of a match operation.
+  * @stable ICU 3.0
+  */
+typedef struct URegularExpression URegularExpression;
+
+
+/**
+ * Constants for Regular Expression Match Modes.
+ * @stable ICU 2.4
+ */
+typedef enum URegexpFlag{
+
+#ifndef U_HIDE_DRAFT_API 
+    /** Forces normalization of pattern and strings. 
+    Not implemented yet, just a placeholder, hence draft. 
+    @draft ICU 2.4 */
+    UREGEX_CANON_EQ         = 128,
+#endif
+    /**  Enable case insensitive matching.  @stable ICU 2.4 */
+    UREGEX_CASE_INSENSITIVE = 2,
+
+    /**  Allow white space and comments within patterns  @stable ICU 2.4 */
+    UREGEX_COMMENTS         = 4,
+
+    /**  If set, '.' matches line terminators,  otherwise '.' matching stops at line end.
+      *  @stable ICU 2.4 */
+    UREGEX_DOTALL           = 32,
+    
+    /**  If set, treat the entire pattern as a literal string.  
+      *  Metacharacters or escape sequences in the input sequence will be given 
+      *  no special meaning.
+      *
+      *  The flags CASE_INSENSITIVE and UNICODE_CASE retain their impact
+      *  on matching when used in conjunction with this flag.
+      *  The other flags become superfluous.
+      *  TODO:  say which escapes are still handled; anything Java does
+      *         early (\u) we should still do.
+      * @draft ICU 4.0
+      */
+    UREGEX_LITERAL = 16,
+
+    /**   Control behavior of "$" and "^"
+      *    If set, recognize line terminators within string,
+      *    otherwise, match only at start and end of input string.
+      *   @stable ICU 2.4 */
+    UREGEX_MULTILINE        = 8,
+    
+    /**   Unix-only line endings.
+      *   When this mode is enabled, only \u000a is recognized as a line ending
+      *    in the behavior of ., ^, and $.
+      *   @draft ICU 4.0
+      */
+    UREGEX_UNIX_LINES = 1,
+
+    /**  Unicode word boundaries.
+      *     If set, \b uses the Unicode TR 29 definition of word boundaries.
+      *     Warning: Unicode word boundaries are quite different from
+      *     traditional regular expression word boundaries.  See
+      *     http://unicode.org/reports/tr29/#Word_Boundaries
+      *     @stable ICU 2.8
+      */
+    UREGEX_UWORD            = 256,
+
+     /**  Error on Unrecognized backslash escapes.
+       *     If set, fail with an error on patterns that contain
+       *     backslash-escaped ASCII letters without a known specail
+       *     meaning.  If this flag is not set, these
+       *     escaped letters represent themselves.
+       *     @draft ICU 4.0
+       */
+     UREGEX_ERROR_ON_UNKNOWN_ESCAPES = 512
+
+}  URegexpFlag;
+
+/**
+  *  Open (compile) an ICU regular expression.  Compiles the regular expression in
+  *  string form into an internal representation using the specified match mode flags.
+  *  The resulting regular expression handle can then be used to perform various
+  *   matching operations.
+  *
+  * @param pattern        The Regular Expression pattern to be compiled. 
+  * @param patternLength  The length of the pattern, or -1 if the pattern is
+  *                       NUL termintated.
+  * @param flags          Flags that alter the default matching behavior for
+  *                       the regular expression, UREGEX_CASE_INSENSITIVE, for
+  *                       example.  For default behavior, set this parameter to zero.
+  *                       See <code>enum URegexpFlag</code>.  All desired flags
+  *                       are bitwise-ORed together.
+  * @param pe             Receives the position (line and column nubers) of any syntax
+  *                       error within the source regular expression string.  If this
+  *                       information is not wanted, pass NULL for this parameter.
+  * @param status         Receives error detected by this function.
+  * @stable ICU 3.0
+  *
+  */
+U_STABLE URegularExpression * U_EXPORT2
+uregex_open( const  UChar          *pattern,
+                    int32_t         patternLength,
+                    uint32_t        flags,
+                    UParseError    *pe,
+                    UErrorCode     *status);
+
+/**
+  *  Open (compile) an ICU regular expression.  The resulting regular expression
+  *   handle can then be used to perform various matching operations.
+  *  <p>
+  *   This function is the same as uregex_open, except that the pattern
+  *   is supplied as an 8 bit char * string in the default code page.
+  *
+  * @param pattern        The Regular Expression pattern to be compiled, 
+  *                       NUL termintated.  
+  * @param flags          Flags that alter the default matching behavior for
+  *                       the regular expression, UREGEX_CASE_INSENSITIVE, for
+  *                       example.  For default behavior, set this parameter to zero.
+  *                       See <code>enum URegexpFlag</code>.  All desired flags
+  *                       are bitwise-ORed together.
+  * @param pe             Receives the position (line and column nubers) of any syntax
+  *                       error within the source regular expression string.  If this
+  *                       information is not wanted, pass NULL for this parameter.
+  * @param status         Receives errors detected by this function.
+  * @return               The URegularExpression object representing the compiled
+  *                       pattern.
+  *
+  * @stable ICU 3.0
+  */
+#if !UCONFIG_NO_CONVERSION
+U_STABLE URegularExpression * U_EXPORT2
+uregex_openC( const char           *pattern,
+                    uint32_t        flags,
+                    UParseError    *pe,
+                    UErrorCode     *status);
+#endif
+
+
+
+/**
+  *  Close the regular expression, recovering all resources (memory) it
+  *   was holding.
+  *
+  * @param regexp   The regular expression to be closed.
+  * @stable ICU 3.0
+  */
+U_STABLE void U_EXPORT2 
+uregex_close(URegularExpression *regexp);
+
+/**
+ * Make a copy of a compiled regular expression.  Cloning a regular
+ * expression is faster than opening a second instance from the source
+ * form of the expression, and requires less memory.
+ * <p>
+ * Note that the current input string and the position of any matched text
+ *  within it are not cloned; only the pattern itself and and the
+ *  match mode flags are copied.
+ * <p>
+ * Cloning can be particularly useful to threaded applications that perform
+ * multiple match operations in parallel.  Each concurrent RE
+ * operation requires its own instance of a URegularExpression.
+ *
+ * @param regexp   The compiled regular expression to be cloned.
+ * @param status   Receives indication of any errors encountered
+ * @return the cloned copy of the compiled regular expression.
+ * @stable ICU 3.0
+ */
+U_STABLE URegularExpression * U_EXPORT2 
+uregex_clone(const URegularExpression *regexp, UErrorCode *status);
+
+/**
+ *  Return a pointer to the source form of the pattern for this regular expression.
+ *
+ * @param regexp     The compiled regular expression.
+ * @param patLength  This output parameter will be set to the length of the
+ *                   pattern string.  A NULL pointer may be used here if the
+ *                   pattern length is not needed, as would be the case if
+ *                   the pattern is known in advance to be a NUL terminated
+ *                   string.
+ * @param status     Receives errors detected by this function.
+ * @return a pointer to the pattern string.  The storage for the string is
+ *                   owned by the regular expression object, and must not be
+ *                   altered or deleted by the application.  The returned string
+ *                   will remain valid until the regular expression is closed.
+ * @stable ICU 3.0
+ */
+U_STABLE const UChar * U_EXPORT2 
+uregex_pattern(const  URegularExpression   *regexp,
+                         int32_t           *patLength,
+                         UErrorCode        *status);
+
+
+/**
+  * Get the match mode flags that were specified when compiling this regular expression.
+  * @param status   Receives errors detected by this function.
+  * @param regexp   The compiled regular expression.
+  * @return         The match mode flags
+  * @see URegexpFlag
+  * @stable ICU 3.0
+  */
+U_STABLE int32_t U_EXPORT2 
+uregex_flags(const  URegularExpression   *regexp,
+                    UErrorCode           *status);
+
+
+/**
+  *  Set the subject text string upon which the regular expression will look for matches.
+  *  This function may be called any number of times, allowing the regular
+  *  expression pattern to be applied to different strings.
+  *  <p>
+  *  Regular expression matching operations work directly on the application's
+  *  string data.  No copy is made.  The subject string data must not be
+  *  altered after calling this function until after all regular expression
+  *  operations involving this string data are completed.  
+  *  <p>
+  *  Zero length strings are permitted.  In this case, no subsequent match
+  *  operation will dereference the text string pointer.
+  *
+  * @param regexp     The compiled regular expression.
+  * @param text       The subject text string.
+  * @param textLength The length of the subject text, or -1 if the string
+  *                   is NUL terminated.
+  * @param status     Receives errors detected by this function.
+  * @stable ICU 3.0
+  */
+U_STABLE void U_EXPORT2 
+uregex_setText(URegularExpression *regexp,
+               const UChar        *text,
+               int32_t             textLength,
+               UErrorCode         *status);
+
+/**
+  *  Get the subject text that is currently associated with this 
+  *   regular expression object.  This simply returns whatever string
+  *   pointer was previously supplied via uregex_setText().
+  *
+  * @param regexp      The compiled regular expression.
+  * @param textLength  The length of the string is returned in this output parameter. 
+  *                    A NULL pointer may be used here if the
+  *                    text length is not needed, as would be the case if
+  *                    the text is known in advance to be a NUL terminated
+  *                    string.
+  * @param status      Receives errors detected by this function.
+  * @return            Poiner to the subject text string currently associated with
+  *                    this regular expression.
+  * @stable ICU 3.0
+  */
+U_STABLE const UChar * U_EXPORT2 
+uregex_getText(URegularExpression *regexp,
+               int32_t            *textLength,
+               UErrorCode         *status);
+
+/**
+  *   Attempts to match the input string against the pattern.
+  *   To succeed, the match must extend to the end of the string,
+  *   or cover the complete match region.
+  *
+  *   If startIndex >= zero the match operation starts at the specified
+  *   index and must extend to the end of the input string.  Any region
+  *   that has been specified is reset.
+  *
+  *   If startIndex == -1 the match must cover the input region, or the entire
+  *   input string if no region has been set.  This directly corresponds to
+  *   Matcher.matches() in Java
+  *
+  *    @param  regexp      The compiled regular expression.
+  *    @param  startIndex  The input string index at which to begin matching, or -1
+  *                        to match the input Region.
+  *    @param  status      Receives errors detected by this function.
+  *    @return             TRUE if there is a match
+  *    @stable ICU 3.0
+  */
+U_STABLE UBool U_EXPORT2 
+uregex_matches(URegularExpression *regexp,
+                int32_t            startIndex,
+                UErrorCode        *status);
+
+/**
+  *   Attempts to match the input string, starting from the specified index, against the pattern.
+  *   The match may be of any length, and is not required to extend to the end
+  *   of the input string.  Contrast with uregex_matches().
+  *
+  *   <p>If startIndex is >= 0 any input region that was set for this
+  *   URegularExpression is reset before the operation begins.
+  *
+  *   <p>If the specified starting index == -1 the match begins at the start of the input 
+  *   region, or at the start of the full string if no region has been specified.
+  *   This corresponds directly with Matcher.lookingAt() in Java.
+  *
+  *   <p>If the match succeeds then more information can be obtained via the
+  *    <code>uregexp_start()</code>, <code>uregexp_end()</code>,
+  *    and <code>uregexp_group()</code> functions.</p>
+  *
+  *    @param   regexp      The compiled regular expression.
+  *    @param   startIndex  The input string index at which to begin matching, or
+  *                         -1 to match the Input Region
+  *    @param   status      A reference to a UErrorCode to receive any errors.
+  *    @return  TRUE if there is a match.
+  *    @stable ICU 3.0
+  */
+U_STABLE UBool U_EXPORT2 
+uregex_lookingAt(URegularExpression *regexp,
+                 int32_t             startIndex,
+                 UErrorCode         *status);
+
+/**
+  *   Find the first matching substring of the input string that matches the pattern.
+  *   If startIndex is >= zero the search for a match begins at the specified index,
+  *          and any match region is reset.  This corresponds directly with
+  *          Matcher.find(startIndex) in Java.
+  *
+  *   If startIndex == -1 the search begins at the start of the input region,
+  *           or at the start of the full string if no region has been specified.
+  *
+  *   If a match is found, <code>uregex_start(), uregex_end()</code>, and
+  *   <code>uregex_group()</code> will provide more information regarding the match.
+  *
+  *   @param   regexp      The compiled regular expression.
+  *   @param   startIndex  The position in the input string to begin the search, or
+  *                        -1 to search within the Input Region.
+  *   @param   status      A reference to a UErrorCode to receive any errors.
+  *   @return              TRUE if a match is found.
+  *   @stable ICU 3.0
+  */
+U_STABLE UBool U_EXPORT2 
+uregex_find(URegularExpression *regexp,
+            int32_t             startIndex, 
+            UErrorCode         *status);
+
+/**
+  *  Find the next pattern match in the input string.  Begin searching 
+  *  the input at the location following the end of he previous match, 
+  *  or at the start of the string (or region) if there is no 
+  *  previous match.  If a match is found, <code>uregex_start(), uregex_end()</code>, and
+  *  <code>uregex_group()</code> will provide more information regarding the match.
+  *
+  *  @param   regexp      The compiled regular expression.
+  *  @param   status      A reference to a UErrorCode to receive any errors.
+  *  @return              TRUE if a match is found.
+  *  @see uregex_reset
+  *  @stable ICU 3.0
+  */
+U_STABLE UBool U_EXPORT2 
+uregex_findNext(URegularExpression *regexp,
+                UErrorCode         *status);
+
+/**
+  *   Get the number of capturing groups in this regular expression's pattern.
+  *   @param   regexp      The compiled regular expression.
+  *   @param   status      A reference to a UErrorCode to receive any errors.
+  *   @return the number of capture groups
+  *   @stable ICU 3.0
+  */
+U_STABLE int32_t U_EXPORT2 
+uregex_groupCount(URegularExpression *regexp,
+                  UErrorCode         *status);
+
+/** Extract the string for the specified matching expression or subexpression.
+  * Group #0 is the complete string of matched text.
+  * Group #1 is the text matched by the first set of capturing parentheses.
+  *
+  *   @param   regexp       The compiled regular expression.
+  *   @param   groupNum     The capture group to extract.  Group 0 is the complete
+  *                         match.  The value of this parameter must be
+  *                         less than or equal to the number of capture groups in
+  *                         the pattern.
+  *   @param   dest         Buffer to receive the matching string data
+  *   @param   destCapacity Capacity of the dest buffer.
+  *   @param   status       A reference to a UErrorCode to receive any errors.
+  *   @return               Length of matching data,
+  *                         or -1 if no applicable match.
+  *   @stable ICU 3.0
+  */
+U_STABLE int32_t U_EXPORT2 
+uregex_group(URegularExpression *regexp,
+             int32_t             groupNum,
+             UChar              *dest,
+             int32_t             destCapacity,
+             UErrorCode          *status);
+
+
+/**
+  *   Returns the index in the input string of the start of the text matched by the
+  *   specified capture group during the previous match operation.  Return -1 if
+  *   the capture group was not part of the last match.
+  *   Group #0 refers to the complete range of matched text.
+  *   Group #1 refers to the text matched by the first set of capturing parentheses.
+  *
+  *    @param   regexp      The compiled regular expression.
+  *    @param   groupNum    The capture group number
+  *    @param   status      A reference to a UErrorCode to receive any errors.
+  *    @return              the starting position in the input of the text matched 
+  *                         by the specified group.
+  *    @stable ICU 3.0
+  */
+U_STABLE int32_t U_EXPORT2 
+uregex_start(URegularExpression *regexp,
+             int32_t             groupNum,
+             UErrorCode          *status);
+
+/**
+  *   Returns the index in the input string of the position following the end
+  *   of the text matched by the specified capture group.
+  *   Return -1 if the capture group was not part of the last match.
+  *   Group #0 refers to the complete range of matched text.
+  *   Group #1 refers to the text matched by the first set of capturing parentheses.
+  *
+  *    @param   regexp      The compiled regular expression.
+  *    @param   groupNum    The capture group number
+  *    @param   status      A reference to a UErrorCode to receive any errors.
+  *    @return              the index of the position following the last matched character.
+  *    @stable ICU 3.0
+  */
+U_STABLE int32_t U_EXPORT2 
+uregex_end(URegularExpression   *regexp,
+           int32_t               groupNum,
+           UErrorCode           *status);
+
+/**
+  *  Reset any saved state from the previous match.  Has the effect of
+  *  causing uregex_findNext to begin at the specified index, and causing
+  *  uregex_start(), uregex_end() and uregex_group() to return an error 
+  *  indicating that there is no match information available.  Clears any
+  *  match region that may have been set.
+  *
+  *    @param   regexp      The compiled regular expression.
+  *    @param   index       The position in the text at which a
+  *                         uregex_findNext() should begin searching.
+  *    @param   status      A reference to a UErrorCode to receive any errors.
+  *    @stable ICU 3.0
+  */
+U_STABLE void U_EXPORT2 
+uregex_reset(URegularExpression    *regexp,
+             int32_t               index,
+             UErrorCode            *status);
+             
+             
+/** Sets the limits of the matching region for this URegularExpression.
+  * The region is the part of the input string that will be considered when matching.
+  * Invoking this method resets any saved state from the previous match, 
+  * then sets the region to start at the index specified by the start parameter
+  * and end at the index specified by the end parameter.
+  *
+  * Depending on the transparency and anchoring being used (see useTransparentBounds
+  * and useAnchoringBounds), certain constructs such as anchors may behave differently
+  * at or around the boundaries of the region
+  *
+  * The function will fail if start is greater than limit, or if either index
+  *  is less than zero or greater than the length of the string being matched.
+  *
+  * @param regexp The compiled regular expression.
+  * @param regionStart  The index to begin searches at.
+  * @param regionLimit  The index to end searches at (exclusive).
+  * @param status A pointer to a UErrorCode to receive any errors.
+  * @draft ICU 4.0
+  */
+U_DRAFT void U_EXPORT2
+uregex_setRegion(URegularExpression   *regexp,
+                 int32_t               regionStart,
+                 int32_t               regionLimit,
+                 UErrorCode           *status);
+
+/**
+  * Reports the start index of the matching region. Any matches found are limited to
+  * to the region bounded by regionStart (inclusive) and regionEnd (exclusive).
+  *
+  * @param regexp The compiled regular expression.
+  * @param status A pointer to a UErrorCode to receive any errors.
+  * @return The starting index of this matcher's region.
+  * @draft ICU 4.0
+  */
+U_DRAFT int32_t U_EXPORT2
+uregex_regionStart(const  URegularExpression   *regexp,
+                          UErrorCode           *status);
+
+
+
+/**
+  * Reports the end index (exclusive) of the matching region for this URegularExpression.
+  * Any matches found are limited to to the region bounded by regionStart (inclusive)
+  * and regionEnd (exclusive).
+  *
+  * @param regexp The compiled regular expression.
+  * @param status A pointer to a UErrorCode to receive any errors.
+  * @return The ending point of this matcher's region.
+  * @draft ICU 4.0
+  */
+U_DRAFT int32_t U_EXPORT2
+uregex_regionEnd(const  URegularExpression   *regexp,
+                        UErrorCode           *status);
+
+/**
+  * Queries the transparency of region bounds for this URegularExpression.
+  * See useTransparentBounds for a description of transparent and opaque bounds.
+  * By default, matching boundaries are opaque.
+  *
+  * @param regexp The compiled regular expression.
+  * @param status A pointer to a UErrorCode to receive any errors.
+  * @return TRUE if this matcher is using opaque bounds, false if it is not.
+  * @draft ICU 4.0
+  */
+U_DRAFT UBool U_EXPORT2
+uregex_hasTransparentBounds(const  URegularExpression   *regexp,
+                                   UErrorCode           *status);
+
+
+/**
+  * Sets the transparency of region bounds for this URegularExpression.
+  * Invoking this function with an argument of TRUE will set matches to use transparent bounds.
+  * If the boolean argument is FALSE, then opaque bounds will be used.
+  *
+  * Using transparent bounds, the boundaries of the matching region are transparent
+  * to lookahead, lookbehind, and boundary matching constructs. Those constructs can
+  * see text beyond the boundaries of the region while checking for a match.
+  *
+  * With opaque bounds, no text outside of the matching region is visible to lookahead,
+  * lookbehind, and boundary matching constructs.
+  *
+  * By default, opaque bounds are used.
+  *
+  * @param   regexp The compiled regular expression.
+  * @param   b      TRUE for transparent bounds; FALSE for opaque bounds
+  * @param   status A pointer to a UErrorCode to receive any errors.
+  * @draft   ICU 4.0
+  **/
+U_DRAFT void U_EXPORT2  
+uregex_useTransparentBounds(URegularExpression   *regexp, 
+                            UBool                b,
+                            UErrorCode           *status);
+
+
+/**
+  * Return true if this URegularExpression is using anchoring bounds.
+  * By default, anchoring region bounds are used.
+  *
+  * @param  regexp The compiled regular expression.
+  * @param  status A pointer to a UErrorCode to receive any errors.
+  * @return TRUE if this matcher is using anchoring bounds.
+  * @draft  ICU 4.0
+  */
+U_DRAFT UBool U_EXPORT2
+uregex_hasAnchoringBounds(const  URegularExpression   *regexp,
+                                 UErrorCode           *status);
+
+
+/**
+  * Set whether this URegularExpression is using Anchoring Bounds for its region.
+  * With anchoring bounds, pattern anchors such as ^ and $ will match at the start
+  * and end of the region.  Without Anchoring Bounds, anchors will only match at
+  * the positions they would in the complete text.
+  *
+  * Anchoring Bounds are the default for regions.
+  *
+  * @param regexp The compiled regular expression.
+  * @param b      TRUE if to enable anchoring bounds; FALSE to disable them.
+  * @param status A pointer to a UErrorCode to receive any errors.
+  * @draft   ICU 4.0
+  */
+U_DRAFT void U_EXPORT2
+uregex_useAnchoringBounds(URegularExpression   *regexp,
+                          UBool                 b,
+                          UErrorCode           *status);
+
+/**
+  * Return TRUE if the most recent matching operation touched the
+  *  end of the text being processed.  In this case, additional input text could
+  *  change the results of that match.
+  *
+  *  @param regexp The compiled regular expression.
+  *  @param status A pointer to a UErrorCode to receive any errors.
+  *  @return  TRUE if the most recent match hit the end of input
+  *  @draft   ICU 4.0
+  */
+U_DRAFT UBool U_EXPORT2
+uregex_hitEnd(const  URegularExpression   *regexp,
+                     UErrorCode           *status);
+
+/**
+  * Return TRUE the most recent match succeeded and additional input could cause
+  * it to fail. If this function returns false and a match was found, then more input
+  * might change the match but the match won't be lost. If a match was not found,
+  * then requireEnd has no meaning.
+  *
+  * @param regexp The compiled regular expression.
+  * @param status A pointer to a UErrorCode to receive any errors.
+  * @return TRUE  if more input could cause the most recent match to no longer match.
+  * @draft  ICU 4.0
+  */
+U_DRAFT UBool U_EXPORT2   
+uregex_requireEnd(const  URegularExpression   *regexp,
+                         UErrorCode           *status);
+
+
+
+
+
+/**
+  *    Replaces every substring of the input that matches the pattern
+  *    with the given replacement string.  This is a convenience function that
+  *    provides a complete find-and-replace-all operation.
+  *
+  *    This method scans the input string looking for matches of the pattern. 
+  *    Input that is not part of any match is copied unchanged to the
+  *    destination buffer.  Matched regions are replaced in the output
+  *    buffer by the replacement string.   The replacement string may contain
+  *    references to capture groups; these take the form of $1, $2, etc.
+  *
+  *    @param   regexp             The compiled regular expression.
+  *    @param   replacementText    A string containing the replacement text.
+  *    @param   replacementLength  The length of the replacement string, or
+  *                                -1 if it is NUL terminated.
+  *    @param   destBuf            A (UChar *) buffer that will receive the result.
+  *    @param   destCapacity       The capacity of the desitnation buffer.
+  *    @param   status             A reference to a UErrorCode to receive any errors.
+  *    @return                     The length of the string resulting from the find
+  *                                and replace operation.  In the event that the
+  *                                destination capacity is inadequate, the return value
+  *                                is still the full length of the untruncated string.
+  *    @stable ICU 3.0
+  */
+U_STABLE int32_t U_EXPORT2 
+uregex_replaceAll(URegularExpression    *regexp,
+                  const UChar           *replacementText,
+                  int32_t                replacementLength,
+                  UChar                 *destBuf,
+                  int32_t                destCapacity,
+                  UErrorCode            *status);
+
+
+/**
+  *    Replaces the first substring of the input that matches the pattern
+  *    with the given replacement string.  This is a convenience function that
+  *    provides a complete find-and-replace operation.
+  *
+  *    This method scans the input string looking for a match of the pattern. 
+  *    All input that is not part of the match is copied unchanged to the
+  *    destination buffer.  The matched region is replaced in the output
+  *    buffer by the replacement string.   The replacement string may contain
+  *    references to capture groups; these take the form of $1, $2, etc.
+  *
+  *    @param   regexp             The compiled regular expression.
+  *    @param   replacementText    A string containing the replacement text.
+  *    @param   replacementLength  The length of the replacement string, or
+  *                                -1 if it is NUL terminated.
+  *    @param   destBuf            A (UChar *) buffer that will receive the result.
+  *    @param   destCapacity       The capacity of the desitnation buffer.
+  *    @param   status             a reference to a UErrorCode to receive any errors.
+  *    @return                     The length of the string resulting from the find
+  *                                and replace operation.  In the event that the
+  *                                destination capacity is inadequate, the return value
+  *                                is still the full length of the untruncated string.
+  *    @stable ICU 3.0
+  */
+U_STABLE int32_t U_EXPORT2 
+uregex_replaceFirst(URegularExpression  *regexp,
+                    const UChar         *replacementText,
+                    int32_t              replacementLength,
+                    UChar               *destBuf,
+                    int32_t              destCapacity,
+                    UErrorCode          *status);
+
+
+/**
+  *   Implements a replace operation intended to be used as part of an
+  *   incremental find-and-replace.
+  *
+  *   <p>The input string, starting from the end of the previous match and ending at
+  *   the start of the current match, is appended to the destination string.  Then the
+  *   replacement string is appended to the output string,
+  *   including handling any substitutions of captured text.</p>
+  *
+  *   <p>A note on preflight computation of buffersize and error handling:
+  *   Calls to uregex_appendReplacement() and uregex_appendTail() are
+  *   designed to be chained, one after another, with the destination
+  *   buffer pointer and buffer capacity updated after each in preparation
+  *   to for the next.  If the destination buffer is exhausted partway through such a
+  *   sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned.  Normal
+  *   ICU conventions are for a function to perform no action if it is
+  *   called with an error status, but for this one case, uregex_appendRepacement()
+  *   will operate normally so that buffer size computations will complete
+  *   correctly.
+  *
+  *   <p>For simple, prepackaged, non-incremental find-and-replace
+  *      operations, see replaceFirst() or replaceAll().</p>
+  *
+  *   @param   regexp      The regular expression object.  
+  *   @param   replacementText The string that will replace the matched portion of the
+  *                        input string as it is copied to the destination buffer.
+  *                        The replacement text may contain references ($1, for
+  *                        example) to capture groups from the match.
+  *   @param   replacementLength  The length of the replacement text string,
+  *                        or -1 if the string is NUL terminated.
+  *   @param   destBuf     The buffer into which the results of the
+  *                        find-and-replace are placed.  On return, this pointer
+  *                        will be updated to refer to the beginning of the
+  *                        unused portion of buffer, leaving it in position for
+  *                        a subsequent call to this function.
+  *   @param   destCapacity The size of the output buffer,  On return, this
+  *                        parameter will be updated to reflect the space remaining
+  *                        unused in the output buffer.
+  *   @param   status      A reference to a UErrorCode to receive any errors. 
+  *   @return              The length of the result string.  In the event that
+  *                        destCapacity is inadequate, the full length of the
+  *                        untruncated output string is returned.
+  *
+  *   @stable ICU 3.0
+  *
+  */
+U_STABLE int32_t U_EXPORT2 
+uregex_appendReplacement(URegularExpression    *regexp,
+                  const UChar           *replacementText,
+                  int32_t                replacementLength,
+                  UChar                **destBuf,
+                  int32_t               *destCapacity,
+                  UErrorCode            *status);
+
+
+/**
+  * As the final step in a find-and-replace operation, append the remainder
+  * of the input string, starting at the position following the last match,
+  * to the destination string. <code>uregex_appendTail()</code> is intended 
+  *  to be invoked after one or more invocations of the
+  *  <code>uregex_appendReplacement()</code> function.
+  *
+  *   @param   regexp      The regular expression object.  This is needed to 
+  *                        obtain the input string and with the position
+  *                        of the last match within it.
+  *   @param   destBuf     The buffer in which the results of the
+  *                        find-and-replace are placed.  On return, the pointer
+  *                        will be updated to refer to the beginning of the
+  *                        unused portion of buffer.
+  *   @param   destCapacity The size of the output buffer,  On return, this
+  *                        value will be updated to reflect the space remaining
+  *                        unused in the output buffer.
+  *   @param   status      A reference to a UErrorCode to receive any errors. 
+  *   @return              The length of the result string.  In the event that
+  *                        destCapacity is inadequate, the full length of the
+  *                        untruncated output string is returned.
+  *
+  *   @stable ICU 3.0
+  */
+U_STABLE int32_t U_EXPORT2 
+uregex_appendTail(URegularExpression    *regexp,
+                  UChar                **destBuf,
+                  int32_t               *destCapacity,
+                  UErrorCode            *status);
+
+
+
+
+ /**
+   * Split a string into fields.  Somewhat like split() from Perl.
+   *  The pattern matches identify delimiters that separate the input
+   *  into fields.  The input data between the matches becomes the
+   *  fields themselves.
+   * <p>
+   *  Each of the fields is copied from the input string to the destination
+   *  buffer, and the NUL terminated.  The position of each field within
+   *  the destination buffer is returned in the destFields array.
+   *
+   *  Note:  another choice for the design of this function would be to not
+   *         copy the resulting fields at all, but to return indexes and
+   *         lengths within the source text.  
+   *           Advantages would be
+   *             o  Faster.  No Copying.
+   *             o  Nothing extra needed when field data may contain embedded NUL chars.
+   *             o  Less memory needed if working on large data.
+   *           Disadvantages
+   *             o  Less consistent with C++ split, which copies into an
+   *                array of UnicodeStrings.
+   *             o  No NUL termination, extracted fields would be less convenient
+   *                to use in most cases.
+   *             o  Possible problems in the future, when support Unicode Normalization
+   *                could cause the fields to not correspond exactly to
+   *                a range of the source text.
+   * 
+   *    @param   regexp      The compiled regular expression.
+   *    @param   destBuf     A (UChar *) buffer to receive the fields that
+   *                         are extracted from the input string. These
+   *                         field pointers will refer to positions within the
+   *                         destination buffer supplied by the caller.  Any
+   *                         extra positions within the destFields array will be
+   *                         set to NULL.
+   *    @param   destCapacity The capacity of the destBuf.
+   *    @param   requiredCapacity  The actual capacity required of the destBuf.
+   *                         If destCapacity is too small, requiredCapacity will return 
+   *                         the total capacity required to hold all of the output, and
+   *                         a U_BUFFER_OVERFLOW_ERROR will be returned.
+   *    @param   destFields  An array to be filled with the position of each
+   *                         of the extracted fields within destBuf.
+   *    @param   destFieldsCapacity  The number of elements in the destFields array.
+   *                If the number of fields found is less than destFieldsCapacity,
+   *                the extra destFields elements are set to zero.
+   *                If destFieldsCapacity is too small, the trailing part of the
+   *                input, including any field delimiters, is treated as if it
+   *                were the last field - it is copied to the destBuf, and
+   *                its position is in the destBuf is stored in the last element
+   *                of destFields.  This behavior mimics that of Perl.  It is not
+   *                an error condition, and no error status is returned when all destField
+   *                positions are used.
+   * @param status  A reference to a UErrorCode to receive any errors.
+   * @return        The number of fields into which the input string was split.
+   * @stable ICU 3.0
+   */
+U_STABLE int32_t U_EXPORT2 
+uregex_split(   URegularExpression      *regexp,
+                  UChar                 *destBuf,
+                  int32_t                destCapacity,
+                  int32_t               *requiredCapacity,
+                  UChar                 *destFields[],
+                  int32_t                destFieldsCapacity,
+                  UErrorCode            *status);
+
+
+
+
+/**
+ * Set a processing time limit for match operations with this URegularExpression.
+ *
+ * Some patterns, when matching certain strings, can run in exponential time.
+ * For practical purposes, the match operation may appear to be in an
+ * infinite loop.
+ * When a limit is set a match operation will fail with an error if the
+ * limit is exceeded.
+ * <p>
+ * The units of the limit are steps of the match engine.
+ * Correspondence with actual processor time will depend on the speed
+ * of the processor and the details of the specific pattern, but will
+ * typically be on the order of milliseconds.
+ * <p>
+ * By default, the matching time is not limited.
+ * <p>
+ *
+ * @param   regexp      The compiled regular expression.
+ * @param   limit       The limit value, or 0 for no limit.
+ * @param   status      A reference to a UErrorCode to receive any errors.
+ * @draft ICU 4.0
+ */
+U_DRAFT void U_EXPORT2
+uregex_setTimeLimit(URegularExpression      *regexp,
+                    int32_t                  limit,
+                    UErrorCode              *status);
+
+/**
+ * Get the time limit for for matches with this URegularExpression.
+ * A return value of zero indicates that there is no limit.
+ *
+ * @param   regexp      The compiled regular expression.
+ * @param   status      A reference to a UErrorCode to receive any errors.
+ * @return the maximum allowed time for a match, in units of processing steps.
+ * @draft ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+uregex_getTimeLimit(const URegularExpression      *regexp,
+                          UErrorCode              *status);
+
+/**
+ * Set the amount of heap storage avaliable for use by the match backtracking stack.
+ * <p>
+ * ICU uses a backtracking regular expression engine, with the backtrack stack
+ * maintained on the heap.  This function sets the limit to the amount of memory
+ * that can be used  for this purpose.  A backtracking stack overflow will
+ * result in an error from the match operation that caused it.
+ * <p>
+ * A limit is desirable because a malicious or poorly designed pattern can use
+ * excessive memory, potentially crashing the process.  A limit is enabled
+ * by default.
+ * <p>
+ * @param   regexp      The compiled regular expression.
+ * @param   limit       The maximum size, in bytes, of the matching backtrack stack.
+ *                      A value of -1 means no limit.
+ *                      The limit must be greater than zero, or -1.
+ * @param   status      A reference to a UErrorCode to receive any errors.
+ *
+ * @draft ICU 4.0
+ */
+U_DRAFT void U_EXPORT2
+uregex_setStackLimit(URegularExpression      *regexp,
+                     int32_t                  limit,
+                     UErrorCode              *status);
+
+/**
+ * Get the size of the heap storage available for use by the back tracking stack.
+ *
+ * @return  the maximum backtracking stack size, in bytes, or zero if the
+ *          stack size is unlimited.
+ * @draft ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+uregex_getStackLimit(const URegularExpression      *regexp,
+                           UErrorCode              *status);
+
+
+/**
+ * Function pointer for a regular expression matching callback function.
+ * When set, a callback function will be called periodically during matching
+ * operations.  If the call back function returns FALSE, the matching
+ * operation will be terminated early.
+ *
+ * Note:  the callback function must not call other functions on this
+ *        URegularExpression.
+ *
+ * @param context  context pointer.  The callback function will be invoked
+ *                 with the context specified at the time that
+ *                 uregex_setMatchCallback() is called.
+ * @param steps    the accumulated processing time, in match steps, 
+ *                 for this matching operation.
+ * @return         TRUE to continue the matching operation.
+ *                 FALSE to terminate the matching operation.
+ * @draft ICU 4.0
+ */
+U_CDECL_BEGIN
+typedef UBool U_CALLCONV URegexMatchCallback (
+                   const void *context,
+                   int32_t     steps);
+U_CDECL_END
+
+/**
+ * Set a callback function for this URegularExpression.
+ * During matching operations the function will be called periodically,
+ * giving the application the opportunity to terminate a long-running
+ * match.
+ *
+ * @param   regexp      The compiled regular expression.
+ * @param   callback    A pointer to the user-supplied callback function.
+ * @param   context     User context pointer.  The value supplied at the
+ *                      time the callback function is set will be saved
+ *                      and passed to the callback each time that it is called.
+ * @param   status      A reference to a UErrorCode to receive any errors.
+ * @draft ICU 4.0
+ */
+U_DRAFT void U_EXPORT2
+uregex_setMatchCallback(URegularExpression      *regexp,
+                        URegexMatchCallback     *callback,
+                        const void              *context,
+                        UErrorCode              *status);
+
+
+/**
+ *  Get the callback function for this URegularExpression.
+ *
+ * @param   regexp      The compiled regular expression.
+ * @param   callback    Out paramater, receives a pointer to the user-supplied 
+ *                      callback function.
+ * @param   context     Out parameter, receives the user context pointer that
+ *                      was set when uregex_setMatchCallback() was called.
+ * @param   status      A reference to a UErrorCode to receive any errors.
+ * @draft ICU 4.0
+ */
+U_DRAFT void U_EXPORT2
+uregex_getMatchCallback(const URegularExpression    *regexp,
+                        URegexMatchCallback        **callback,
+                        const void                 **context,
+                        UErrorCode                  *status);
+
+
+
+#endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS  */
+#endif   /*  UREGEX_H  */

Deleted: MacRuby/trunk/icu-1060/unicode/urename.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/urename.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/urename.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1775 +0,0 @@
-/*
-*******************************************************************************
-*   Copyright (C) 2002-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*******************************************************************************
-*
-*   file name:  urename.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   Created by: Perl script written by Vladimir Weinstein
-*
-*  Contains data for renaming ICU exports.
-*  Gets included by umachine.h
-*
-*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
-*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
-*/
-
-#ifndef URENAME_H
-#define URENAME_H
-
-/* Uncomment the following line to disable renaming on platforms
-   that do not use Autoconf. */
-/* #define U_DISABLE_RENAMING 1 */
-
-#if !U_DISABLE_RENAMING
-
-/* C exports renaming data */
-
-#define T_CString_int64ToString T_CString_int64ToString_4_0
-#define T_CString_integerToString T_CString_integerToString_4_0
-#define T_CString_stricmp T_CString_stricmp_4_0
-#define T_CString_stringToInteger T_CString_stringToInteger_4_0
-#define T_CString_strnicmp T_CString_strnicmp_4_0
-#define T_CString_toLowerCase T_CString_toLowerCase_4_0
-#define T_CString_toUpperCase T_CString_toUpperCase_4_0
-#define UCNV_FROM_U_CALLBACK_ESCAPE UCNV_FROM_U_CALLBACK_ESCAPE_4_0
-#define UCNV_FROM_U_CALLBACK_SKIP UCNV_FROM_U_CALLBACK_SKIP_4_0
-#define UCNV_FROM_U_CALLBACK_STOP UCNV_FROM_U_CALLBACK_STOP_4_0
-#define UCNV_FROM_U_CALLBACK_SUBSTITUTE UCNV_FROM_U_CALLBACK_SUBSTITUTE_4_0
-#define UCNV_TO_U_CALLBACK_ESCAPE UCNV_TO_U_CALLBACK_ESCAPE_4_0
-#define UCNV_TO_U_CALLBACK_SKIP UCNV_TO_U_CALLBACK_SKIP_4_0
-#define UCNV_TO_U_CALLBACK_STOP UCNV_TO_U_CALLBACK_STOP_4_0
-#define UCNV_TO_U_CALLBACK_SUBSTITUTE UCNV_TO_U_CALLBACK_SUBSTITUTE_4_0
-#define UDataMemory_createNewInstance UDataMemory_createNewInstance_4_0
-#define UDataMemory_init UDataMemory_init_4_0
-#define UDataMemory_isLoaded UDataMemory_isLoaded_4_0
-#define UDataMemory_normalizeDataPointer UDataMemory_normalizeDataPointer_4_0
-#define UDataMemory_setData UDataMemory_setData_4_0
-#define UDatamemory_assign UDatamemory_assign_4_0
-#define _ASCIIData _ASCIIData_4_0
-#define _Bocu1Data _Bocu1Data_4_0
-#define _CESU8Data _CESU8Data_4_0
-#define _HZData _HZData_4_0
-#define _IMAPData _IMAPData_4_0
-#define _ISCIIData _ISCIIData_4_0
-#define _ISO2022Data _ISO2022Data_4_0
-#define _LMBCSData1 _LMBCSData1_4_0
-#define _LMBCSData11 _LMBCSData11_4_0
-#define _LMBCSData16 _LMBCSData16_4_0
-#define _LMBCSData17 _LMBCSData17_4_0
-#define _LMBCSData18 _LMBCSData18_4_0
-#define _LMBCSData19 _LMBCSData19_4_0
-#define _LMBCSData2 _LMBCSData2_4_0
-#define _LMBCSData3 _LMBCSData3_4_0
-#define _LMBCSData4 _LMBCSData4_4_0
-#define _LMBCSData5 _LMBCSData5_4_0
-#define _LMBCSData6 _LMBCSData6_4_0
-#define _LMBCSData8 _LMBCSData8_4_0
-#define _Latin1Data _Latin1Data_4_0
-#define _MBCSData _MBCSData_4_0
-#define _SCSUData _SCSUData_4_0
-#define _UTF16BEData _UTF16BEData_4_0
-#define _UTF16Data _UTF16Data_4_0
-#define _UTF16LEData _UTF16LEData_4_0
-#define _UTF32BEData _UTF32BEData_4_0
-#define _UTF32Data _UTF32Data_4_0
-#define _UTF32LEData _UTF32LEData_4_0
-#define _UTF7Data _UTF7Data_4_0
-#define _UTF8Data _UTF8Data_4_0
-#define cmemory_cleanup cmemory_cleanup_4_0
-#define cmemory_inUse cmemory_inUse_4_0
-#define le_close le_close_4_0
-#define le_create le_create_4_0
-#define le_getCharIndices le_getCharIndices_4_0
-#define le_getCharIndicesWithBase le_getCharIndicesWithBase_4_0
-#define le_getGlyphCount le_getGlyphCount_4_0
-#define le_getGlyphPosition le_getGlyphPosition_4_0
-#define le_getGlyphPositions le_getGlyphPositions_4_0
-#define le_getGlyphs le_getGlyphs_4_0
-#define le_layoutChars le_layoutChars_4_0
-#define le_reset le_reset_4_0
-#define locale_getKeywords locale_getKeywords_4_0
-#define locale_get_default locale_get_default_4_0
-#define locale_set_default locale_set_default_4_0
-#define pl_addFontRun pl_addFontRun_4_0
-#define pl_addLocaleRun pl_addLocaleRun_4_0
-#define pl_addValueRun pl_addValueRun_4_0
-#define pl_close pl_close_4_0
-#define pl_closeFontRuns pl_closeFontRuns_4_0
-#define pl_closeLine pl_closeLine_4_0
-#define pl_closeLocaleRuns pl_closeLocaleRuns_4_0
-#define pl_closeValueRuns pl_closeValueRuns_4_0
-#define pl_countLineRuns pl_countLineRuns_4_0
-#define pl_create pl_create_4_0
-#define pl_getAscent pl_getAscent_4_0
-#define pl_getDescent pl_getDescent_4_0
-#define pl_getFontRunCount pl_getFontRunCount_4_0
-#define pl_getFontRunFont pl_getFontRunFont_4_0
-#define pl_getFontRunLastLimit pl_getFontRunLastLimit_4_0
-#define pl_getFontRunLimit pl_getFontRunLimit_4_0
-#define pl_getLeading pl_getLeading_4_0
-#define pl_getLineAscent pl_getLineAscent_4_0
-#define pl_getLineDescent pl_getLineDescent_4_0
-#define pl_getLineLeading pl_getLineLeading_4_0
-#define pl_getLineVisualRun pl_getLineVisualRun_4_0
-#define pl_getLineWidth pl_getLineWidth_4_0
-#define pl_getLocaleRunCount pl_getLocaleRunCount_4_0
-#define pl_getLocaleRunLastLimit pl_getLocaleRunLastLimit_4_0
-#define pl_getLocaleRunLimit pl_getLocaleRunLimit_4_0
-#define pl_getLocaleRunLocale pl_getLocaleRunLocale_4_0
-#define pl_getParagraphLevel pl_getParagraphLevel_4_0
-#define pl_getTextDirection pl_getTextDirection_4_0
-#define pl_getValueRunCount pl_getValueRunCount_4_0
-#define pl_getValueRunLastLimit pl_getValueRunLastLimit_4_0
-#define pl_getValueRunLimit pl_getValueRunLimit_4_0
-#define pl_getValueRunValue pl_getValueRunValue_4_0
-#define pl_getVisualRunAscent pl_getVisualRunAscent_4_0
-#define pl_getVisualRunDescent pl_getVisualRunDescent_4_0
-#define pl_getVisualRunDirection pl_getVisualRunDirection_4_0
-#define pl_getVisualRunFont pl_getVisualRunFont_4_0
-#define pl_getVisualRunGlyphCount pl_getVisualRunGlyphCount_4_0
-#define pl_getVisualRunGlyphToCharMap pl_getVisualRunGlyphToCharMap_4_0
-#define pl_getVisualRunGlyphs pl_getVisualRunGlyphs_4_0
-#define pl_getVisualRunLeading pl_getVisualRunLeading_4_0
-#define pl_getVisualRunPositions pl_getVisualRunPositions_4_0
-#define pl_isComplex pl_isComplex_4_0
-#define pl_nextLine pl_nextLine_4_0
-#define pl_openEmptyFontRuns pl_openEmptyFontRuns_4_0
-#define pl_openEmptyLocaleRuns pl_openEmptyLocaleRuns_4_0
-#define pl_openEmptyValueRuns pl_openEmptyValueRuns_4_0
-#define pl_openFontRuns pl_openFontRuns_4_0
-#define pl_openLocaleRuns pl_openLocaleRuns_4_0
-#define pl_openValueRuns pl_openValueRuns_4_0
-#define pl_reflow pl_reflow_4_0
-#define pl_resetFontRuns pl_resetFontRuns_4_0
-#define pl_resetLocaleRuns pl_resetLocaleRuns_4_0
-#define pl_resetValueRuns pl_resetValueRuns_4_0
-#define res_countArrayItems res_countArrayItems_4_0
-#define res_findResource res_findResource_4_0
-#define res_getAlias res_getAlias_4_0
-#define res_getArrayItem res_getArrayItem_4_0
-#define res_getBinary res_getBinary_4_0
-#define res_getIntVector res_getIntVector_4_0
-#define res_getResource res_getResource_4_0
-#define res_getString res_getString_4_0
-#define res_getTableItemByIndex res_getTableItemByIndex_4_0
-#define res_getTableItemByKey res_getTableItemByKey_4_0
-#define res_load res_load_4_0
-#define res_unload res_unload_4_0
-#define transliterator_cleanup transliterator_cleanup_4_0
-#define triedict_swap triedict_swap_4_0
-#define u_UCharsToChars u_UCharsToChars_4_0
-#define u_austrcpy u_austrcpy_4_0
-#define u_austrncpy u_austrncpy_4_0
-#define u_catclose u_catclose_4_0
-#define u_catgets u_catgets_4_0
-#define u_catopen u_catopen_4_0
-#define u_charAge u_charAge_4_0
-#define u_charDigitValue u_charDigitValue_4_0
-#define u_charDirection u_charDirection_4_0
-#define u_charFromName u_charFromName_4_0
-#define u_charMirror u_charMirror_4_0
-#define u_charName u_charName_4_0
-#define u_charType u_charType_4_0
-#define u_charsToUChars u_charsToUChars_4_0
-#define u_cleanup u_cleanup_4_0
-#define u_countChar32 u_countChar32_4_0
-#define u_digit u_digit_4_0
-#define u_enumCharNames u_enumCharNames_4_0
-#define u_enumCharTypes u_enumCharTypes_4_0
-#define u_errorName u_errorName_4_0
-#define u_fclose u_fclose_4_0
-#define u_feof u_feof_4_0
-#define u_fflush u_fflush_4_0
-#define u_fgetConverter u_fgetConverter_4_0
-#define u_fgetc u_fgetc_4_0
-#define u_fgetcodepage u_fgetcodepage_4_0
-#define u_fgetcx u_fgetcx_4_0
-#define u_fgetfile u_fgetfile_4_0
-#define u_fgetlocale u_fgetlocale_4_0
-#define u_fgets u_fgets_4_0
-#define u_file_read u_file_read_4_0
-#define u_file_write u_file_write_4_0
-#define u_file_write_flush u_file_write_flush_4_0
-#define u_finit u_finit_4_0
-#define u_foldCase u_foldCase_4_0
-#define u_fopen u_fopen_4_0
-#define u_forDigit u_forDigit_4_0
-#define u_formatMessage u_formatMessage_4_0
-#define u_formatMessageWithError u_formatMessageWithError_4_0
-#define u_fprintf u_fprintf_4_0
-#define u_fprintf_u u_fprintf_u_4_0
-#define u_fputc u_fputc_4_0
-#define u_fputs u_fputs_4_0
-#define u_frewind u_frewind_4_0
-#define u_fscanf u_fscanf_4_0
-#define u_fscanf_u u_fscanf_u_4_0
-#define u_fsetcodepage u_fsetcodepage_4_0
-#define u_fsetlocale u_fsetlocale_4_0
-#define u_fsettransliterator u_fsettransliterator_4_0
-#define u_fstropen u_fstropen_4_0
-#define u_fungetc u_fungetc_4_0
-#define u_getCombiningClass u_getCombiningClass_4_0
-#define u_getDataDirectory u_getDataDirectory_4_0
-#define u_getDefaultConverter u_getDefaultConverter_4_0
-#define u_getFC_NFKC_Closure u_getFC_NFKC_Closure_4_0
-#define u_getISOComment u_getISOComment_4_0
-#define u_getIntPropertyMaxValue u_getIntPropertyMaxValue_4_0
-#define u_getIntPropertyMinValue u_getIntPropertyMinValue_4_0
-#define u_getIntPropertyValue u_getIntPropertyValue_4_0
-#define u_getNumericValue u_getNumericValue_4_0
-#define u_getPropertyEnum u_getPropertyEnum_4_0
-#define u_getPropertyName u_getPropertyName_4_0
-#define u_getPropertyValueEnum u_getPropertyValueEnum_4_0
-#define u_getPropertyValueName u_getPropertyValueName_4_0
-#define u_getUnicodeProperties u_getUnicodeProperties_4_0
-#define u_getUnicodeVersion u_getUnicodeVersion_4_0
-#define u_getVersion u_getVersion_4_0
-#define u_growBufferFromStatic u_growBufferFromStatic_4_0
-#define u_hasBinaryProperty u_hasBinaryProperty_4_0
-#define u_init u_init_4_0
-#define u_isIDIgnorable u_isIDIgnorable_4_0
-#define u_isIDPart u_isIDPart_4_0
-#define u_isIDStart u_isIDStart_4_0
-#define u_isISOControl u_isISOControl_4_0
-#define u_isJavaIDPart u_isJavaIDPart_4_0
-#define u_isJavaIDStart u_isJavaIDStart_4_0
-#define u_isJavaSpaceChar u_isJavaSpaceChar_4_0
-#define u_isMirrored u_isMirrored_4_0
-#define u_isUAlphabetic u_isUAlphabetic_4_0
-#define u_isULowercase u_isULowercase_4_0
-#define u_isUUppercase u_isUUppercase_4_0
-#define u_isUWhiteSpace u_isUWhiteSpace_4_0
-#define u_isWhitespace u_isWhitespace_4_0
-#define u_isalnum u_isalnum_4_0
-#define u_isalnumPOSIX u_isalnumPOSIX_4_0
-#define u_isalpha u_isalpha_4_0
-#define u_isbase u_isbase_4_0
-#define u_isblank u_isblank_4_0
-#define u_iscntrl u_iscntrl_4_0
-#define u_isdefined u_isdefined_4_0
-#define u_isdigit u_isdigit_4_0
-#define u_isgraph u_isgraph_4_0
-#define u_isgraphPOSIX u_isgraphPOSIX_4_0
-#define u_islower u_islower_4_0
-#define u_isprint u_isprint_4_0
-#define u_isprintPOSIX u_isprintPOSIX_4_0
-#define u_ispunct u_ispunct_4_0
-#define u_isspace u_isspace_4_0
-#define u_istitle u_istitle_4_0
-#define u_isupper u_isupper_4_0
-#define u_isxdigit u_isxdigit_4_0
-#define u_lengthOfIdenticalLevelRun u_lengthOfIdenticalLevelRun_4_0
-#define u_locbund_close u_locbund_close_4_0
-#define u_locbund_getNumberFormat u_locbund_getNumberFormat_4_0
-#define u_locbund_init u_locbund_init_4_0
-#define u_memcasecmp u_memcasecmp_4_0
-#define u_memchr u_memchr_4_0
-#define u_memchr32 u_memchr32_4_0
-#define u_memcmp u_memcmp_4_0
-#define u_memcmpCodePointOrder u_memcmpCodePointOrder_4_0
-#define u_memcpy u_memcpy_4_0
-#define u_memmove u_memmove_4_0
-#define u_memrchr u_memrchr_4_0
-#define u_memrchr32 u_memrchr32_4_0
-#define u_memset u_memset_4_0
-#define u_parseMessage u_parseMessage_4_0
-#define u_parseMessageWithError u_parseMessageWithError_4_0
-#define u_printf_parse u_printf_parse_4_0
-#define u_releaseDefaultConverter u_releaseDefaultConverter_4_0
-#define u_scanf_parse u_scanf_parse_4_0
-#define u_setAtomicIncDecFunctions u_setAtomicIncDecFunctions_4_0
-#define u_setDataDirectory u_setDataDirectory_4_0
-#define u_setMemoryFunctions u_setMemoryFunctions_4_0
-#define u_setMutexFunctions u_setMutexFunctions_4_0
-#define u_shapeArabic u_shapeArabic_4_0
-#define u_snprintf u_snprintf_4_0
-#define u_snprintf_u u_snprintf_u_4_0
-#define u_sprintf u_sprintf_4_0
-#define u_sprintf_u u_sprintf_u_4_0
-#define u_sscanf u_sscanf_4_0
-#define u_sscanf_u u_sscanf_u_4_0
-#define u_strCaseCompare u_strCaseCompare_4_0
-#define u_strCompare u_strCompare_4_0
-#define u_strCompareIter u_strCompareIter_4_0
-#define u_strFindFirst u_strFindFirst_4_0
-#define u_strFindLast u_strFindLast_4_0
-#define u_strFoldCase u_strFoldCase_4_0
-#define u_strFromPunycode u_strFromPunycode_4_0
-#define u_strFromUTF32 u_strFromUTF32_4_0
-#define u_strFromUTF8 u_strFromUTF8_4_0
-#define u_strFromUTF8Lenient u_strFromUTF8Lenient_4_0
-#define u_strFromUTF8WithSub u_strFromUTF8WithSub_4_0
-#define u_strFromWCS u_strFromWCS_4_0
-#define u_strHasMoreChar32Than u_strHasMoreChar32Than_4_0
-#define u_strToLower u_strToLower_4_0
-#define u_strToPunycode u_strToPunycode_4_0
-#define u_strToTitle u_strToTitle_4_0
-#define u_strToUTF32 u_strToUTF32_4_0
-#define u_strToUTF8 u_strToUTF8_4_0
-#define u_strToUTF8WithSub u_strToUTF8WithSub_4_0
-#define u_strToUpper u_strToUpper_4_0
-#define u_strToWCS u_strToWCS_4_0
-#define u_strcasecmp u_strcasecmp_4_0
-#define u_strcat u_strcat_4_0
-#define u_strchr u_strchr_4_0
-#define u_strchr32 u_strchr32_4_0
-#define u_strcmp u_strcmp_4_0
-#define u_strcmpCodePointOrder u_strcmpCodePointOrder_4_0
-#define u_strcmpFold u_strcmpFold_4_0
-#define u_strcpy u_strcpy_4_0
-#define u_strcspn u_strcspn_4_0
-#define u_strlen u_strlen_4_0
-#define u_strncasecmp u_strncasecmp_4_0
-#define u_strncat u_strncat_4_0
-#define u_strncmp u_strncmp_4_0
-#define u_strncmpCodePointOrder u_strncmpCodePointOrder_4_0
-#define u_strncpy u_strncpy_4_0
-#define u_strpbrk u_strpbrk_4_0
-#define u_strrchr u_strrchr_4_0
-#define u_strrchr32 u_strrchr32_4_0
-#define u_strrstr u_strrstr_4_0
-#define u_strspn u_strspn_4_0
-#define u_strstr u_strstr_4_0
-#define u_strtok_r u_strtok_r_4_0
-#define u_terminateChars u_terminateChars_4_0
-#define u_terminateUChar32s u_terminateUChar32s_4_0
-#define u_terminateUChars u_terminateUChars_4_0
-#define u_terminateWChars u_terminateWChars_4_0
-#define u_tolower u_tolower_4_0
-#define u_totitle u_totitle_4_0
-#define u_toupper u_toupper_4_0
-#define u_uastrcpy u_uastrcpy_4_0
-#define u_uastrncpy u_uastrncpy_4_0
-#define u_unescape u_unescape_4_0
-#define u_unescapeAt u_unescapeAt_4_0
-#define u_versionFromString u_versionFromString_4_0
-#define u_versionToString u_versionToString_4_0
-#define u_vformatMessage u_vformatMessage_4_0
-#define u_vformatMessageWithError u_vformatMessageWithError_4_0
-#define u_vfprintf u_vfprintf_4_0
-#define u_vfprintf_u u_vfprintf_u_4_0
-#define u_vfscanf u_vfscanf_4_0
-#define u_vfscanf_u u_vfscanf_u_4_0
-#define u_vparseMessage u_vparseMessage_4_0
-#define u_vparseMessageWithError u_vparseMessageWithError_4_0
-#define u_vsnprintf u_vsnprintf_4_0
-#define u_vsnprintf_u u_vsnprintf_u_4_0
-#define u_vsprintf u_vsprintf_4_0
-#define u_vsprintf_u u_vsprintf_u_4_0
-#define u_vsscanf u_vsscanf_4_0
-#define u_vsscanf_u u_vsscanf_u_4_0
-#define u_writeDiff u_writeDiff_4_0
-#define u_writeIdenticalLevelRun u_writeIdenticalLevelRun_4_0
-#define u_writeIdenticalLevelRunTwoChars u_writeIdenticalLevelRunTwoChars_4_0
-#define ubidi_addPropertyStarts ubidi_addPropertyStarts_4_0
-#define ubidi_close ubidi_close_4_0
-#define ubidi_closeProps ubidi_closeProps_4_0
-#define ubidi_countParagraphs ubidi_countParagraphs_4_0
-#define ubidi_countRuns ubidi_countRuns_4_0
-#define ubidi_getClass ubidi_getClass_4_0
-#define ubidi_getClassCallback ubidi_getClassCallback_4_0
-#define ubidi_getCustomizedClass ubidi_getCustomizedClass_4_0
-#define ubidi_getDirection ubidi_getDirection_4_0
-#define ubidi_getJoiningGroup ubidi_getJoiningGroup_4_0
-#define ubidi_getJoiningType ubidi_getJoiningType_4_0
-#define ubidi_getLength ubidi_getLength_4_0
-#define ubidi_getLevelAt ubidi_getLevelAt_4_0
-#define ubidi_getLevels ubidi_getLevels_4_0
-#define ubidi_getLogicalIndex ubidi_getLogicalIndex_4_0
-#define ubidi_getLogicalMap ubidi_getLogicalMap_4_0
-#define ubidi_getLogicalRun ubidi_getLogicalRun_4_0
-#define ubidi_getMaxValue ubidi_getMaxValue_4_0
-#define ubidi_getMemory ubidi_getMemory_4_0
-#define ubidi_getMirror ubidi_getMirror_4_0
-#define ubidi_getParaLevel ubidi_getParaLevel_4_0
-#define ubidi_getParagraph ubidi_getParagraph_4_0
-#define ubidi_getParagraphByIndex ubidi_getParagraphByIndex_4_0
-#define ubidi_getProcessedLength ubidi_getProcessedLength_4_0
-#define ubidi_getReorderingMode ubidi_getReorderingMode_4_0
-#define ubidi_getReorderingOptions ubidi_getReorderingOptions_4_0
-#define ubidi_getResultLength ubidi_getResultLength_4_0
-#define ubidi_getRuns ubidi_getRuns_4_0
-#define ubidi_getSingleton ubidi_getSingleton_4_0
-#define ubidi_getText ubidi_getText_4_0
-#define ubidi_getVisualIndex ubidi_getVisualIndex_4_0
-#define ubidi_getVisualMap ubidi_getVisualMap_4_0
-#define ubidi_getVisualRun ubidi_getVisualRun_4_0
-#define ubidi_invertMap ubidi_invertMap_4_0
-#define ubidi_isBidiControl ubidi_isBidiControl_4_0
-#define ubidi_isInverse ubidi_isInverse_4_0
-#define ubidi_isJoinControl ubidi_isJoinControl_4_0
-#define ubidi_isMirrored ubidi_isMirrored_4_0
-#define ubidi_isOrderParagraphsLTR ubidi_isOrderParagraphsLTR_4_0
-#define ubidi_open ubidi_open_4_0
-#define ubidi_openSized ubidi_openSized_4_0
-#define ubidi_orderParagraphsLTR ubidi_orderParagraphsLTR_4_0
-#define ubidi_reorderLogical ubidi_reorderLogical_4_0
-#define ubidi_reorderVisual ubidi_reorderVisual_4_0
-#define ubidi_setClassCallback ubidi_setClassCallback_4_0
-#define ubidi_setInverse ubidi_setInverse_4_0
-#define ubidi_setLine ubidi_setLine_4_0
-#define ubidi_setPara ubidi_setPara_4_0
-#define ubidi_setReorderingMode ubidi_setReorderingMode_4_0
-#define ubidi_setReorderingOptions ubidi_setReorderingOptions_4_0
-#define ubidi_writeReordered ubidi_writeReordered_4_0
-#define ubidi_writeReverse ubidi_writeReverse_4_0
-#define ublock_getCode ublock_getCode_4_0
-#define ubrk_close ubrk_close_4_0
-#define ubrk_countAvailable ubrk_countAvailable_4_0
-#define ubrk_current ubrk_current_4_0
-#define ubrk_first ubrk_first_4_0
-#define ubrk_following ubrk_following_4_0
-#define ubrk_getAvailable ubrk_getAvailable_4_0
-#define ubrk_getLocaleByType ubrk_getLocaleByType_4_0
-#define ubrk_getRuleStatus ubrk_getRuleStatus_4_0
-#define ubrk_getRuleStatusVec ubrk_getRuleStatusVec_4_0
-#define ubrk_isBoundary ubrk_isBoundary_4_0
-#define ubrk_last ubrk_last_4_0
-#define ubrk_next ubrk_next_4_0
-#define ubrk_open ubrk_open_4_0
-#define ubrk_openRules ubrk_openRules_4_0
-#define ubrk_preceding ubrk_preceding_4_0
-#define ubrk_previous ubrk_previous_4_0
-#define ubrk_safeClone ubrk_safeClone_4_0
-#define ubrk_setText ubrk_setText_4_0
-#define ubrk_setUText ubrk_setUText_4_0
-#define ubrk_swap ubrk_swap_4_0
-#define ucal_add ucal_add_4_0
-#define ucal_clear ucal_clear_4_0
-#define ucal_clearField ucal_clearField_4_0
-#define ucal_clone ucal_clone_4_0
-#define ucal_close ucal_close_4_0
-#define ucal_countAvailable ucal_countAvailable_4_0
-#define ucal_equivalentTo ucal_equivalentTo_4_0
-#define ucal_get ucal_get_4_0
-#define ucal_getAttribute ucal_getAttribute_4_0
-#define ucal_getAvailable ucal_getAvailable_4_0
-#define ucal_getCanonicalTimeZoneID ucal_getCanonicalTimeZoneID_4_0
-#define ucal_getDSTSavings ucal_getDSTSavings_4_0
-#define ucal_getDefaultTimeZone ucal_getDefaultTimeZone_4_0
-#define ucal_getGregorianChange ucal_getGregorianChange_4_0
-#define ucal_getLimit ucal_getLimit_4_0
-#define ucal_getLocaleByType ucal_getLocaleByType_4_0
-#define ucal_getMillis ucal_getMillis_4_0
-#define ucal_getNow ucal_getNow_4_0
-#define ucal_getTZDataVersion ucal_getTZDataVersion_4_0
-#define ucal_getTimeZoneDisplayName ucal_getTimeZoneDisplayName_4_0
-#define ucal_inDaylightTime ucal_inDaylightTime_4_0
-#define ucal_isSet ucal_isSet_4_0
-#define ucal_open ucal_open_4_0
-#define ucal_openCountryTimeZones ucal_openCountryTimeZones_4_0
-#define ucal_openTimeZones ucal_openTimeZones_4_0
-#define ucal_roll ucal_roll_4_0
-#define ucal_set ucal_set_4_0
-#define ucal_setAttribute ucal_setAttribute_4_0
-#define ucal_setDate ucal_setDate_4_0
-#define ucal_setDateTime ucal_setDateTime_4_0
-#define ucal_setDefaultTimeZone ucal_setDefaultTimeZone_4_0
-#define ucal_setGregorianChange ucal_setGregorianChange_4_0
-#define ucal_setMillis ucal_setMillis_4_0
-#define ucal_setTimeZone ucal_setTimeZone_4_0
-#define ucase_addCaseClosure ucase_addCaseClosure_4_0
-#define ucase_addPropertyStarts ucase_addPropertyStarts_4_0
-#define ucase_addStringCaseClosure ucase_addStringCaseClosure_4_0
-#define ucase_close ucase_close_4_0
-#define ucase_fold ucase_fold_4_0
-#define ucase_getCaseLocale ucase_getCaseLocale_4_0
-#define ucase_getSingleton ucase_getSingleton_4_0
-#define ucase_getType ucase_getType_4_0
-#define ucase_getTypeOrIgnorable ucase_getTypeOrIgnorable_4_0
-#define ucase_hasBinaryProperty ucase_hasBinaryProperty_4_0
-#define ucase_isCaseSensitive ucase_isCaseSensitive_4_0
-#define ucase_isSoftDotted ucase_isSoftDotted_4_0
-#define ucase_toFullFolding ucase_toFullFolding_4_0
-#define ucase_toFullLower ucase_toFullLower_4_0
-#define ucase_toFullTitle ucase_toFullTitle_4_0
-#define ucase_toFullUpper ucase_toFullUpper_4_0
-#define ucase_tolower ucase_tolower_4_0
-#define ucase_totitle ucase_totitle_4_0
-#define ucase_toupper ucase_toupper_4_0
-#define ucasemap_close ucasemap_close_4_0
-#define ucasemap_getBreakIterator ucasemap_getBreakIterator_4_0
-#define ucasemap_getLocale ucasemap_getLocale_4_0
-#define ucasemap_getOptions ucasemap_getOptions_4_0
-#define ucasemap_open ucasemap_open_4_0
-#define ucasemap_setBreakIterator ucasemap_setBreakIterator_4_0
-#define ucasemap_setLocale ucasemap_setLocale_4_0
-#define ucasemap_setOptions ucasemap_setOptions_4_0
-#define ucasemap_toTitle ucasemap_toTitle_4_0
-#define ucasemap_utf8FoldCase ucasemap_utf8FoldCase_4_0
-#define ucasemap_utf8ToLower ucasemap_utf8ToLower_4_0
-#define ucasemap_utf8ToTitle ucasemap_utf8ToTitle_4_0
-#define ucasemap_utf8ToUpper ucasemap_utf8ToUpper_4_0
-#define uchar_addPropertyStarts uchar_addPropertyStarts_4_0
-#define uchar_getHST uchar_getHST_4_0
-#define uchar_swapNames uchar_swapNames_4_0
-#define ucln_common_registerCleanup ucln_common_registerCleanup_4_0
-#define ucln_i18n_registerCleanup ucln_i18n_registerCleanup_4_0
-#define ucln_io_registerCleanup ucln_io_registerCleanup_4_0
-#define ucln_lib_cleanup ucln_lib_cleanup_4_0
-#define ucln_registerCleanup ucln_registerCleanup_4_0
-#define ucnv_MBCSFromUChar32 ucnv_MBCSFromUChar32_4_0
-#define ucnv_MBCSFromUnicodeWithOffsets ucnv_MBCSFromUnicodeWithOffsets_4_0
-#define ucnv_MBCSGetFilteredUnicodeSetForUnicode ucnv_MBCSGetFilteredUnicodeSetForUnicode_4_0
-#define ucnv_MBCSGetType ucnv_MBCSGetType_4_0
-#define ucnv_MBCSGetUnicodeSetForUnicode ucnv_MBCSGetUnicodeSetForUnicode_4_0
-#define ucnv_MBCSIsLeadByte ucnv_MBCSIsLeadByte_4_0
-#define ucnv_MBCSSimpleGetNextUChar ucnv_MBCSSimpleGetNextUChar_4_0
-#define ucnv_MBCSToUnicodeWithOffsets ucnv_MBCSToUnicodeWithOffsets_4_0
-#define ucnv_bld_countAvailableConverters ucnv_bld_countAvailableConverters_4_0
-#define ucnv_bld_getAvailableConverter ucnv_bld_getAvailableConverter_4_0
-#define ucnv_cbFromUWriteBytes ucnv_cbFromUWriteBytes_4_0
-#define ucnv_cbFromUWriteSub ucnv_cbFromUWriteSub_4_0
-#define ucnv_cbFromUWriteUChars ucnv_cbFromUWriteUChars_4_0
-#define ucnv_cbToUWriteSub ucnv_cbToUWriteSub_4_0
-#define ucnv_cbToUWriteUChars ucnv_cbToUWriteUChars_4_0
-#define ucnv_close ucnv_close_4_0
-#define ucnv_compareNames ucnv_compareNames_4_0
-#define ucnv_convert ucnv_convert_4_0
-#define ucnv_convertEx ucnv_convertEx_4_0
-#define ucnv_countAliases ucnv_countAliases_4_0
-#define ucnv_countAvailable ucnv_countAvailable_4_0
-#define ucnv_countStandards ucnv_countStandards_4_0
-#define ucnv_createAlgorithmicConverter ucnv_createAlgorithmicConverter_4_0
-#define ucnv_createConverter ucnv_createConverter_4_0
-#define ucnv_createConverterFromPackage ucnv_createConverterFromPackage_4_0
-#define ucnv_createConverterFromSharedData ucnv_createConverterFromSharedData_4_0
-#define ucnv_detectUnicodeSignature ucnv_detectUnicodeSignature_4_0
-#define ucnv_extContinueMatchFromU ucnv_extContinueMatchFromU_4_0
-#define ucnv_extContinueMatchToU ucnv_extContinueMatchToU_4_0
-#define ucnv_extGetUnicodeSet ucnv_extGetUnicodeSet_4_0
-#define ucnv_extInitialMatchFromU ucnv_extInitialMatchFromU_4_0
-#define ucnv_extInitialMatchToU ucnv_extInitialMatchToU_4_0
-#define ucnv_extSimpleMatchFromU ucnv_extSimpleMatchFromU_4_0
-#define ucnv_extSimpleMatchToU ucnv_extSimpleMatchToU_4_0
-#define ucnv_fixFileSeparator ucnv_fixFileSeparator_4_0
-#define ucnv_flushCache ucnv_flushCache_4_0
-#define ucnv_fromAlgorithmic ucnv_fromAlgorithmic_4_0
-#define ucnv_fromUChars ucnv_fromUChars_4_0
-#define ucnv_fromUCountPending ucnv_fromUCountPending_4_0
-#define ucnv_fromUWriteBytes ucnv_fromUWriteBytes_4_0
-#define ucnv_fromUnicode ucnv_fromUnicode_4_0
-#define ucnv_fromUnicode_UTF8 ucnv_fromUnicode_UTF8_4_0
-#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC ucnv_fromUnicode_UTF8_OFFSETS_LOGIC_4_0
-#define ucnv_getAlias ucnv_getAlias_4_0
-#define ucnv_getAliases ucnv_getAliases_4_0
-#define ucnv_getAvailableName ucnv_getAvailableName_4_0
-#define ucnv_getCCSID ucnv_getCCSID_4_0
-#define ucnv_getCanonicalName ucnv_getCanonicalName_4_0
-#define ucnv_getCompleteUnicodeSet ucnv_getCompleteUnicodeSet_4_0
-#define ucnv_getDefaultName ucnv_getDefaultName_4_0
-#define ucnv_getDisplayName ucnv_getDisplayName_4_0
-#define ucnv_getFromUCallBack ucnv_getFromUCallBack_4_0
-#define ucnv_getInvalidChars ucnv_getInvalidChars_4_0
-#define ucnv_getInvalidUChars ucnv_getInvalidUChars_4_0
-#define ucnv_getMaxCharSize ucnv_getMaxCharSize_4_0
-#define ucnv_getMinCharSize ucnv_getMinCharSize_4_0
-#define ucnv_getName ucnv_getName_4_0
-#define ucnv_getNextUChar ucnv_getNextUChar_4_0
-#define ucnv_getNonSurrogateUnicodeSet ucnv_getNonSurrogateUnicodeSet_4_0
-#define ucnv_getPlatform ucnv_getPlatform_4_0
-#define ucnv_getStandard ucnv_getStandard_4_0
-#define ucnv_getStandardName ucnv_getStandardName_4_0
-#define ucnv_getStarters ucnv_getStarters_4_0
-#define ucnv_getSubstChars ucnv_getSubstChars_4_0
-#define ucnv_getToUCallBack ucnv_getToUCallBack_4_0
-#define ucnv_getType ucnv_getType_4_0
-#define ucnv_getUnicodeSet ucnv_getUnicodeSet_4_0
-#define ucnv_incrementRefCount ucnv_incrementRefCount_4_0
-#define ucnv_io_countKnownConverters ucnv_io_countKnownConverters_4_0
-#define ucnv_io_getConverterName ucnv_io_getConverterName_4_0
-#define ucnv_io_stripASCIIForCompare ucnv_io_stripASCIIForCompare_4_0
-#define ucnv_io_stripEBCDICForCompare ucnv_io_stripEBCDICForCompare_4_0
-#define ucnv_isAmbiguous ucnv_isAmbiguous_4_0
-#define ucnv_load ucnv_load_4_0
-#define ucnv_loadSharedData ucnv_loadSharedData_4_0
-#define ucnv_open ucnv_open_4_0
-#define ucnv_openAllNames ucnv_openAllNames_4_0
-#define ucnv_openCCSID ucnv_openCCSID_4_0
-#define ucnv_openPackage ucnv_openPackage_4_0
-#define ucnv_openStandardNames ucnv_openStandardNames_4_0
-#define ucnv_openU ucnv_openU_4_0
-#define ucnv_reset ucnv_reset_4_0
-#define ucnv_resetFromUnicode ucnv_resetFromUnicode_4_0
-#define ucnv_resetToUnicode ucnv_resetToUnicode_4_0
-#define ucnv_safeClone ucnv_safeClone_4_0
-#define ucnv_setDefaultName ucnv_setDefaultName_4_0
-#define ucnv_setFallback ucnv_setFallback_4_0
-#define ucnv_setFromUCallBack ucnv_setFromUCallBack_4_0
-#define ucnv_setSubstChars ucnv_setSubstChars_4_0
-#define ucnv_setSubstString ucnv_setSubstString_4_0
-#define ucnv_setToUCallBack ucnv_setToUCallBack_4_0
-#define ucnv_swap ucnv_swap_4_0
-#define ucnv_swapAliases ucnv_swapAliases_4_0
-#define ucnv_toAlgorithmic ucnv_toAlgorithmic_4_0
-#define ucnv_toUChars ucnv_toUChars_4_0
-#define ucnv_toUCountPending ucnv_toUCountPending_4_0
-#define ucnv_toUWriteCodePoint ucnv_toUWriteCodePoint_4_0
-#define ucnv_toUWriteUChars ucnv_toUWriteUChars_4_0
-#define ucnv_toUnicode ucnv_toUnicode_4_0
-#define ucnv_unload ucnv_unload_4_0
-#define ucnv_unloadSharedDataIfReady ucnv_unloadSharedDataIfReady_4_0
-#define ucnv_usesFallback ucnv_usesFallback_4_0
-#define ucol_allocWeights ucol_allocWeights_4_0
-#define ucol_assembleTailoringTable ucol_assembleTailoringTable_4_0
-#define ucol_calcSortKey ucol_calcSortKey_4_0
-#define ucol_calcSortKeySimpleTertiary ucol_calcSortKeySimpleTertiary_4_0
-#define ucol_cloneBinary ucol_cloneBinary_4_0
-#define ucol_cloneRuleData ucol_cloneRuleData_4_0
-#define ucol_close ucol_close_4_0
-#define ucol_closeElements ucol_closeElements_4_0
-#define ucol_countAvailable ucol_countAvailable_4_0
-#define ucol_createElements ucol_createElements_4_0
-#define ucol_doCE ucol_doCE_4_0
-#define ucol_equal ucol_equal_4_0
-#define ucol_equals ucol_equals_4_0
-#define ucol_forgetUCA ucol_forgetUCA_4_0
-#define ucol_getAttribute ucol_getAttribute_4_0
-#define ucol_getAttributeOrDefault ucol_getAttributeOrDefault_4_0
-#define ucol_getAvailable ucol_getAvailable_4_0
-#define ucol_getBound ucol_getBound_4_0
-#define ucol_getCEStrengthDifference ucol_getCEStrengthDifference_4_0
-#define ucol_getContractions ucol_getContractions_4_0
-#define ucol_getContractionsAndExpansions ucol_getContractionsAndExpansions_4_0
-#define ucol_getDisplayName ucol_getDisplayName_4_0
-#define ucol_getFirstCE ucol_getFirstCE_4_0
-#define ucol_getFunctionalEquivalent ucol_getFunctionalEquivalent_4_0
-#define ucol_getKeywordValues ucol_getKeywordValues_4_0
-#define ucol_getKeywords ucol_getKeywords_4_0
-#define ucol_getLocale ucol_getLocale_4_0
-#define ucol_getLocaleByType ucol_getLocaleByType_4_0
-#define ucol_getMaxExpansion ucol_getMaxExpansion_4_0
-#define ucol_getNextCE ucol_getNextCE_4_0
-#define ucol_getOffset ucol_getOffset_4_0
-#define ucol_getPrevCE ucol_getPrevCE_4_0
-#define ucol_getRules ucol_getRules_4_0
-#define ucol_getRulesEx ucol_getRulesEx_4_0
-#define ucol_getShortDefinitionString ucol_getShortDefinitionString_4_0
-#define ucol_getSortKey ucol_getSortKey_4_0
-#define ucol_getSortKeySize ucol_getSortKeySize_4_0
-#define ucol_getSortKeyWithAllocation ucol_getSortKeyWithAllocation_4_0
-#define ucol_getStrength ucol_getStrength_4_0
-#define ucol_getTailoredSet ucol_getTailoredSet_4_0
-#define ucol_getUCAVersion ucol_getUCAVersion_4_0
-#define ucol_getUnsafeSet ucol_getUnsafeSet_4_0
-#define ucol_getVariableTop ucol_getVariableTop_4_0
-#define ucol_getVersion ucol_getVersion_4_0
-#define ucol_greater ucol_greater_4_0
-#define ucol_greaterOrEqual ucol_greaterOrEqual_4_0
-#define ucol_initBuffers ucol_initBuffers_4_0
-#define ucol_initCollator ucol_initCollator_4_0
-#define ucol_initInverseUCA ucol_initInverseUCA_4_0
-#define ucol_initUCA ucol_initUCA_4_0
-#define ucol_inv_getNextCE ucol_inv_getNextCE_4_0
-#define ucol_inv_getPrevCE ucol_inv_getPrevCE_4_0
-#define ucol_isTailored ucol_isTailored_4_0
-#define ucol_keyHashCode ucol_keyHashCode_4_0
-#define ucol_mergeSortkeys ucol_mergeSortkeys_4_0
-#define ucol_next ucol_next_4_0
-#define ucol_nextProcessed ucol_nextProcessed_4_0
-#define ucol_nextSortKeyPart ucol_nextSortKeyPart_4_0
-#define ucol_nextWeight ucol_nextWeight_4_0
-#define ucol_normalizeShortDefinitionString ucol_normalizeShortDefinitionString_4_0
-#define ucol_open ucol_open_4_0
-#define ucol_openAvailableLocales ucol_openAvailableLocales_4_0
-#define ucol_openBinary ucol_openBinary_4_0
-#define ucol_openElements ucol_openElements_4_0
-#define ucol_openFromShortString ucol_openFromShortString_4_0
-#define ucol_openRules ucol_openRules_4_0
-#define ucol_open_internal ucol_open_internal_4_0
-#define ucol_prepareShortStringOpen ucol_prepareShortStringOpen_4_0
-#define ucol_previous ucol_previous_4_0
-#define ucol_previousProcessed ucol_previousProcessed_4_0
-#define ucol_primaryOrder ucol_primaryOrder_4_0
-#define ucol_prv_getSpecialCE ucol_prv_getSpecialCE_4_0
-#define ucol_prv_getSpecialPrevCE ucol_prv_getSpecialPrevCE_4_0
-#define ucol_reset ucol_reset_4_0
-#define ucol_restoreVariableTop ucol_restoreVariableTop_4_0
-#define ucol_safeClone ucol_safeClone_4_0
-#define ucol_secondaryOrder ucol_secondaryOrder_4_0
-#define ucol_setAttribute ucol_setAttribute_4_0
-#define ucol_setOffset ucol_setOffset_4_0
-#define ucol_setOptionsFromHeader ucol_setOptionsFromHeader_4_0
-#define ucol_setReqValidLocales ucol_setReqValidLocales_4_0
-#define ucol_setStrength ucol_setStrength_4_0
-#define ucol_setText ucol_setText_4_0
-#define ucol_setVariableTop ucol_setVariableTop_4_0
-#define ucol_strcoll ucol_strcoll_4_0
-#define ucol_strcollIter ucol_strcollIter_4_0
-#define ucol_swap ucol_swap_4_0
-#define ucol_swapBinary ucol_swapBinary_4_0
-#define ucol_swapInverseUCA ucol_swapInverseUCA_4_0
-#define ucol_tertiaryOrder ucol_tertiaryOrder_4_0
-#define ucol_tok_assembleTokenList ucol_tok_assembleTokenList_4_0
-#define ucol_tok_closeTokenList ucol_tok_closeTokenList_4_0
-#define ucol_tok_getNextArgument ucol_tok_getNextArgument_4_0
-#define ucol_tok_initTokenList ucol_tok_initTokenList_4_0
-#define ucol_tok_parseNextToken ucol_tok_parseNextToken_4_0
-#define ucol_updateInternalState ucol_updateInternalState_4_0
-#define ucsdet_close ucsdet_close_4_0
-#define ucsdet_detect ucsdet_detect_4_0
-#define ucsdet_detectAll ucsdet_detectAll_4_0
-#define ucsdet_enableInputFilter ucsdet_enableInputFilter_4_0
-#define ucsdet_getAllDetectableCharsets ucsdet_getAllDetectableCharsets_4_0
-#define ucsdet_getConfidence ucsdet_getConfidence_4_0
-#define ucsdet_getLanguage ucsdet_getLanguage_4_0
-#define ucsdet_getName ucsdet_getName_4_0
-#define ucsdet_getUChars ucsdet_getUChars_4_0
-#define ucsdet_isInputFilterEnabled ucsdet_isInputFilterEnabled_4_0
-#define ucsdet_open ucsdet_open_4_0
-#define ucsdet_setDeclaredEncoding ucsdet_setDeclaredEncoding_4_0
-#define ucsdet_setText ucsdet_setText_4_0
-#define ucurr_countCurrencies ucurr_countCurrencies_4_0
-#define ucurr_forLocale ucurr_forLocale_4_0
-#define ucurr_forLocaleAndDate ucurr_forLocaleAndDate_4_0
-#define ucurr_getDefaultFractionDigits ucurr_getDefaultFractionDigits_4_0
-#define ucurr_getName ucurr_getName_4_0
-#define ucurr_getRoundingIncrement ucurr_getRoundingIncrement_4_0
-#define ucurr_openISOCurrencies ucurr_openISOCurrencies_4_0
-#define ucurr_register ucurr_register_4_0
-#define ucurr_unregister ucurr_unregister_4_0
-#define udat_applyPattern udat_applyPattern_4_0
-#define udat_clone udat_clone_4_0
-#define udat_close udat_close_4_0
-#define udat_countAvailable udat_countAvailable_4_0
-#define udat_countSymbols udat_countSymbols_4_0
-#define udat_format udat_format_4_0
-#define udat_get2DigitYearStart udat_get2DigitYearStart_4_0
-#define udat_getAvailable udat_getAvailable_4_0
-#define udat_getCalendar udat_getCalendar_4_0
-#define udat_getLocaleByType udat_getLocaleByType_4_0
-#define udat_getNumberFormat udat_getNumberFormat_4_0
-#define udat_getSymbols udat_getSymbols_4_0
-#define udat_isLenient udat_isLenient_4_0
-#define udat_open udat_open_4_0
-#define udat_parse udat_parse_4_0
-#define udat_parseCalendar udat_parseCalendar_4_0
-#define udat_set2DigitYearStart udat_set2DigitYearStart_4_0
-#define udat_setCalendar udat_setCalendar_4_0
-#define udat_setLenient udat_setLenient_4_0
-#define udat_setNumberFormat udat_setNumberFormat_4_0
-#define udat_setSymbols udat_setSymbols_4_0
-#define udat_toPattern udat_toPattern_4_0
-#define udata_checkCommonData udata_checkCommonData_4_0
-#define udata_close udata_close_4_0
-#define udata_closeSwapper udata_closeSwapper_4_0
-#define udata_getHeaderSize udata_getHeaderSize_4_0
-#define udata_getInfo udata_getInfo_4_0
-#define udata_getInfoSize udata_getInfoSize_4_0
-#define udata_getLength udata_getLength_4_0
-#define udata_getMemory udata_getMemory_4_0
-#define udata_getRawMemory udata_getRawMemory_4_0
-#define udata_open udata_open_4_0
-#define udata_openChoice udata_openChoice_4_0
-#define udata_openSwapper udata_openSwapper_4_0
-#define udata_openSwapperForInputData udata_openSwapperForInputData_4_0
-#define udata_printError udata_printError_4_0
-#define udata_readInt16 udata_readInt16_4_0
-#define udata_readInt32 udata_readInt32_4_0
-#define udata_setAppData udata_setAppData_4_0
-#define udata_setCommonData udata_setCommonData_4_0
-#define udata_setFileAccess udata_setFileAccess_4_0
-#define udata_swapDataHeader udata_swapDataHeader_4_0
-#define udata_swapInvStringBlock udata_swapInvStringBlock_4_0
-#define udatpg_addPattern udatpg_addPattern_4_0
-#define udatpg_clone udatpg_clone_4_0
-#define udatpg_close udatpg_close_4_0
-#define udatpg_getAppendItemFormat udatpg_getAppendItemFormat_4_0
-#define udatpg_getAppendItemName udatpg_getAppendItemName_4_0
-#define udatpg_getBaseSkeleton udatpg_getBaseSkeleton_4_0
-#define udatpg_getBestPattern udatpg_getBestPattern_4_0
-#define udatpg_getDateTimeFormat udatpg_getDateTimeFormat_4_0
-#define udatpg_getDecimal udatpg_getDecimal_4_0
-#define udatpg_getPatternForSkeleton udatpg_getPatternForSkeleton_4_0
-#define udatpg_getSkeleton udatpg_getSkeleton_4_0
-#define udatpg_open udatpg_open_4_0
-#define udatpg_openBaseSkeletons udatpg_openBaseSkeletons_4_0
-#define udatpg_openEmpty udatpg_openEmpty_4_0
-#define udatpg_openSkeletons udatpg_openSkeletons_4_0
-#define udatpg_replaceFieldTypes udatpg_replaceFieldTypes_4_0
-#define udatpg_setAppendItemFormat udatpg_setAppendItemFormat_4_0
-#define udatpg_setAppendItemName udatpg_setAppendItemName_4_0
-#define udatpg_setDateTimeFormat udatpg_setDateTimeFormat_4_0
-#define udatpg_setDecimal udatpg_setDecimal_4_0
-#define uenum_close uenum_close_4_0
-#define uenum_count uenum_count_4_0
-#define uenum_next uenum_next_4_0
-#define uenum_nextDefault uenum_nextDefault_4_0
-#define uenum_openCharStringsEnumeration uenum_openCharStringsEnumeration_4_0
-#define uenum_openStringEnumeration uenum_openStringEnumeration_4_0
-#define uenum_reset uenum_reset_4_0
-#define uenum_unext uenum_unext_4_0
-#define uenum_unextDefault uenum_unextDefault_4_0
-#define ufile_close_translit ufile_close_translit_4_0
-#define ufile_fill_uchar_buffer ufile_fill_uchar_buffer_4_0
-#define ufile_flush_translit ufile_flush_translit_4_0
-#define ufile_getch ufile_getch_4_0
-#define ufile_getch32 ufile_getch32_4_0
-#define ufmt_64tou ufmt_64tou_4_0
-#define ufmt_defaultCPToUnicode ufmt_defaultCPToUnicode_4_0
-#define ufmt_digitvalue ufmt_digitvalue_4_0
-#define ufmt_isdigit ufmt_isdigit_4_0
-#define ufmt_ptou ufmt_ptou_4_0
-#define ufmt_uto64 ufmt_uto64_4_0
-#define ufmt_utop ufmt_utop_4_0
-#define uhash_close uhash_close_4_0
-#define uhash_compareCaselessUnicodeString uhash_compareCaselessUnicodeString_4_0
-#define uhash_compareChars uhash_compareChars_4_0
-#define uhash_compareIChars uhash_compareIChars_4_0
-#define uhash_compareLong uhash_compareLong_4_0
-#define uhash_compareUChars uhash_compareUChars_4_0
-#define uhash_compareUnicodeString uhash_compareUnicodeString_4_0
-#define uhash_count uhash_count_4_0
-#define uhash_deleteHashtable uhash_deleteHashtable_4_0
-#define uhash_deleteUVector uhash_deleteUVector_4_0
-#define uhash_deleteUnicodeString uhash_deleteUnicodeString_4_0
-#define uhash_equals uhash_equals_4_0
-#define uhash_find uhash_find_4_0
-#define uhash_freeBlock uhash_freeBlock_4_0
-#define uhash_get uhash_get_4_0
-#define uhash_geti uhash_geti_4_0
-#define uhash_hashCaselessUnicodeString uhash_hashCaselessUnicodeString_4_0
-#define uhash_hashChars uhash_hashChars_4_0
-#define uhash_hashIChars uhash_hashIChars_4_0
-#define uhash_hashLong uhash_hashLong_4_0
-#define uhash_hashUChars uhash_hashUChars_4_0
-#define uhash_hashUCharsN uhash_hashUCharsN_4_0
-#define uhash_hashUnicodeString uhash_hashUnicodeString_4_0
-#define uhash_iget uhash_iget_4_0
-#define uhash_igeti uhash_igeti_4_0
-#define uhash_init uhash_init_4_0
-#define uhash_iput uhash_iput_4_0
-#define uhash_iputi uhash_iputi_4_0
-#define uhash_iremove uhash_iremove_4_0
-#define uhash_iremovei uhash_iremovei_4_0
-#define uhash_nextElement uhash_nextElement_4_0
-#define uhash_open uhash_open_4_0
-#define uhash_openSize uhash_openSize_4_0
-#define uhash_put uhash_put_4_0
-#define uhash_puti uhash_puti_4_0
-#define uhash_remove uhash_remove_4_0
-#define uhash_removeAll uhash_removeAll_4_0
-#define uhash_removeElement uhash_removeElement_4_0
-#define uhash_removei uhash_removei_4_0
-#define uhash_setKeyComparator uhash_setKeyComparator_4_0
-#define uhash_setKeyDeleter uhash_setKeyDeleter_4_0
-#define uhash_setKeyHasher uhash_setKeyHasher_4_0
-#define uhash_setResizePolicy uhash_setResizePolicy_4_0
-#define uhash_setValueComparator uhash_setValueComparator_4_0
-#define uhash_setValueDeleter uhash_setValueDeleter_4_0
-#define uhst_addPropertyStarts uhst_addPropertyStarts_4_0
-#define uidna_IDNToASCII uidna_IDNToASCII_4_0
-#define uidna_IDNToUnicode uidna_IDNToUnicode_4_0
-#define uidna_compare uidna_compare_4_0
-#define uidna_toASCII uidna_toASCII_4_0
-#define uidna_toUnicode uidna_toUnicode_4_0
-#define uiter_current32 uiter_current32_4_0
-#define uiter_getState uiter_getState_4_0
-#define uiter_next32 uiter_next32_4_0
-#define uiter_previous32 uiter_previous32_4_0
-#define uiter_setCharacterIterator uiter_setCharacterIterator_4_0
-#define uiter_setReplaceable uiter_setReplaceable_4_0
-#define uiter_setState uiter_setState_4_0
-#define uiter_setString uiter_setString_4_0
-#define uiter_setUTF16BE uiter_setUTF16BE_4_0
-#define uiter_setUTF8 uiter_setUTF8_4_0
-#define uloc_acceptLanguage uloc_acceptLanguage_4_0
-#define uloc_acceptLanguageFromHTTP uloc_acceptLanguageFromHTTP_4_0
-#define uloc_addLikelySubtags uloc_addLikelySubtags_4_0
-#define uloc_canonicalize uloc_canonicalize_4_0
-#define uloc_countAvailable uloc_countAvailable_4_0
-#define uloc_getAvailable uloc_getAvailable_4_0
-#define uloc_getBaseName uloc_getBaseName_4_0
-#define uloc_getCharacterOrientation uloc_getCharacterOrientation_4_0
-#define uloc_getCountry uloc_getCountry_4_0
-#define uloc_getDefault uloc_getDefault_4_0
-#define uloc_getDisplayCountry uloc_getDisplayCountry_4_0
-#define uloc_getDisplayKeyword uloc_getDisplayKeyword_4_0
-#define uloc_getDisplayKeywordValue uloc_getDisplayKeywordValue_4_0
-#define uloc_getDisplayLanguage uloc_getDisplayLanguage_4_0
-#define uloc_getDisplayName uloc_getDisplayName_4_0
-#define uloc_getDisplayScript uloc_getDisplayScript_4_0
-#define uloc_getDisplayVariant uloc_getDisplayVariant_4_0
-#define uloc_getISO3Country uloc_getISO3Country_4_0
-#define uloc_getISO3Language uloc_getISO3Language_4_0
-#define uloc_getISOCountries uloc_getISOCountries_4_0
-#define uloc_getISOLanguages uloc_getISOLanguages_4_0
-#define uloc_getKeywordValue uloc_getKeywordValue_4_0
-#define uloc_getLCID uloc_getLCID_4_0
-#define uloc_getLanguage uloc_getLanguage_4_0
-#define uloc_getLineOrientation uloc_getLineOrientation_4_0
-#define uloc_getLocaleForLCID uloc_getLocaleForLCID_4_0
-#define uloc_getName uloc_getName_4_0
-#define uloc_getParent uloc_getParent_4_0
-#define uloc_getScript uloc_getScript_4_0
-#define uloc_getVariant uloc_getVariant_4_0
-#define uloc_minimizeSubtags uloc_minimizeSubtags_4_0
-#define uloc_openKeywordList uloc_openKeywordList_4_0
-#define uloc_openKeywords uloc_openKeywords_4_0
-#define uloc_setDefault uloc_setDefault_4_0
-#define uloc_setKeywordValue uloc_setKeywordValue_4_0
-#define ulocdata_close ulocdata_close_4_0
-#define ulocdata_getDelimiter ulocdata_getDelimiter_4_0
-#define ulocdata_getExemplarSet ulocdata_getExemplarSet_4_0
-#define ulocdata_getMeasurementSystem ulocdata_getMeasurementSystem_4_0
-#define ulocdata_getNoSubstitute ulocdata_getNoSubstitute_4_0
-#define ulocdata_getPaperSize ulocdata_getPaperSize_4_0
-#define ulocdata_open ulocdata_open_4_0
-#define ulocdata_setNoSubstitute ulocdata_setNoSubstitute_4_0
-#define umsg_applyPattern umsg_applyPattern_4_0
-#define umsg_autoQuoteApostrophe umsg_autoQuoteApostrophe_4_0
-#define umsg_clone umsg_clone_4_0
-#define umsg_close umsg_close_4_0
-#define umsg_format umsg_format_4_0
-#define umsg_getLocale umsg_getLocale_4_0
-#define umsg_open umsg_open_4_0
-#define umsg_parse umsg_parse_4_0
-#define umsg_setLocale umsg_setLocale_4_0
-#define umsg_toPattern umsg_toPattern_4_0
-#define umsg_vformat umsg_vformat_4_0
-#define umsg_vparse umsg_vparse_4_0
-#define umtx_atomic_dec umtx_atomic_dec_4_0
-#define umtx_atomic_inc umtx_atomic_inc_4_0
-#define umtx_cleanup umtx_cleanup_4_0
-#define umtx_destroy umtx_destroy_4_0
-#define umtx_init umtx_init_4_0
-#define umtx_lock umtx_lock_4_0
-#define umtx_unlock umtx_unlock_4_0
-#define unorm_addPropertyStarts unorm_addPropertyStarts_4_0
-#define unorm_closeIter unorm_closeIter_4_0
-#define unorm_compare unorm_compare_4_0
-#define unorm_compose unorm_compose_4_0
-#define unorm_concatenate unorm_concatenate_4_0
-#define unorm_decompose unorm_decompose_4_0
-#define unorm_getCanonStartSet unorm_getCanonStartSet_4_0
-#define unorm_getCanonicalDecomposition unorm_getCanonicalDecomposition_4_0
-#define unorm_getDecomposition unorm_getDecomposition_4_0
-#define unorm_getFCD16FromCodePoint unorm_getFCD16FromCodePoint_4_0
-#define unorm_getFCDTrie unorm_getFCDTrie_4_0
-#define unorm_getNX unorm_getNX_4_0
-#define unorm_getQuickCheck unorm_getQuickCheck_4_0
-#define unorm_getUnicodeVersion unorm_getUnicodeVersion_4_0
-#define unorm_haveData unorm_haveData_4_0
-#define unorm_internalIsFullCompositionExclusion unorm_internalIsFullCompositionExclusion_4_0
-#define unorm_internalNormalize unorm_internalNormalize_4_0
-#define unorm_internalNormalizeWithNX unorm_internalNormalizeWithNX_4_0
-#define unorm_internalQuickCheck unorm_internalQuickCheck_4_0
-#define unorm_isCanonSafeStart unorm_isCanonSafeStart_4_0
-#define unorm_isNFSkippable unorm_isNFSkippable_4_0
-#define unorm_isNormalized unorm_isNormalized_4_0
-#define unorm_isNormalizedWithOptions unorm_isNormalizedWithOptions_4_0
-#define unorm_next unorm_next_4_0
-#define unorm_normalize unorm_normalize_4_0
-#define unorm_openIter unorm_openIter_4_0
-#define unorm_previous unorm_previous_4_0
-#define unorm_quickCheck unorm_quickCheck_4_0
-#define unorm_quickCheckWithOptions unorm_quickCheckWithOptions_4_0
-#define unorm_setIter unorm_setIter_4_0
-#define unum_applyPattern unum_applyPattern_4_0
-#define unum_clone unum_clone_4_0
-#define unum_close unum_close_4_0
-#define unum_countAvailable unum_countAvailable_4_0
-#define unum_format unum_format_4_0
-#define unum_formatDouble unum_formatDouble_4_0
-#define unum_formatDoubleCurrency unum_formatDoubleCurrency_4_0
-#define unum_formatInt64 unum_formatInt64_4_0
-#define unum_getAttribute unum_getAttribute_4_0
-#define unum_getAvailable unum_getAvailable_4_0
-#define unum_getDoubleAttribute unum_getDoubleAttribute_4_0
-#define unum_getLocaleByType unum_getLocaleByType_4_0
-#define unum_getSymbol unum_getSymbol_4_0
-#define unum_getTextAttribute unum_getTextAttribute_4_0
-#define unum_open unum_open_4_0
-#define unum_parse unum_parse_4_0
-#define unum_parseDouble unum_parseDouble_4_0
-#define unum_parseDoubleCurrency unum_parseDoubleCurrency_4_0
-#define unum_parseInt64 unum_parseInt64_4_0
-#define unum_setAttribute unum_setAttribute_4_0
-#define unum_setDoubleAttribute unum_setDoubleAttribute_4_0
-#define unum_setSymbol unum_setSymbol_4_0
-#define unum_setTextAttribute unum_setTextAttribute_4_0
-#define unum_toPattern unum_toPattern_4_0
-#define upname_swap upname_swap_4_0
-#define uprops_getSource uprops_getSource_4_0
-#define upropsvec_addPropertyStarts upropsvec_addPropertyStarts_4_0
-#define uprv_asciiFromEbcdic uprv_asciiFromEbcdic_4_0
-#define uprv_asciitolower uprv_asciitolower_4_0
-#define uprv_ceil uprv_ceil_4_0
-#define uprv_cnttab_addContraction uprv_cnttab_addContraction_4_0
-#define uprv_cnttab_changeContraction uprv_cnttab_changeContraction_4_0
-#define uprv_cnttab_changeLastCE uprv_cnttab_changeLastCE_4_0
-#define uprv_cnttab_clone uprv_cnttab_clone_4_0
-#define uprv_cnttab_close uprv_cnttab_close_4_0
-#define uprv_cnttab_constructTable uprv_cnttab_constructTable_4_0
-#define uprv_cnttab_findCE uprv_cnttab_findCE_4_0
-#define uprv_cnttab_findCP uprv_cnttab_findCP_4_0
-#define uprv_cnttab_getCE uprv_cnttab_getCE_4_0
-#define uprv_cnttab_insertContraction uprv_cnttab_insertContraction_4_0
-#define uprv_cnttab_isTailored uprv_cnttab_isTailored_4_0
-#define uprv_cnttab_open uprv_cnttab_open_4_0
-#define uprv_cnttab_setContraction uprv_cnttab_setContraction_4_0
-#define uprv_compareASCIIPropertyNames uprv_compareASCIIPropertyNames_4_0
-#define uprv_compareEBCDICPropertyNames uprv_compareEBCDICPropertyNames_4_0
-#define uprv_compareInvAscii uprv_compareInvAscii_4_0
-#define uprv_compareInvEbcdic uprv_compareInvEbcdic_4_0
-#define uprv_convertToLCID uprv_convertToLCID_4_0
-#define uprv_convertToPosix uprv_convertToPosix_4_0
-#define uprv_copyAscii uprv_copyAscii_4_0
-#define uprv_copyEbcdic uprv_copyEbcdic_4_0
-#define uprv_ebcdicFromAscii uprv_ebcdicFromAscii_4_0
-#define uprv_ebcdictolower uprv_ebcdictolower_4_0
-#define uprv_fabs uprv_fabs_4_0
-#define uprv_floor uprv_floor_4_0
-#define uprv_fmax uprv_fmax_4_0
-#define uprv_fmin uprv_fmin_4_0
-#define uprv_fmod uprv_fmod_4_0
-#define uprv_free uprv_free_4_0
-#define uprv_getCharNameCharacters uprv_getCharNameCharacters_4_0
-#define uprv_getDefaultCodepage uprv_getDefaultCodepage_4_0
-#define uprv_getDefaultLocaleID uprv_getDefaultLocaleID_4_0
-#define uprv_getInfinity uprv_getInfinity_4_0
-#define uprv_getMaxCharNameLength uprv_getMaxCharNameLength_4_0
-#define uprv_getMaxValues uprv_getMaxValues_4_0
-#define uprv_getNaN uprv_getNaN_4_0
-#define uprv_getStaticCurrencyName uprv_getStaticCurrencyName_4_0
-#define uprv_getUTCtime uprv_getUTCtime_4_0
-#define uprv_haveProperties uprv_haveProperties_4_0
-#define uprv_init_collIterate uprv_init_collIterate_4_0
-#define uprv_init_pce uprv_init_pce_4_0
-#define uprv_int32Comparator uprv_int32Comparator_4_0
-#define uprv_isInfinite uprv_isInfinite_4_0
-#define uprv_isInvariantString uprv_isInvariantString_4_0
-#define uprv_isInvariantUString uprv_isInvariantUString_4_0
-#define uprv_isNaN uprv_isNaN_4_0
-#define uprv_isNegativeInfinity uprv_isNegativeInfinity_4_0
-#define uprv_isPositiveInfinity uprv_isPositiveInfinity_4_0
-#define uprv_isRuleWhiteSpace uprv_isRuleWhiteSpace_4_0
-#define uprv_itou uprv_itou_4_0
-#define uprv_log uprv_log_4_0
-#define uprv_malloc uprv_malloc_4_0
-#define uprv_mapFile uprv_mapFile_4_0
-#define uprv_max uprv_max_4_0
-#define uprv_maxMantissa uprv_maxMantissa_4_0
-#define uprv_maximumPtr uprv_maximumPtr_4_0
-#define uprv_min uprv_min_4_0
-#define uprv_modf uprv_modf_4_0
-#define uprv_openRuleWhiteSpaceSet uprv_openRuleWhiteSpaceSet_4_0
-#define uprv_parseCurrency uprv_parseCurrency_4_0
-#define uprv_pathIsAbsolute uprv_pathIsAbsolute_4_0
-#define uprv_pow uprv_pow_4_0
-#define uprv_pow10 uprv_pow10_4_0
-#define uprv_realloc uprv_realloc_4_0
-#define uprv_round uprv_round_4_0
-#define uprv_sortArray uprv_sortArray_4_0
-#define uprv_strCompare uprv_strCompare_4_0
-#define uprv_strdup uprv_strdup_4_0
-#define uprv_strndup uprv_strndup_4_0
-#define uprv_syntaxError uprv_syntaxError_4_0
-#define uprv_timezone uprv_timezone_4_0
-#define uprv_toupper uprv_toupper_4_0
-#define uprv_trunc uprv_trunc_4_0
-#define uprv_tzname uprv_tzname_4_0
-#define uprv_tzset uprv_tzset_4_0
-#define uprv_uca_addAnElement uprv_uca_addAnElement_4_0
-#define uprv_uca_assembleTable uprv_uca_assembleTable_4_0
-#define uprv_uca_canonicalClosure uprv_uca_canonicalClosure_4_0
-#define uprv_uca_closeTempTable uprv_uca_closeTempTable_4_0
-#define uprv_uca_getCodePointFromRaw uprv_uca_getCodePointFromRaw_4_0
-#define uprv_uca_getImplicitFromRaw uprv_uca_getImplicitFromRaw_4_0
-#define uprv_uca_getRawFromCodePoint uprv_uca_getRawFromCodePoint_4_0
-#define uprv_uca_getRawFromImplicit uprv_uca_getRawFromImplicit_4_0
-#define uprv_uca_initImplicitConstants uprv_uca_initImplicitConstants_4_0
-#define uprv_uca_initTempTable uprv_uca_initTempTable_4_0
-#define uprv_uint16Comparator uprv_uint16Comparator_4_0
-#define uprv_uint32Comparator uprv_uint32Comparator_4_0
-#define uprv_unmapFile uprv_unmapFile_4_0
-#define uregex_appendReplacement uregex_appendReplacement_4_0
-#define uregex_appendTail uregex_appendTail_4_0
-#define uregex_clone uregex_clone_4_0
-#define uregex_close uregex_close_4_0
-#define uregex_end uregex_end_4_0
-#define uregex_find uregex_find_4_0
-#define uregex_findNext uregex_findNext_4_0
-#define uregex_flags uregex_flags_4_0
-#define uregex_getMatchCallback uregex_getMatchCallback_4_0
-#define uregex_getStackLimit uregex_getStackLimit_4_0
-#define uregex_getText uregex_getText_4_0
-#define uregex_getTimeLimit uregex_getTimeLimit_4_0
-#define uregex_group uregex_group_4_0
-#define uregex_groupCount uregex_groupCount_4_0
-#define uregex_hasAnchoringBounds uregex_hasAnchoringBounds_4_0
-#define uregex_hasTransparentBounds uregex_hasTransparentBounds_4_0
-#define uregex_hitEnd uregex_hitEnd_4_0
-#define uregex_lookingAt uregex_lookingAt_4_0
-#define uregex_matches uregex_matches_4_0
-#define uregex_open uregex_open_4_0
-#define uregex_openC uregex_openC_4_0
-#define uregex_pattern uregex_pattern_4_0
-#define uregex_regionEnd uregex_regionEnd_4_0
-#define uregex_regionStart uregex_regionStart_4_0
-#define uregex_replaceAll uregex_replaceAll_4_0
-#define uregex_replaceFirst uregex_replaceFirst_4_0
-#define uregex_requireEnd uregex_requireEnd_4_0
-#define uregex_reset uregex_reset_4_0
-#define uregex_setMatchCallback uregex_setMatchCallback_4_0
-#define uregex_setRegion uregex_setRegion_4_0
-#define uregex_setStackLimit uregex_setStackLimit_4_0
-#define uregex_setText uregex_setText_4_0
-#define uregex_setTimeLimit uregex_setTimeLimit_4_0
-#define uregex_split uregex_split_4_0
-#define uregex_start uregex_start_4_0
-#define uregex_useAnchoringBounds uregex_useAnchoringBounds_4_0
-#define uregex_useTransparentBounds uregex_useTransparentBounds_4_0
-#define ures_close ures_close_4_0
-#define ures_copyResb ures_copyResb_4_0
-#define ures_countArrayItems ures_countArrayItems_4_0
-#define ures_findResource ures_findResource_4_0
-#define ures_findSubResource ures_findSubResource_4_0
-#define ures_getBinary ures_getBinary_4_0
-#define ures_getByIndex ures_getByIndex_4_0
-#define ures_getByKey ures_getByKey_4_0
-#define ures_getByKeyWithFallback ures_getByKeyWithFallback_4_0
-#define ures_getFunctionalEquivalent ures_getFunctionalEquivalent_4_0
-#define ures_getInt ures_getInt_4_0
-#define ures_getIntVector ures_getIntVector_4_0
-#define ures_getKey ures_getKey_4_0
-#define ures_getKeywordValues ures_getKeywordValues_4_0
-#define ures_getLocale ures_getLocale_4_0
-#define ures_getLocaleByType ures_getLocaleByType_4_0
-#define ures_getName ures_getName_4_0
-#define ures_getNextResource ures_getNextResource_4_0
-#define ures_getNextString ures_getNextString_4_0
-#define ures_getSize ures_getSize_4_0
-#define ures_getString ures_getString_4_0
-#define ures_getStringByIndex ures_getStringByIndex_4_0
-#define ures_getStringByKey ures_getStringByKey_4_0
-#define ures_getStringByKeyWithFallback ures_getStringByKeyWithFallback_4_0
-#define ures_getType ures_getType_4_0
-#define ures_getUInt ures_getUInt_4_0
-#define ures_getUTF8String ures_getUTF8String_4_0
-#define ures_getUTF8StringByIndex ures_getUTF8StringByIndex_4_0
-#define ures_getUTF8StringByKey ures_getUTF8StringByKey_4_0
-#define ures_getVersion ures_getVersion_4_0
-#define ures_getVersionNumber ures_getVersionNumber_4_0
-#define ures_hasNext ures_hasNext_4_0
-#define ures_initStackObject ures_initStackObject_4_0
-#define ures_open ures_open_4_0
-#define ures_openAvailableLocales ures_openAvailableLocales_4_0
-#define ures_openDirect ures_openDirect_4_0
-#define ures_openFillIn ures_openFillIn_4_0
-#define ures_openU ures_openU_4_0
-#define ures_resetIterator ures_resetIterator_4_0
-#define ures_swap ures_swap_4_0
-#define uscript_closeRun uscript_closeRun_4_0
-#define uscript_getCode uscript_getCode_4_0
-#define uscript_getName uscript_getName_4_0
-#define uscript_getScript uscript_getScript_4_0
-#define uscript_getShortName uscript_getShortName_4_0
-#define uscript_nextRun uscript_nextRun_4_0
-#define uscript_openRun uscript_openRun_4_0
-#define uscript_resetRun uscript_resetRun_4_0
-#define uscript_setRunText uscript_setRunText_4_0
-#define usearch_close usearch_close_4_0
-#define usearch_first usearch_first_4_0
-#define usearch_following usearch_following_4_0
-#define usearch_getAttribute usearch_getAttribute_4_0
-#define usearch_getBreakIterator usearch_getBreakIterator_4_0
-#define usearch_getCollator usearch_getCollator_4_0
-#define usearch_getMatchedLength usearch_getMatchedLength_4_0
-#define usearch_getMatchedStart usearch_getMatchedStart_4_0
-#define usearch_getMatchedText usearch_getMatchedText_4_0
-#define usearch_getOffset usearch_getOffset_4_0
-#define usearch_getPattern usearch_getPattern_4_0
-#define usearch_getText usearch_getText_4_0
-#define usearch_handleNextCanonical usearch_handleNextCanonical_4_0
-#define usearch_handleNextExact usearch_handleNextExact_4_0
-#define usearch_handlePreviousCanonical usearch_handlePreviousCanonical_4_0
-#define usearch_handlePreviousExact usearch_handlePreviousExact_4_0
-#define usearch_last usearch_last_4_0
-#define usearch_next usearch_next_4_0
-#define usearch_open usearch_open_4_0
-#define usearch_openFromCollator usearch_openFromCollator_4_0
-#define usearch_preceding usearch_preceding_4_0
-#define usearch_previous usearch_previous_4_0
-#define usearch_reset usearch_reset_4_0
-#define usearch_search usearch_search_4_0
-#define usearch_searchBackwards usearch_searchBackwards_4_0
-#define usearch_setAttribute usearch_setAttribute_4_0
-#define usearch_setBreakIterator usearch_setBreakIterator_4_0
-#define usearch_setCollator usearch_setCollator_4_0
-#define usearch_setOffset usearch_setOffset_4_0
-#define usearch_setPattern usearch_setPattern_4_0
-#define usearch_setText usearch_setText_4_0
-#define uset_add uset_add_4_0
-#define uset_addAll uset_addAll_4_0
-#define uset_addAllCodePoints uset_addAllCodePoints_4_0
-#define uset_addRange uset_addRange_4_0
-#define uset_addString uset_addString_4_0
-#define uset_applyIntPropertyValue uset_applyIntPropertyValue_4_0
-#define uset_applyPattern uset_applyPattern_4_0
-#define uset_applyPropertyAlias uset_applyPropertyAlias_4_0
-#define uset_charAt uset_charAt_4_0
-#define uset_clear uset_clear_4_0
-#define uset_clone uset_clone_4_0
-#define uset_cloneAsThawed uset_cloneAsThawed_4_0
-#define uset_close uset_close_4_0
-#define uset_compact uset_compact_4_0
-#define uset_complement uset_complement_4_0
-#define uset_complementAll uset_complementAll_4_0
-#define uset_contains uset_contains_4_0
-#define uset_containsAll uset_containsAll_4_0
-#define uset_containsAllCodePoints uset_containsAllCodePoints_4_0
-#define uset_containsNone uset_containsNone_4_0
-#define uset_containsRange uset_containsRange_4_0
-#define uset_containsSome uset_containsSome_4_0
-#define uset_containsString uset_containsString_4_0
-#define uset_equals uset_equals_4_0
-#define uset_freeze uset_freeze_4_0
-#define uset_getItem uset_getItem_4_0
-#define uset_getItemCount uset_getItemCount_4_0
-#define uset_getSerializedRange uset_getSerializedRange_4_0
-#define uset_getSerializedRangeCount uset_getSerializedRangeCount_4_0
-#define uset_getSerializedSet uset_getSerializedSet_4_0
-#define uset_indexOf uset_indexOf_4_0
-#define uset_isEmpty uset_isEmpty_4_0
-#define uset_isFrozen uset_isFrozen_4_0
-#define uset_open uset_open_4_0
-#define uset_openPattern uset_openPattern_4_0
-#define uset_openPatternOptions uset_openPatternOptions_4_0
-#define uset_remove uset_remove_4_0
-#define uset_removeAll uset_removeAll_4_0
-#define uset_removeRange uset_removeRange_4_0
-#define uset_removeString uset_removeString_4_0
-#define uset_resemblesPattern uset_resemblesPattern_4_0
-#define uset_retain uset_retain_4_0
-#define uset_retainAll uset_retainAll_4_0
-#define uset_serialize uset_serialize_4_0
-#define uset_serializedContains uset_serializedContains_4_0
-#define uset_set uset_set_4_0
-#define uset_setSerializedToOne uset_setSerializedToOne_4_0
-#define uset_size uset_size_4_0
-#define uset_span uset_span_4_0
-#define uset_spanBack uset_spanBack_4_0
-#define uset_spanBackUTF8 uset_spanBackUTF8_4_0
-#define uset_spanUTF8 uset_spanUTF8_4_0
-#define uset_toPattern uset_toPattern_4_0
-#define usprep_close usprep_close_4_0
-#define usprep_open usprep_open_4_0
-#define usprep_prepare usprep_prepare_4_0
-#define usprep_swap usprep_swap_4_0
-#define ustr_foldCase ustr_foldCase_4_0
-#define ustr_toLower ustr_toLower_4_0
-#define ustr_toTitle ustr_toTitle_4_0
-#define ustr_toUpper ustr_toUpper_4_0
-#define utext_char32At utext_char32At_4_0
-#define utext_clone utext_clone_4_0
-#define utext_close utext_close_4_0
-#define utext_copy utext_copy_4_0
-#define utext_current32 utext_current32_4_0
-#define utext_equals utext_equals_4_0
-#define utext_extract utext_extract_4_0
-#define utext_freeze utext_freeze_4_0
-#define utext_getNativeIndex utext_getNativeIndex_4_0
-#define utext_getPreviousNativeIndex utext_getPreviousNativeIndex_4_0
-#define utext_hasMetaData utext_hasMetaData_4_0
-#define utext_isLengthExpensive utext_isLengthExpensive_4_0
-#define utext_isWritable utext_isWritable_4_0
-#define utext_moveIndex32 utext_moveIndex32_4_0
-#define utext_nativeLength utext_nativeLength_4_0
-#define utext_next32 utext_next32_4_0
-#define utext_next32From utext_next32From_4_0
-#define utext_openCharacterIterator utext_openCharacterIterator_4_0
-#define utext_openConstUnicodeString utext_openConstUnicodeString_4_0
-#define utext_openReplaceable utext_openReplaceable_4_0
-#define utext_openUChars utext_openUChars_4_0
-#define utext_openUTF8 utext_openUTF8_4_0
-#define utext_openUnicodeString utext_openUnicodeString_4_0
-#define utext_previous32 utext_previous32_4_0
-#define utext_previous32From utext_previous32From_4_0
-#define utext_replace utext_replace_4_0
-#define utext_setNativeIndex utext_setNativeIndex_4_0
-#define utext_setup utext_setup_4_0
-#define utf8_appendCharSafeBody utf8_appendCharSafeBody_4_0
-#define utf8_back1SafeBody utf8_back1SafeBody_4_0
-#define utf8_countTrailBytes utf8_countTrailBytes_4_0
-#define utf8_nextCharSafeBody utf8_nextCharSafeBody_4_0
-#define utf8_prevCharSafeBody utf8_prevCharSafeBody_4_0
-#define utmscale_fromInt64 utmscale_fromInt64_4_0
-#define utmscale_getTimeScaleValue utmscale_getTimeScaleValue_4_0
-#define utmscale_toInt64 utmscale_toInt64_4_0
-#define utrace_cleanup utrace_cleanup_4_0
-#define utrace_data utrace_data_4_0
-#define utrace_entry utrace_entry_4_0
-#define utrace_exit utrace_exit_4_0
-#define utrace_format utrace_format_4_0
-#define utrace_functionName utrace_functionName_4_0
-#define utrace_getFunctions utrace_getFunctions_4_0
-#define utrace_getLevel utrace_getLevel_4_0
-#define utrace_level utrace_level_4_0
-#define utrace_setFunctions utrace_setFunctions_4_0
-#define utrace_setLevel utrace_setLevel_4_0
-#define utrace_vformat utrace_vformat_4_0
-#define utrans_clone utrans_clone_4_0
-#define utrans_close utrans_close_4_0
-#define utrans_countAvailableIDs utrans_countAvailableIDs_4_0
-#define utrans_getAvailableID utrans_getAvailableID_4_0
-#define utrans_getID utrans_getID_4_0
-#define utrans_getUnicodeID utrans_getUnicodeID_4_0
-#define utrans_open utrans_open_4_0
-#define utrans_openIDs utrans_openIDs_4_0
-#define utrans_openInverse utrans_openInverse_4_0
-#define utrans_openU utrans_openU_4_0
-#define utrans_register utrans_register_4_0
-#define utrans_rep_caseContextIterator utrans_rep_caseContextIterator_4_0
-#define utrans_setFilter utrans_setFilter_4_0
-#define utrans_stripRules utrans_stripRules_4_0
-#define utrans_trans utrans_trans_4_0
-#define utrans_transIncremental utrans_transIncremental_4_0
-#define utrans_transIncrementalUChars utrans_transIncrementalUChars_4_0
-#define utrans_transUChars utrans_transUChars_4_0
-#define utrans_unregister utrans_unregister_4_0
-#define utrans_unregisterID utrans_unregisterID_4_0
-#define utrie_clone utrie_clone_4_0
-#define utrie_close utrie_close_4_0
-#define utrie_defaultGetFoldingOffset utrie_defaultGetFoldingOffset_4_0
-#define utrie_enum utrie_enum_4_0
-#define utrie_get32 utrie_get32_4_0
-#define utrie_getData utrie_getData_4_0
-#define utrie_open utrie_open_4_0
-#define utrie_serialize utrie_serialize_4_0
-#define utrie_set32 utrie_set32_4_0
-#define utrie_setRange32 utrie_setRange32_4_0
-#define utrie_swap utrie_swap_4_0
-#define utrie_unserialize utrie_unserialize_4_0
-#define utrie_unserializeDummy utrie_unserializeDummy_4_0
-/* C++ class names renaming defines */
-
-#ifdef XP_CPLUSPLUS
-#if !U_HAVE_NAMESPACE
-
-#define AbsoluteValueSubstitution AbsoluteValueSubstitution_4_0
-#define AlternateSubstitutionSubtable AlternateSubstitutionSubtable_4_0
-#define AnchorTable AnchorTable_4_0
-#define AndConstraint AndConstraint_4_0
-#define AnnualTimeZoneRule AnnualTimeZoneRule_4_0
-#define AnyTransliterator AnyTransliterator_4_0
-#define ArabicOpenTypeLayoutEngine ArabicOpenTypeLayoutEngine_4_0
-#define ArabicShaping ArabicShaping_4_0
-#define BMPSet BMPSet_4_0
-#define BasicCalendarFactory BasicCalendarFactory_4_0
-#define BasicTimeZone BasicTimeZone_4_0
-#define BinarySearchLookupTable BinarySearchLookupTable_4_0
-#define BreakIterator BreakIterator_4_0
-#define BreakTransliterator BreakTransliterator_4_0
-#define BuddhistCalendar BuddhistCalendar_4_0
-#define BuildCompactTrieHorizontalNode BuildCompactTrieHorizontalNode_4_0
-#define BuildCompactTrieNode BuildCompactTrieNode_4_0
-#define BuildCompactTrieVerticalNode BuildCompactTrieVerticalNode_4_0
-#define CEBuffer CEBuffer_4_0
-#define CECalendar CECalendar_4_0
-#define CFactory CFactory_4_0
-#define Calendar Calendar_4_0
-#define CalendarAstronomer CalendarAstronomer_4_0
-#define CalendarCache CalendarCache_4_0
-#define CalendarData CalendarData_4_0
-#define CalendarService CalendarService_4_0
-#define CanonMarkFilter CanonMarkFilter_4_0
-#define CanonShaping CanonShaping_4_0
-#define CanonicalIterator CanonicalIterator_4_0
-#define CaseMapTransliterator CaseMapTransliterator_4_0
-#define ChainingContextualSubstitutionFormat1Subtable ChainingContextualSubstitutionFormat1Subtable_4_0
-#define ChainingContextualSubstitutionFormat2Subtable ChainingContextualSubstitutionFormat2Subtable_4_0
-#define ChainingContextualSubstitutionFormat3Subtable ChainingContextualSubstitutionFormat3Subtable_4_0
-#define ChainingContextualSubstitutionSubtable ChainingContextualSubstitutionSubtable_4_0
-#define CharSubstitutionFilter CharSubstitutionFilter_4_0
-#define CharacterIterator CharacterIterator_4_0
-#define CharacterNode CharacterNode_4_0
-#define CharsetDetector CharsetDetector_4_0
-#define CharsetMatch CharsetMatch_4_0
-#define CharsetRecog_2022 CharsetRecog_2022_4_0
-#define CharsetRecog_2022CN CharsetRecog_2022CN_4_0
-#define CharsetRecog_2022JP CharsetRecog_2022JP_4_0
-#define CharsetRecog_2022KR CharsetRecog_2022KR_4_0
-#define CharsetRecog_8859_1 CharsetRecog_8859_1_4_0
-#define CharsetRecog_8859_1_da CharsetRecog_8859_1_da_4_0
-#define CharsetRecog_8859_1_de CharsetRecog_8859_1_de_4_0
-#define CharsetRecog_8859_1_en CharsetRecog_8859_1_en_4_0
-#define CharsetRecog_8859_1_es CharsetRecog_8859_1_es_4_0
-#define CharsetRecog_8859_1_fr CharsetRecog_8859_1_fr_4_0
-#define CharsetRecog_8859_1_it CharsetRecog_8859_1_it_4_0
-#define CharsetRecog_8859_1_nl CharsetRecog_8859_1_nl_4_0
-#define CharsetRecog_8859_1_no CharsetRecog_8859_1_no_4_0
-#define CharsetRecog_8859_1_pt CharsetRecog_8859_1_pt_4_0
-#define CharsetRecog_8859_1_sv CharsetRecog_8859_1_sv_4_0
-#define CharsetRecog_8859_2 CharsetRecog_8859_2_4_0
-#define CharsetRecog_8859_2_cs CharsetRecog_8859_2_cs_4_0
-#define CharsetRecog_8859_2_hu CharsetRecog_8859_2_hu_4_0
-#define CharsetRecog_8859_2_pl CharsetRecog_8859_2_pl_4_0
-#define CharsetRecog_8859_2_ro CharsetRecog_8859_2_ro_4_0
-#define CharsetRecog_8859_5 CharsetRecog_8859_5_4_0
-#define CharsetRecog_8859_5_ru CharsetRecog_8859_5_ru_4_0
-#define CharsetRecog_8859_6 CharsetRecog_8859_6_4_0
-#define CharsetRecog_8859_6_ar CharsetRecog_8859_6_ar_4_0
-#define CharsetRecog_8859_7 CharsetRecog_8859_7_4_0
-#define CharsetRecog_8859_7_el CharsetRecog_8859_7_el_4_0
-#define CharsetRecog_8859_8 CharsetRecog_8859_8_4_0
-#define CharsetRecog_8859_8_I_he CharsetRecog_8859_8_I_he_4_0
-#define CharsetRecog_8859_8_he CharsetRecog_8859_8_he_4_0
-#define CharsetRecog_8859_9 CharsetRecog_8859_9_4_0
-#define CharsetRecog_8859_9_tr CharsetRecog_8859_9_tr_4_0
-#define CharsetRecog_KOI8_R CharsetRecog_KOI8_R_4_0
-#define CharsetRecog_UTF8 CharsetRecog_UTF8_4_0
-#define CharsetRecog_UTF_16_BE CharsetRecog_UTF_16_BE_4_0
-#define CharsetRecog_UTF_16_LE CharsetRecog_UTF_16_LE_4_0
-#define CharsetRecog_UTF_32 CharsetRecog_UTF_32_4_0
-#define CharsetRecog_UTF_32_BE CharsetRecog_UTF_32_BE_4_0
-#define CharsetRecog_UTF_32_LE CharsetRecog_UTF_32_LE_4_0
-#define CharsetRecog_Unicode CharsetRecog_Unicode_4_0
-#define CharsetRecog_big5 CharsetRecog_big5_4_0
-#define CharsetRecog_euc CharsetRecog_euc_4_0
-#define CharsetRecog_euc_jp CharsetRecog_euc_jp_4_0
-#define CharsetRecog_euc_kr CharsetRecog_euc_kr_4_0
-#define CharsetRecog_gb_18030 CharsetRecog_gb_18030_4_0
-#define CharsetRecog_mbcs CharsetRecog_mbcs_4_0
-#define CharsetRecog_sbcs CharsetRecog_sbcs_4_0
-#define CharsetRecog_sjis CharsetRecog_sjis_4_0
-#define CharsetRecog_windows_1251 CharsetRecog_windows_1251_4_0
-#define CharsetRecog_windows_1256 CharsetRecog_windows_1256_4_0
-#define CharsetRecognizer CharsetRecognizer_4_0
-#define ChineseCalendar ChineseCalendar_4_0
-#define ChoiceFormat ChoiceFormat_4_0
-#define ClassDefFormat1Table ClassDefFormat1Table_4_0
-#define ClassDefFormat2Table ClassDefFormat2Table_4_0
-#define ClassDefinitionTable ClassDefinitionTable_4_0
-#define CollationElementIterator CollationElementIterator_4_0
-#define CollationKey CollationKey_4_0
-#define CollationLocaleListEnumeration CollationLocaleListEnumeration_4_0
-#define Collator Collator_4_0
-#define CollatorFactory CollatorFactory_4_0
-#define CompactTrieDictionary CompactTrieDictionary_4_0
-#define CompactTrieEnumeration CompactTrieEnumeration_4_0
-#define CompoundTransliterator CompoundTransliterator_4_0
-#define ContextualGlyphSubstitutionProcessor ContextualGlyphSubstitutionProcessor_4_0
-#define ContextualSubstitutionBase ContextualSubstitutionBase_4_0
-#define ContextualSubstitutionFormat1Subtable ContextualSubstitutionFormat1Subtable_4_0
-#define ContextualSubstitutionFormat2Subtable ContextualSubstitutionFormat2Subtable_4_0
-#define ContextualSubstitutionFormat3Subtable ContextualSubstitutionFormat3Subtable_4_0
-#define ContextualSubstitutionSubtable ContextualSubstitutionSubtable_4_0
-#define CopticCalendar CopticCalendar_4_0
-#define CoverageFormat1Table CoverageFormat1Table_4_0
-#define CoverageFormat2Table CoverageFormat2Table_4_0
-#define CoverageTable CoverageTable_4_0
-#define CurrencyAmount CurrencyAmount_4_0
-#define CurrencyFormat CurrencyFormat_4_0
-#define CurrencyUnit CurrencyUnit_4_0
-#define CursiveAttachmentSubtable CursiveAttachmentSubtable_4_0
-#define DTRedundantEnumeration DTRedundantEnumeration_4_0
-#define DTSkeletonEnumeration DTSkeletonEnumeration_4_0
-#define DateFormat DateFormat_4_0
-#define DateFormatSymbols DateFormatSymbols_4_0
-#define DateInterval DateInterval_4_0
-#define DateIntervalFormat DateIntervalFormat_4_0
-#define DateIntervalInfo DateIntervalInfo_4_0
-#define DateTimeMatcher DateTimeMatcher_4_0
-#define DateTimePatternGenerator DateTimePatternGenerator_4_0
-#define DateTimeRule DateTimeRule_4_0
-#define DecimalFormat DecimalFormat_4_0
-#define DecimalFormatSymbols DecimalFormatSymbols_4_0
-#define DefaultCalendarFactory DefaultCalendarFactory_4_0
-#define DefaultCharMapper DefaultCharMapper_4_0
-#define DeviceTable DeviceTable_4_0
-#define DictionaryBreakEngine DictionaryBreakEngine_4_0
-#define DigitList DigitList_4_0
-#define DistanceInfo DistanceInfo_4_0
-#define Entry Entry_4_0
-#define EnumToOffset EnumToOffset_4_0
-#define EscapeTransliterator EscapeTransliterator_4_0
-#define EthiopicCalendar EthiopicCalendar_4_0
-#define EventListener EventListener_4_0
-#define ExtensionSubtable ExtensionSubtable_4_0
-#define FeatureListTable FeatureListTable_4_0
-#define FieldPosition FieldPosition_4_0
-#define FontRuns FontRuns_4_0
-#define Format Format_4_0
-#define Format1AnchorTable Format1AnchorTable_4_0
-#define Format2AnchorTable Format2AnchorTable_4_0
-#define Format3AnchorTable Format3AnchorTable_4_0
-#define FormatNameEnumeration FormatNameEnumeration_4_0
-#define FormatParser FormatParser_4_0
-#define Formattable Formattable_4_0
-#define ForwardCharacterIterator ForwardCharacterIterator_4_0
-#define FractionalPartSubstitution FractionalPartSubstitution_4_0
-#define FunctionReplacer FunctionReplacer_4_0
-#define GDEFMarkFilter GDEFMarkFilter_4_0
-#define GXLayoutEngine GXLayoutEngine_4_0
-#define GlyphDefinitionTableHeader GlyphDefinitionTableHeader_4_0
-#define GlyphIterator GlyphIterator_4_0
-#define GlyphLookupTableHeader GlyphLookupTableHeader_4_0
-#define GlyphPositionAdjustments GlyphPositionAdjustments_4_0
-#define GlyphPositioningLookupProcessor GlyphPositioningLookupProcessor_4_0
-#define GlyphPositioningTableHeader GlyphPositioningTableHeader_4_0
-#define GlyphSubstitutionLookupProcessor GlyphSubstitutionLookupProcessor_4_0
-#define GlyphSubstitutionTableHeader GlyphSubstitutionTableHeader_4_0
-#define Grego Grego_4_0
-#define GregorianCalendar GregorianCalendar_4_0
-#define HanOpenTypeLayoutEngine HanOpenTypeLayoutEngine_4_0
-#define HangulOpenTypeLayoutEngine HangulOpenTypeLayoutEngine_4_0
-#define HebrewCalendar HebrewCalendar_4_0
-#define ICUBreakIteratorFactory ICUBreakIteratorFactory_4_0
-#define ICUBreakIteratorService ICUBreakIteratorService_4_0
-#define ICUCollatorFactory ICUCollatorFactory_4_0
-#define ICUCollatorService ICUCollatorService_4_0
-#define ICULanguageBreakFactory ICULanguageBreakFactory_4_0
-#define ICULocaleService ICULocaleService_4_0
-#define ICUNotifier ICUNotifier_4_0
-#define ICUNumberFormatFactory ICUNumberFormatFactory_4_0
-#define ICUNumberFormatService ICUNumberFormatService_4_0
-#define ICUResourceBundleFactory ICUResourceBundleFactory_4_0
-#define ICUService ICUService_4_0
-#define ICUServiceFactory ICUServiceFactory_4_0
-#define ICUServiceKey ICUServiceKey_4_0
-#define ICU_Utility ICU_Utility_4_0
-#define IndianCalendar IndianCalendar_4_0
-#define IndicClassTable IndicClassTable_4_0
-#define IndicOpenTypeLayoutEngine IndicOpenTypeLayoutEngine_4_0
-#define IndicRearrangementProcessor IndicRearrangementProcessor_4_0
-#define IndicReordering IndicReordering_4_0
-#define InitialTimeZoneRule InitialTimeZoneRule_4_0
-#define InputText InputText_4_0
-#define IntegralPartSubstitution IntegralPartSubstitution_4_0
-#define IslamicCalendar IslamicCalendar_4_0
-#define IteratedChar IteratedChar_4_0
-#define JapaneseCalendar JapaneseCalendar_4_0
-#define KernTable KernTable_4_0
-#define KeywordEnumeration KeywordEnumeration_4_0
-#define KhmerClassTable KhmerClassTable_4_0
-#define KhmerOpenTypeLayoutEngine KhmerOpenTypeLayoutEngine_4_0
-#define KhmerReordering KhmerReordering_4_0
-#define LECharMapper LECharMapper_4_0
-#define LEFontInstance LEFontInstance_4_0
-#define LEGlyphFilter LEGlyphFilter_4_0
-#define LEGlyphStorage LEGlyphStorage_4_0
-#define LEInsertionCallback LEInsertionCallback_4_0
-#define LEInsertionList LEInsertionList_4_0
-#define LXUtilities LXUtilities_4_0
-#define LanguageBreakEngine LanguageBreakEngine_4_0
-#define LanguageBreakFactory LanguageBreakFactory_4_0
-#define LayoutEngine LayoutEngine_4_0
-#define LigatureSubstitutionProcessor LigatureSubstitutionProcessor_4_0
-#define LigatureSubstitutionSubtable LigatureSubstitutionSubtable_4_0
-#define LocDataParser LocDataParser_4_0
-#define Locale Locale_4_0
-#define LocaleBased LocaleBased_4_0
-#define LocaleKey LocaleKey_4_0
-#define LocaleKeyFactory LocaleKeyFactory_4_0
-#define LocaleRuns LocaleRuns_4_0
-#define LocaleUtility LocaleUtility_4_0
-#define LocalizationInfo LocalizationInfo_4_0
-#define LookupListTable LookupListTable_4_0
-#define LookupProcessor LookupProcessor_4_0
-#define LookupSubtable LookupSubtable_4_0
-#define LookupTable LookupTable_4_0
-#define LowercaseTransliterator LowercaseTransliterator_4_0
-#define MPreFixups MPreFixups_4_0
-#define MarkArray MarkArray_4_0
-#define MarkToBasePositioningSubtable MarkToBasePositioningSubtable_4_0
-#define MarkToLigaturePositioningSubtable MarkToLigaturePositioningSubtable_4_0
-#define MarkToMarkPositioningSubtable MarkToMarkPositioningSubtable_4_0
-#define Math Math_4_0
-#define Measure Measure_4_0
-#define MeasureFormat MeasureFormat_4_0
-#define MeasureUnit MeasureUnit_4_0
-#define MessageFormat MessageFormat_4_0
-#define MessageFormatAdapter MessageFormatAdapter_4_0
-#define ModulusSubstitution ModulusSubstitution_4_0
-#define MoonRiseSetCoordFunc MoonRiseSetCoordFunc_4_0
-#define MoonTimeAngleFunc MoonTimeAngleFunc_4_0
-#define MorphSubtableHeader MorphSubtableHeader_4_0
-#define MorphTableHeader MorphTableHeader_4_0
-#define MultipleSubstitutionSubtable MultipleSubstitutionSubtable_4_0
-#define MultiplierSubstitution MultiplierSubstitution_4_0
-#define MutableTrieDictionary MutableTrieDictionary_4_0
-#define MutableTrieEnumeration MutableTrieEnumeration_4_0
-#define NFFactory NFFactory_4_0
-#define NFRule NFRule_4_0
-#define NFRuleSet NFRuleSet_4_0
-#define NFSubstitution NFSubstitution_4_0
-#define NGramParser NGramParser_4_0
-#define NameToEnum NameToEnum_4_0
-#define NameUnicodeTransliterator NameUnicodeTransliterator_4_0
-#define NonContextualGlyphSubstitutionProcessor NonContextualGlyphSubstitutionProcessor_4_0
-#define NonContiguousEnumToOffset NonContiguousEnumToOffset_4_0
-#define NormalizationTransliterator NormalizationTransliterator_4_0
-#define Normalizer Normalizer_4_0
-#define NullSubstitution NullSubstitution_4_0
-#define NullTransliterator NullTransliterator_4_0
-#define NumberFormat NumberFormat_4_0
-#define NumberFormatFactory NumberFormatFactory_4_0
-#define NumeratorSubstitution NumeratorSubstitution_4_0
-#define OlsonTimeZone OlsonTimeZone_4_0
-#define OpenTypeLayoutEngine OpenTypeLayoutEngine_4_0
-#define OpenTypeUtilities OpenTypeUtilities_4_0
-#define OrConstraint OrConstraint_4_0
-#define PCEBuffer PCEBuffer_4_0
-#define PairPositioningFormat1Subtable PairPositioningFormat1Subtable_4_0
-#define PairPositioningFormat2Subtable PairPositioningFormat2Subtable_4_0
-#define PairPositioningSubtable PairPositioningSubtable_4_0
-#define ParagraphLayout ParagraphLayout_4_0
-#define ParseData ParseData_4_0
-#define ParsePosition ParsePosition_4_0
-#define PatternMap PatternMap_4_0
-#define PatternMapIterator PatternMapIterator_4_0
-#define PersianCalendar PersianCalendar_4_0
-#define PluralFormat PluralFormat_4_0
-#define PluralKeywordEnumeration PluralKeywordEnumeration_4_0
-#define PluralRules PluralRules_4_0
-#define PropertyAliases PropertyAliases_4_0
-#define PtnElem PtnElem_4_0
-#define PtnSkeleton PtnSkeleton_4_0
-#define Quantifier Quantifier_4_0
-#define RBBIDataWrapper RBBIDataWrapper_4_0
-#define RBBINode RBBINode_4_0
-#define RBBIRuleBuilder RBBIRuleBuilder_4_0
-#define RBBIRuleScanner RBBIRuleScanner_4_0
-#define RBBISetBuilder RBBISetBuilder_4_0
-#define RBBIStateDescriptor RBBIStateDescriptor_4_0
-#define RBBISymbolTable RBBISymbolTable_4_0
-#define RBBISymbolTableEntry RBBISymbolTableEntry_4_0
-#define RBBITableBuilder RBBITableBuilder_4_0
-#define RCEBuffer RCEBuffer_4_0
-#define RangeDescriptor RangeDescriptor_4_0
-#define RegexCImpl RegexCImpl_4_0
-#define RegexCompile RegexCompile_4_0
-#define RegexMatcher RegexMatcher_4_0
-#define RegexPattern RegexPattern_4_0
-#define RegexStaticSets RegexStaticSets_4_0
-#define RelativeDateFormat RelativeDateFormat_4_0
-#define RemoveTransliterator RemoveTransliterator_4_0
-#define Replaceable Replaceable_4_0
-#define ReplaceableGlue ReplaceableGlue_4_0
-#define ResourceBundle ResourceBundle_4_0
-#define RiseSetCoordFunc RiseSetCoordFunc_4_0
-#define RuleBasedBreakIterator RuleBasedBreakIterator_4_0
-#define RuleBasedCollator RuleBasedCollator_4_0
-#define RuleBasedNumberFormat RuleBasedNumberFormat_4_0
-#define RuleBasedTimeZone RuleBasedTimeZone_4_0
-#define RuleBasedTransliterator RuleBasedTransliterator_4_0
-#define RuleChain RuleChain_4_0
-#define RuleCharacterIterator RuleCharacterIterator_4_0
-#define RuleHalf RuleHalf_4_0
-#define RuleParser RuleParser_4_0
-#define RunArray RunArray_4_0
-#define SafeZoneStringFormatPtr SafeZoneStringFormatPtr_4_0
-#define SameValueSubstitution SameValueSubstitution_4_0
-#define ScriptListTable ScriptListTable_4_0
-#define ScriptRunIterator ScriptRunIterator_4_0
-#define ScriptTable ScriptTable_4_0
-#define SearchIterator SearchIterator_4_0
-#define SegmentArrayProcessor SegmentArrayProcessor_4_0
-#define SegmentSingleProcessor SegmentSingleProcessor_4_0
-#define ServiceEnumeration ServiceEnumeration_4_0
-#define ServiceListener ServiceListener_4_0
-#define SimpleArrayProcessor SimpleArrayProcessor_4_0
-#define SimpleDateFormat SimpleDateFormat_4_0
-#define SimpleFactory SimpleFactory_4_0
-#define SimpleLocaleKeyFactory SimpleLocaleKeyFactory_4_0
-#define SimpleNumberFormatFactory SimpleNumberFormatFactory_4_0
-#define SimpleTimeZone SimpleTimeZone_4_0
-#define SinglePositioningFormat1Subtable SinglePositioningFormat1Subtable_4_0
-#define SinglePositioningFormat2Subtable SinglePositioningFormat2Subtable_4_0
-#define SinglePositioningSubtable SinglePositioningSubtable_4_0
-#define SingleSubstitutionFormat1Subtable SingleSubstitutionFormat1Subtable_4_0
-#define SingleSubstitutionFormat2Subtable SingleSubstitutionFormat2Subtable_4_0
-#define SingleSubstitutionSubtable SingleSubstitutionSubtable_4_0
-#define SingleTableProcessor SingleTableProcessor_4_0
-#define Spec Spec_4_0
-#define StateTableProcessor StateTableProcessor_4_0
-#define StringCharacterIterator StringCharacterIterator_4_0
-#define StringEnumeration StringEnumeration_4_0
-#define StringLocalizationInfo StringLocalizationInfo_4_0
-#define StringMatcher StringMatcher_4_0
-#define StringPair StringPair_4_0
-#define StringReplacer StringReplacer_4_0
-#define StringSearch StringSearch_4_0
-#define StyleRuns StyleRuns_4_0
-#define SubstitutionLookup SubstitutionLookup_4_0
-#define SubtableProcessor SubtableProcessor_4_0
-#define SunTimeAngleFunc SunTimeAngleFunc_4_0
-#define SymbolTable SymbolTable_4_0
-#define TZEnumeration TZEnumeration_4_0
-#define TaiwanCalendar TaiwanCalendar_4_0
-#define TernaryNode TernaryNode_4_0
-#define TextTrieMap TextTrieMap_4_0
-#define TextTrieMapSearchResultHandler TextTrieMapSearchResultHandler_4_0
-#define ThaiBreakEngine ThaiBreakEngine_4_0
-#define ThaiLayoutEngine ThaiLayoutEngine_4_0
-#define ThaiShaping ThaiShaping_4_0
-#define TibetanClassTable TibetanClassTable_4_0
-#define TibetanOpenTypeLayoutEngine TibetanOpenTypeLayoutEngine_4_0
-#define TibetanReordering TibetanReordering_4_0
-#define TimeArrayTimeZoneRule TimeArrayTimeZoneRule_4_0
-#define TimeZone TimeZone_4_0
-#define TimeZoneRule TimeZoneRule_4_0
-#define TimeZoneTransition TimeZoneTransition_4_0
-#define TitlecaseTransliterator TitlecaseTransliterator_4_0
-#define TransliterationRule TransliterationRule_4_0
-#define TransliterationRuleData TransliterationRuleData_4_0
-#define TransliterationRuleSet TransliterationRuleSet_4_0
-#define Transliterator Transliterator_4_0
-#define TransliteratorAlias TransliteratorAlias_4_0
-#define TransliteratorIDParser TransliteratorIDParser_4_0
-#define TransliteratorParser TransliteratorParser_4_0
-#define TransliteratorRegistry TransliteratorRegistry_4_0
-#define TrieWordDictionary TrieWordDictionary_4_0
-#define TrimmedArrayProcessor TrimmedArrayProcessor_4_0
-#define UCharCharacterIterator UCharCharacterIterator_4_0
-#define UCollationPCE UCollationPCE_4_0
-#define ULocRuns ULocRuns_4_0
-#define UMemory UMemory_4_0
-#define UObject UObject_4_0
-#define URegularExpression URegularExpression_4_0
-#define UStack UStack_4_0
-#define UStringEnumeration UStringEnumeration_4_0
-#define UVector UVector_4_0
-#define UVector32 UVector32_4_0
-#define UnescapeTransliterator UnescapeTransliterator_4_0
-#define UnhandledEngine UnhandledEngine_4_0
-#define UnicodeArabicOpenTypeLayoutEngine UnicodeArabicOpenTypeLayoutEngine_4_0
-#define UnicodeFilter UnicodeFilter_4_0
-#define UnicodeFunctor UnicodeFunctor_4_0
-#define UnicodeMatcher UnicodeMatcher_4_0
-#define UnicodeNameTransliterator UnicodeNameTransliterator_4_0
-#define UnicodeReplacer UnicodeReplacer_4_0
-#define UnicodeSet UnicodeSet_4_0
-#define UnicodeSetIterator UnicodeSetIterator_4_0
-#define UnicodeSetStringSpan UnicodeSetStringSpan_4_0
-#define UnicodeString UnicodeString_4_0
-#define UppercaseTransliterator UppercaseTransliterator_4_0
-#define VTZReader VTZReader_4_0
-#define VTZWriter VTZWriter_4_0
-#define VTimeZone VTimeZone_4_0
-#define ValueRecord ValueRecord_4_0
-#define ValueRuns ValueRuns_4_0
-#define ZSFCache ZSFCache_4_0
-#define ZSFCacheEntry ZSFCacheEntry_4_0
-#define ZoneMeta ZoneMeta_4_0
-#define ZoneStringFormat ZoneStringFormat_4_0
-#define ZoneStringInfo ZoneStringInfo_4_0
-#define ZoneStringSearchResultHandler ZoneStringSearchResultHandler_4_0
-#define ZoneStrings ZoneStrings_4_0
-#define locale_set_default_internal locale_set_default_internal_4_0
-#define util64_fromDouble util64_fromDouble_4_0
-#define util64_pow util64_pow_4_0
-#define util64_tou util64_tou_4_0
-
-#endif
-#endif
-
-#endif
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/urename.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/urename.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/urename.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/urename.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1775 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2002-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*
+*   file name:  urename.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   Created by: Perl script written by Vladimir Weinstein
+*
+*  Contains data for renaming ICU exports.
+*  Gets included by umachine.h
+*
+*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef URENAME_H
+#define URENAME_H
+
+/* Uncomment the following line to disable renaming on platforms
+   that do not use Autoconf. */
+/* #define U_DISABLE_RENAMING 1 */
+
+#if !U_DISABLE_RENAMING
+
+/* C exports renaming data */
+
+#define T_CString_int64ToString T_CString_int64ToString_4_0
+#define T_CString_integerToString T_CString_integerToString_4_0
+#define T_CString_stricmp T_CString_stricmp_4_0
+#define T_CString_stringToInteger T_CString_stringToInteger_4_0
+#define T_CString_strnicmp T_CString_strnicmp_4_0
+#define T_CString_toLowerCase T_CString_toLowerCase_4_0
+#define T_CString_toUpperCase T_CString_toUpperCase_4_0
+#define UCNV_FROM_U_CALLBACK_ESCAPE UCNV_FROM_U_CALLBACK_ESCAPE_4_0
+#define UCNV_FROM_U_CALLBACK_SKIP UCNV_FROM_U_CALLBACK_SKIP_4_0
+#define UCNV_FROM_U_CALLBACK_STOP UCNV_FROM_U_CALLBACK_STOP_4_0
+#define UCNV_FROM_U_CALLBACK_SUBSTITUTE UCNV_FROM_U_CALLBACK_SUBSTITUTE_4_0
+#define UCNV_TO_U_CALLBACK_ESCAPE UCNV_TO_U_CALLBACK_ESCAPE_4_0
+#define UCNV_TO_U_CALLBACK_SKIP UCNV_TO_U_CALLBACK_SKIP_4_0
+#define UCNV_TO_U_CALLBACK_STOP UCNV_TO_U_CALLBACK_STOP_4_0
+#define UCNV_TO_U_CALLBACK_SUBSTITUTE UCNV_TO_U_CALLBACK_SUBSTITUTE_4_0
+#define UDataMemory_createNewInstance UDataMemory_createNewInstance_4_0
+#define UDataMemory_init UDataMemory_init_4_0
+#define UDataMemory_isLoaded UDataMemory_isLoaded_4_0
+#define UDataMemory_normalizeDataPointer UDataMemory_normalizeDataPointer_4_0
+#define UDataMemory_setData UDataMemory_setData_4_0
+#define UDatamemory_assign UDatamemory_assign_4_0
+#define _ASCIIData _ASCIIData_4_0
+#define _Bocu1Data _Bocu1Data_4_0
+#define _CESU8Data _CESU8Data_4_0
+#define _HZData _HZData_4_0
+#define _IMAPData _IMAPData_4_0
+#define _ISCIIData _ISCIIData_4_0
+#define _ISO2022Data _ISO2022Data_4_0
+#define _LMBCSData1 _LMBCSData1_4_0
+#define _LMBCSData11 _LMBCSData11_4_0
+#define _LMBCSData16 _LMBCSData16_4_0
+#define _LMBCSData17 _LMBCSData17_4_0
+#define _LMBCSData18 _LMBCSData18_4_0
+#define _LMBCSData19 _LMBCSData19_4_0
+#define _LMBCSData2 _LMBCSData2_4_0
+#define _LMBCSData3 _LMBCSData3_4_0
+#define _LMBCSData4 _LMBCSData4_4_0
+#define _LMBCSData5 _LMBCSData5_4_0
+#define _LMBCSData6 _LMBCSData6_4_0
+#define _LMBCSData8 _LMBCSData8_4_0
+#define _Latin1Data _Latin1Data_4_0
+#define _MBCSData _MBCSData_4_0
+#define _SCSUData _SCSUData_4_0
+#define _UTF16BEData _UTF16BEData_4_0
+#define _UTF16Data _UTF16Data_4_0
+#define _UTF16LEData _UTF16LEData_4_0
+#define _UTF32BEData _UTF32BEData_4_0
+#define _UTF32Data _UTF32Data_4_0
+#define _UTF32LEData _UTF32LEData_4_0
+#define _UTF7Data _UTF7Data_4_0
+#define _UTF8Data _UTF8Data_4_0
+#define cmemory_cleanup cmemory_cleanup_4_0
+#define cmemory_inUse cmemory_inUse_4_0
+#define le_close le_close_4_0
+#define le_create le_create_4_0
+#define le_getCharIndices le_getCharIndices_4_0
+#define le_getCharIndicesWithBase le_getCharIndicesWithBase_4_0
+#define le_getGlyphCount le_getGlyphCount_4_0
+#define le_getGlyphPosition le_getGlyphPosition_4_0
+#define le_getGlyphPositions le_getGlyphPositions_4_0
+#define le_getGlyphs le_getGlyphs_4_0
+#define le_layoutChars le_layoutChars_4_0
+#define le_reset le_reset_4_0
+#define locale_getKeywords locale_getKeywords_4_0
+#define locale_get_default locale_get_default_4_0
+#define locale_set_default locale_set_default_4_0
+#define pl_addFontRun pl_addFontRun_4_0
+#define pl_addLocaleRun pl_addLocaleRun_4_0
+#define pl_addValueRun pl_addValueRun_4_0
+#define pl_close pl_close_4_0
+#define pl_closeFontRuns pl_closeFontRuns_4_0
+#define pl_closeLine pl_closeLine_4_0
+#define pl_closeLocaleRuns pl_closeLocaleRuns_4_0
+#define pl_closeValueRuns pl_closeValueRuns_4_0
+#define pl_countLineRuns pl_countLineRuns_4_0
+#define pl_create pl_create_4_0
+#define pl_getAscent pl_getAscent_4_0
+#define pl_getDescent pl_getDescent_4_0
+#define pl_getFontRunCount pl_getFontRunCount_4_0
+#define pl_getFontRunFont pl_getFontRunFont_4_0
+#define pl_getFontRunLastLimit pl_getFontRunLastLimit_4_0
+#define pl_getFontRunLimit pl_getFontRunLimit_4_0
+#define pl_getLeading pl_getLeading_4_0
+#define pl_getLineAscent pl_getLineAscent_4_0
+#define pl_getLineDescent pl_getLineDescent_4_0
+#define pl_getLineLeading pl_getLineLeading_4_0
+#define pl_getLineVisualRun pl_getLineVisualRun_4_0
+#define pl_getLineWidth pl_getLineWidth_4_0
+#define pl_getLocaleRunCount pl_getLocaleRunCount_4_0
+#define pl_getLocaleRunLastLimit pl_getLocaleRunLastLimit_4_0
+#define pl_getLocaleRunLimit pl_getLocaleRunLimit_4_0
+#define pl_getLocaleRunLocale pl_getLocaleRunLocale_4_0
+#define pl_getParagraphLevel pl_getParagraphLevel_4_0
+#define pl_getTextDirection pl_getTextDirection_4_0
+#define pl_getValueRunCount pl_getValueRunCount_4_0
+#define pl_getValueRunLastLimit pl_getValueRunLastLimit_4_0
+#define pl_getValueRunLimit pl_getValueRunLimit_4_0
+#define pl_getValueRunValue pl_getValueRunValue_4_0
+#define pl_getVisualRunAscent pl_getVisualRunAscent_4_0
+#define pl_getVisualRunDescent pl_getVisualRunDescent_4_0
+#define pl_getVisualRunDirection pl_getVisualRunDirection_4_0
+#define pl_getVisualRunFont pl_getVisualRunFont_4_0
+#define pl_getVisualRunGlyphCount pl_getVisualRunGlyphCount_4_0
+#define pl_getVisualRunGlyphToCharMap pl_getVisualRunGlyphToCharMap_4_0
+#define pl_getVisualRunGlyphs pl_getVisualRunGlyphs_4_0
+#define pl_getVisualRunLeading pl_getVisualRunLeading_4_0
+#define pl_getVisualRunPositions pl_getVisualRunPositions_4_0
+#define pl_isComplex pl_isComplex_4_0
+#define pl_nextLine pl_nextLine_4_0
+#define pl_openEmptyFontRuns pl_openEmptyFontRuns_4_0
+#define pl_openEmptyLocaleRuns pl_openEmptyLocaleRuns_4_0
+#define pl_openEmptyValueRuns pl_openEmptyValueRuns_4_0
+#define pl_openFontRuns pl_openFontRuns_4_0
+#define pl_openLocaleRuns pl_openLocaleRuns_4_0
+#define pl_openValueRuns pl_openValueRuns_4_0
+#define pl_reflow pl_reflow_4_0
+#define pl_resetFontRuns pl_resetFontRuns_4_0
+#define pl_resetLocaleRuns pl_resetLocaleRuns_4_0
+#define pl_resetValueRuns pl_resetValueRuns_4_0
+#define res_countArrayItems res_countArrayItems_4_0
+#define res_findResource res_findResource_4_0
+#define res_getAlias res_getAlias_4_0
+#define res_getArrayItem res_getArrayItem_4_0
+#define res_getBinary res_getBinary_4_0
+#define res_getIntVector res_getIntVector_4_0
+#define res_getResource res_getResource_4_0
+#define res_getString res_getString_4_0
+#define res_getTableItemByIndex res_getTableItemByIndex_4_0
+#define res_getTableItemByKey res_getTableItemByKey_4_0
+#define res_load res_load_4_0
+#define res_unload res_unload_4_0
+#define transliterator_cleanup transliterator_cleanup_4_0
+#define triedict_swap triedict_swap_4_0
+#define u_UCharsToChars u_UCharsToChars_4_0
+#define u_austrcpy u_austrcpy_4_0
+#define u_austrncpy u_austrncpy_4_0
+#define u_catclose u_catclose_4_0
+#define u_catgets u_catgets_4_0
+#define u_catopen u_catopen_4_0
+#define u_charAge u_charAge_4_0
+#define u_charDigitValue u_charDigitValue_4_0
+#define u_charDirection u_charDirection_4_0
+#define u_charFromName u_charFromName_4_0
+#define u_charMirror u_charMirror_4_0
+#define u_charName u_charName_4_0
+#define u_charType u_charType_4_0
+#define u_charsToUChars u_charsToUChars_4_0
+#define u_cleanup u_cleanup_4_0
+#define u_countChar32 u_countChar32_4_0
+#define u_digit u_digit_4_0
+#define u_enumCharNames u_enumCharNames_4_0
+#define u_enumCharTypes u_enumCharTypes_4_0
+#define u_errorName u_errorName_4_0
+#define u_fclose u_fclose_4_0
+#define u_feof u_feof_4_0
+#define u_fflush u_fflush_4_0
+#define u_fgetConverter u_fgetConverter_4_0
+#define u_fgetc u_fgetc_4_0
+#define u_fgetcodepage u_fgetcodepage_4_0
+#define u_fgetcx u_fgetcx_4_0
+#define u_fgetfile u_fgetfile_4_0
+#define u_fgetlocale u_fgetlocale_4_0
+#define u_fgets u_fgets_4_0
+#define u_file_read u_file_read_4_0
+#define u_file_write u_file_write_4_0
+#define u_file_write_flush u_file_write_flush_4_0
+#define u_finit u_finit_4_0
+#define u_foldCase u_foldCase_4_0
+#define u_fopen u_fopen_4_0
+#define u_forDigit u_forDigit_4_0
+#define u_formatMessage u_formatMessage_4_0
+#define u_formatMessageWithError u_formatMessageWithError_4_0
+#define u_fprintf u_fprintf_4_0
+#define u_fprintf_u u_fprintf_u_4_0
+#define u_fputc u_fputc_4_0
+#define u_fputs u_fputs_4_0
+#define u_frewind u_frewind_4_0
+#define u_fscanf u_fscanf_4_0
+#define u_fscanf_u u_fscanf_u_4_0
+#define u_fsetcodepage u_fsetcodepage_4_0
+#define u_fsetlocale u_fsetlocale_4_0
+#define u_fsettransliterator u_fsettransliterator_4_0
+#define u_fstropen u_fstropen_4_0
+#define u_fungetc u_fungetc_4_0
+#define u_getCombiningClass u_getCombiningClass_4_0
+#define u_getDataDirectory u_getDataDirectory_4_0
+#define u_getDefaultConverter u_getDefaultConverter_4_0
+#define u_getFC_NFKC_Closure u_getFC_NFKC_Closure_4_0
+#define u_getISOComment u_getISOComment_4_0
+#define u_getIntPropertyMaxValue u_getIntPropertyMaxValue_4_0
+#define u_getIntPropertyMinValue u_getIntPropertyMinValue_4_0
+#define u_getIntPropertyValue u_getIntPropertyValue_4_0
+#define u_getNumericValue u_getNumericValue_4_0
+#define u_getPropertyEnum u_getPropertyEnum_4_0
+#define u_getPropertyName u_getPropertyName_4_0
+#define u_getPropertyValueEnum u_getPropertyValueEnum_4_0
+#define u_getPropertyValueName u_getPropertyValueName_4_0
+#define u_getUnicodeProperties u_getUnicodeProperties_4_0
+#define u_getUnicodeVersion u_getUnicodeVersion_4_0
+#define u_getVersion u_getVersion_4_0
+#define u_growBufferFromStatic u_growBufferFromStatic_4_0
+#define u_hasBinaryProperty u_hasBinaryProperty_4_0
+#define u_init u_init_4_0
+#define u_isIDIgnorable u_isIDIgnorable_4_0
+#define u_isIDPart u_isIDPart_4_0
+#define u_isIDStart u_isIDStart_4_0
+#define u_isISOControl u_isISOControl_4_0
+#define u_isJavaIDPart u_isJavaIDPart_4_0
+#define u_isJavaIDStart u_isJavaIDStart_4_0
+#define u_isJavaSpaceChar u_isJavaSpaceChar_4_0
+#define u_isMirrored u_isMirrored_4_0
+#define u_isUAlphabetic u_isUAlphabetic_4_0
+#define u_isULowercase u_isULowercase_4_0
+#define u_isUUppercase u_isUUppercase_4_0
+#define u_isUWhiteSpace u_isUWhiteSpace_4_0
+#define u_isWhitespace u_isWhitespace_4_0
+#define u_isalnum u_isalnum_4_0
+#define u_isalnumPOSIX u_isalnumPOSIX_4_0
+#define u_isalpha u_isalpha_4_0
+#define u_isbase u_isbase_4_0
+#define u_isblank u_isblank_4_0
+#define u_iscntrl u_iscntrl_4_0
+#define u_isdefined u_isdefined_4_0
+#define u_isdigit u_isdigit_4_0
+#define u_isgraph u_isgraph_4_0
+#define u_isgraphPOSIX u_isgraphPOSIX_4_0
+#define u_islower u_islower_4_0
+#define u_isprint u_isprint_4_0
+#define u_isprintPOSIX u_isprintPOSIX_4_0
+#define u_ispunct u_ispunct_4_0
+#define u_isspace u_isspace_4_0
+#define u_istitle u_istitle_4_0
+#define u_isupper u_isupper_4_0
+#define u_isxdigit u_isxdigit_4_0
+#define u_lengthOfIdenticalLevelRun u_lengthOfIdenticalLevelRun_4_0
+#define u_locbund_close u_locbund_close_4_0
+#define u_locbund_getNumberFormat u_locbund_getNumberFormat_4_0
+#define u_locbund_init u_locbund_init_4_0
+#define u_memcasecmp u_memcasecmp_4_0
+#define u_memchr u_memchr_4_0
+#define u_memchr32 u_memchr32_4_0
+#define u_memcmp u_memcmp_4_0
+#define u_memcmpCodePointOrder u_memcmpCodePointOrder_4_0
+#define u_memcpy u_memcpy_4_0
+#define u_memmove u_memmove_4_0
+#define u_memrchr u_memrchr_4_0
+#define u_memrchr32 u_memrchr32_4_0
+#define u_memset u_memset_4_0
+#define u_parseMessage u_parseMessage_4_0
+#define u_parseMessageWithError u_parseMessageWithError_4_0
+#define u_printf_parse u_printf_parse_4_0
+#define u_releaseDefaultConverter u_releaseDefaultConverter_4_0
+#define u_scanf_parse u_scanf_parse_4_0
+#define u_setAtomicIncDecFunctions u_setAtomicIncDecFunctions_4_0
+#define u_setDataDirectory u_setDataDirectory_4_0
+#define u_setMemoryFunctions u_setMemoryFunctions_4_0
+#define u_setMutexFunctions u_setMutexFunctions_4_0
+#define u_shapeArabic u_shapeArabic_4_0
+#define u_snprintf u_snprintf_4_0
+#define u_snprintf_u u_snprintf_u_4_0
+#define u_sprintf u_sprintf_4_0
+#define u_sprintf_u u_sprintf_u_4_0
+#define u_sscanf u_sscanf_4_0
+#define u_sscanf_u u_sscanf_u_4_0
+#define u_strCaseCompare u_strCaseCompare_4_0
+#define u_strCompare u_strCompare_4_0
+#define u_strCompareIter u_strCompareIter_4_0
+#define u_strFindFirst u_strFindFirst_4_0
+#define u_strFindLast u_strFindLast_4_0
+#define u_strFoldCase u_strFoldCase_4_0
+#define u_strFromPunycode u_strFromPunycode_4_0
+#define u_strFromUTF32 u_strFromUTF32_4_0
+#define u_strFromUTF8 u_strFromUTF8_4_0
+#define u_strFromUTF8Lenient u_strFromUTF8Lenient_4_0
+#define u_strFromUTF8WithSub u_strFromUTF8WithSub_4_0
+#define u_strFromWCS u_strFromWCS_4_0
+#define u_strHasMoreChar32Than u_strHasMoreChar32Than_4_0
+#define u_strToLower u_strToLower_4_0
+#define u_strToPunycode u_strToPunycode_4_0
+#define u_strToTitle u_strToTitle_4_0
+#define u_strToUTF32 u_strToUTF32_4_0
+#define u_strToUTF8 u_strToUTF8_4_0
+#define u_strToUTF8WithSub u_strToUTF8WithSub_4_0
+#define u_strToUpper u_strToUpper_4_0
+#define u_strToWCS u_strToWCS_4_0
+#define u_strcasecmp u_strcasecmp_4_0
+#define u_strcat u_strcat_4_0
+#define u_strchr u_strchr_4_0
+#define u_strchr32 u_strchr32_4_0
+#define u_strcmp u_strcmp_4_0
+#define u_strcmpCodePointOrder u_strcmpCodePointOrder_4_0
+#define u_strcmpFold u_strcmpFold_4_0
+#define u_strcpy u_strcpy_4_0
+#define u_strcspn u_strcspn_4_0
+#define u_strlen u_strlen_4_0
+#define u_strncasecmp u_strncasecmp_4_0
+#define u_strncat u_strncat_4_0
+#define u_strncmp u_strncmp_4_0
+#define u_strncmpCodePointOrder u_strncmpCodePointOrder_4_0
+#define u_strncpy u_strncpy_4_0
+#define u_strpbrk u_strpbrk_4_0
+#define u_strrchr u_strrchr_4_0
+#define u_strrchr32 u_strrchr32_4_0
+#define u_strrstr u_strrstr_4_0
+#define u_strspn u_strspn_4_0
+#define u_strstr u_strstr_4_0
+#define u_strtok_r u_strtok_r_4_0
+#define u_terminateChars u_terminateChars_4_0
+#define u_terminateUChar32s u_terminateUChar32s_4_0
+#define u_terminateUChars u_terminateUChars_4_0
+#define u_terminateWChars u_terminateWChars_4_0
+#define u_tolower u_tolower_4_0
+#define u_totitle u_totitle_4_0
+#define u_toupper u_toupper_4_0
+#define u_uastrcpy u_uastrcpy_4_0
+#define u_uastrncpy u_uastrncpy_4_0
+#define u_unescape u_unescape_4_0
+#define u_unescapeAt u_unescapeAt_4_0
+#define u_versionFromString u_versionFromString_4_0
+#define u_versionToString u_versionToString_4_0
+#define u_vformatMessage u_vformatMessage_4_0
+#define u_vformatMessageWithError u_vformatMessageWithError_4_0
+#define u_vfprintf u_vfprintf_4_0
+#define u_vfprintf_u u_vfprintf_u_4_0
+#define u_vfscanf u_vfscanf_4_0
+#define u_vfscanf_u u_vfscanf_u_4_0
+#define u_vparseMessage u_vparseMessage_4_0
+#define u_vparseMessageWithError u_vparseMessageWithError_4_0
+#define u_vsnprintf u_vsnprintf_4_0
+#define u_vsnprintf_u u_vsnprintf_u_4_0
+#define u_vsprintf u_vsprintf_4_0
+#define u_vsprintf_u u_vsprintf_u_4_0
+#define u_vsscanf u_vsscanf_4_0
+#define u_vsscanf_u u_vsscanf_u_4_0
+#define u_writeDiff u_writeDiff_4_0
+#define u_writeIdenticalLevelRun u_writeIdenticalLevelRun_4_0
+#define u_writeIdenticalLevelRunTwoChars u_writeIdenticalLevelRunTwoChars_4_0
+#define ubidi_addPropertyStarts ubidi_addPropertyStarts_4_0
+#define ubidi_close ubidi_close_4_0
+#define ubidi_closeProps ubidi_closeProps_4_0
+#define ubidi_countParagraphs ubidi_countParagraphs_4_0
+#define ubidi_countRuns ubidi_countRuns_4_0
+#define ubidi_getClass ubidi_getClass_4_0
+#define ubidi_getClassCallback ubidi_getClassCallback_4_0
+#define ubidi_getCustomizedClass ubidi_getCustomizedClass_4_0
+#define ubidi_getDirection ubidi_getDirection_4_0
+#define ubidi_getJoiningGroup ubidi_getJoiningGroup_4_0
+#define ubidi_getJoiningType ubidi_getJoiningType_4_0
+#define ubidi_getLength ubidi_getLength_4_0
+#define ubidi_getLevelAt ubidi_getLevelAt_4_0
+#define ubidi_getLevels ubidi_getLevels_4_0
+#define ubidi_getLogicalIndex ubidi_getLogicalIndex_4_0
+#define ubidi_getLogicalMap ubidi_getLogicalMap_4_0
+#define ubidi_getLogicalRun ubidi_getLogicalRun_4_0
+#define ubidi_getMaxValue ubidi_getMaxValue_4_0
+#define ubidi_getMemory ubidi_getMemory_4_0
+#define ubidi_getMirror ubidi_getMirror_4_0
+#define ubidi_getParaLevel ubidi_getParaLevel_4_0
+#define ubidi_getParagraph ubidi_getParagraph_4_0
+#define ubidi_getParagraphByIndex ubidi_getParagraphByIndex_4_0
+#define ubidi_getProcessedLength ubidi_getProcessedLength_4_0
+#define ubidi_getReorderingMode ubidi_getReorderingMode_4_0
+#define ubidi_getReorderingOptions ubidi_getReorderingOptions_4_0
+#define ubidi_getResultLength ubidi_getResultLength_4_0
+#define ubidi_getRuns ubidi_getRuns_4_0
+#define ubidi_getSingleton ubidi_getSingleton_4_0
+#define ubidi_getText ubidi_getText_4_0
+#define ubidi_getVisualIndex ubidi_getVisualIndex_4_0
+#define ubidi_getVisualMap ubidi_getVisualMap_4_0
+#define ubidi_getVisualRun ubidi_getVisualRun_4_0
+#define ubidi_invertMap ubidi_invertMap_4_0
+#define ubidi_isBidiControl ubidi_isBidiControl_4_0
+#define ubidi_isInverse ubidi_isInverse_4_0
+#define ubidi_isJoinControl ubidi_isJoinControl_4_0
+#define ubidi_isMirrored ubidi_isMirrored_4_0
+#define ubidi_isOrderParagraphsLTR ubidi_isOrderParagraphsLTR_4_0
+#define ubidi_open ubidi_open_4_0
+#define ubidi_openSized ubidi_openSized_4_0
+#define ubidi_orderParagraphsLTR ubidi_orderParagraphsLTR_4_0
+#define ubidi_reorderLogical ubidi_reorderLogical_4_0
+#define ubidi_reorderVisual ubidi_reorderVisual_4_0
+#define ubidi_setClassCallback ubidi_setClassCallback_4_0
+#define ubidi_setInverse ubidi_setInverse_4_0
+#define ubidi_setLine ubidi_setLine_4_0
+#define ubidi_setPara ubidi_setPara_4_0
+#define ubidi_setReorderingMode ubidi_setReorderingMode_4_0
+#define ubidi_setReorderingOptions ubidi_setReorderingOptions_4_0
+#define ubidi_writeReordered ubidi_writeReordered_4_0
+#define ubidi_writeReverse ubidi_writeReverse_4_0
+#define ublock_getCode ublock_getCode_4_0
+#define ubrk_close ubrk_close_4_0
+#define ubrk_countAvailable ubrk_countAvailable_4_0
+#define ubrk_current ubrk_current_4_0
+#define ubrk_first ubrk_first_4_0
+#define ubrk_following ubrk_following_4_0
+#define ubrk_getAvailable ubrk_getAvailable_4_0
+#define ubrk_getLocaleByType ubrk_getLocaleByType_4_0
+#define ubrk_getRuleStatus ubrk_getRuleStatus_4_0
+#define ubrk_getRuleStatusVec ubrk_getRuleStatusVec_4_0
+#define ubrk_isBoundary ubrk_isBoundary_4_0
+#define ubrk_last ubrk_last_4_0
+#define ubrk_next ubrk_next_4_0
+#define ubrk_open ubrk_open_4_0
+#define ubrk_openRules ubrk_openRules_4_0
+#define ubrk_preceding ubrk_preceding_4_0
+#define ubrk_previous ubrk_previous_4_0
+#define ubrk_safeClone ubrk_safeClone_4_0
+#define ubrk_setText ubrk_setText_4_0
+#define ubrk_setUText ubrk_setUText_4_0
+#define ubrk_swap ubrk_swap_4_0
+#define ucal_add ucal_add_4_0
+#define ucal_clear ucal_clear_4_0
+#define ucal_clearField ucal_clearField_4_0
+#define ucal_clone ucal_clone_4_0
+#define ucal_close ucal_close_4_0
+#define ucal_countAvailable ucal_countAvailable_4_0
+#define ucal_equivalentTo ucal_equivalentTo_4_0
+#define ucal_get ucal_get_4_0
+#define ucal_getAttribute ucal_getAttribute_4_0
+#define ucal_getAvailable ucal_getAvailable_4_0
+#define ucal_getCanonicalTimeZoneID ucal_getCanonicalTimeZoneID_4_0
+#define ucal_getDSTSavings ucal_getDSTSavings_4_0
+#define ucal_getDefaultTimeZone ucal_getDefaultTimeZone_4_0
+#define ucal_getGregorianChange ucal_getGregorianChange_4_0
+#define ucal_getLimit ucal_getLimit_4_0
+#define ucal_getLocaleByType ucal_getLocaleByType_4_0
+#define ucal_getMillis ucal_getMillis_4_0
+#define ucal_getNow ucal_getNow_4_0
+#define ucal_getTZDataVersion ucal_getTZDataVersion_4_0
+#define ucal_getTimeZoneDisplayName ucal_getTimeZoneDisplayName_4_0
+#define ucal_inDaylightTime ucal_inDaylightTime_4_0
+#define ucal_isSet ucal_isSet_4_0
+#define ucal_open ucal_open_4_0
+#define ucal_openCountryTimeZones ucal_openCountryTimeZones_4_0
+#define ucal_openTimeZones ucal_openTimeZones_4_0
+#define ucal_roll ucal_roll_4_0
+#define ucal_set ucal_set_4_0
+#define ucal_setAttribute ucal_setAttribute_4_0
+#define ucal_setDate ucal_setDate_4_0
+#define ucal_setDateTime ucal_setDateTime_4_0
+#define ucal_setDefaultTimeZone ucal_setDefaultTimeZone_4_0
+#define ucal_setGregorianChange ucal_setGregorianChange_4_0
+#define ucal_setMillis ucal_setMillis_4_0
+#define ucal_setTimeZone ucal_setTimeZone_4_0
+#define ucase_addCaseClosure ucase_addCaseClosure_4_0
+#define ucase_addPropertyStarts ucase_addPropertyStarts_4_0
+#define ucase_addStringCaseClosure ucase_addStringCaseClosure_4_0
+#define ucase_close ucase_close_4_0
+#define ucase_fold ucase_fold_4_0
+#define ucase_getCaseLocale ucase_getCaseLocale_4_0
+#define ucase_getSingleton ucase_getSingleton_4_0
+#define ucase_getType ucase_getType_4_0
+#define ucase_getTypeOrIgnorable ucase_getTypeOrIgnorable_4_0
+#define ucase_hasBinaryProperty ucase_hasBinaryProperty_4_0
+#define ucase_isCaseSensitive ucase_isCaseSensitive_4_0
+#define ucase_isSoftDotted ucase_isSoftDotted_4_0
+#define ucase_toFullFolding ucase_toFullFolding_4_0
+#define ucase_toFullLower ucase_toFullLower_4_0
+#define ucase_toFullTitle ucase_toFullTitle_4_0
+#define ucase_toFullUpper ucase_toFullUpper_4_0
+#define ucase_tolower ucase_tolower_4_0
+#define ucase_totitle ucase_totitle_4_0
+#define ucase_toupper ucase_toupper_4_0
+#define ucasemap_close ucasemap_close_4_0
+#define ucasemap_getBreakIterator ucasemap_getBreakIterator_4_0
+#define ucasemap_getLocale ucasemap_getLocale_4_0
+#define ucasemap_getOptions ucasemap_getOptions_4_0
+#define ucasemap_open ucasemap_open_4_0
+#define ucasemap_setBreakIterator ucasemap_setBreakIterator_4_0
+#define ucasemap_setLocale ucasemap_setLocale_4_0
+#define ucasemap_setOptions ucasemap_setOptions_4_0
+#define ucasemap_toTitle ucasemap_toTitle_4_0
+#define ucasemap_utf8FoldCase ucasemap_utf8FoldCase_4_0
+#define ucasemap_utf8ToLower ucasemap_utf8ToLower_4_0
+#define ucasemap_utf8ToTitle ucasemap_utf8ToTitle_4_0
+#define ucasemap_utf8ToUpper ucasemap_utf8ToUpper_4_0
+#define uchar_addPropertyStarts uchar_addPropertyStarts_4_0
+#define uchar_getHST uchar_getHST_4_0
+#define uchar_swapNames uchar_swapNames_4_0
+#define ucln_common_registerCleanup ucln_common_registerCleanup_4_0
+#define ucln_i18n_registerCleanup ucln_i18n_registerCleanup_4_0
+#define ucln_io_registerCleanup ucln_io_registerCleanup_4_0
+#define ucln_lib_cleanup ucln_lib_cleanup_4_0
+#define ucln_registerCleanup ucln_registerCleanup_4_0
+#define ucnv_MBCSFromUChar32 ucnv_MBCSFromUChar32_4_0
+#define ucnv_MBCSFromUnicodeWithOffsets ucnv_MBCSFromUnicodeWithOffsets_4_0
+#define ucnv_MBCSGetFilteredUnicodeSetForUnicode ucnv_MBCSGetFilteredUnicodeSetForUnicode_4_0
+#define ucnv_MBCSGetType ucnv_MBCSGetType_4_0
+#define ucnv_MBCSGetUnicodeSetForUnicode ucnv_MBCSGetUnicodeSetForUnicode_4_0
+#define ucnv_MBCSIsLeadByte ucnv_MBCSIsLeadByte_4_0
+#define ucnv_MBCSSimpleGetNextUChar ucnv_MBCSSimpleGetNextUChar_4_0
+#define ucnv_MBCSToUnicodeWithOffsets ucnv_MBCSToUnicodeWithOffsets_4_0
+#define ucnv_bld_countAvailableConverters ucnv_bld_countAvailableConverters_4_0
+#define ucnv_bld_getAvailableConverter ucnv_bld_getAvailableConverter_4_0
+#define ucnv_cbFromUWriteBytes ucnv_cbFromUWriteBytes_4_0
+#define ucnv_cbFromUWriteSub ucnv_cbFromUWriteSub_4_0
+#define ucnv_cbFromUWriteUChars ucnv_cbFromUWriteUChars_4_0
+#define ucnv_cbToUWriteSub ucnv_cbToUWriteSub_4_0
+#define ucnv_cbToUWriteUChars ucnv_cbToUWriteUChars_4_0
+#define ucnv_close ucnv_close_4_0
+#define ucnv_compareNames ucnv_compareNames_4_0
+#define ucnv_convert ucnv_convert_4_0
+#define ucnv_convertEx ucnv_convertEx_4_0
+#define ucnv_countAliases ucnv_countAliases_4_0
+#define ucnv_countAvailable ucnv_countAvailable_4_0
+#define ucnv_countStandards ucnv_countStandards_4_0
+#define ucnv_createAlgorithmicConverter ucnv_createAlgorithmicConverter_4_0
+#define ucnv_createConverter ucnv_createConverter_4_0
+#define ucnv_createConverterFromPackage ucnv_createConverterFromPackage_4_0
+#define ucnv_createConverterFromSharedData ucnv_createConverterFromSharedData_4_0
+#define ucnv_detectUnicodeSignature ucnv_detectUnicodeSignature_4_0
+#define ucnv_extContinueMatchFromU ucnv_extContinueMatchFromU_4_0
+#define ucnv_extContinueMatchToU ucnv_extContinueMatchToU_4_0
+#define ucnv_extGetUnicodeSet ucnv_extGetUnicodeSet_4_0
+#define ucnv_extInitialMatchFromU ucnv_extInitialMatchFromU_4_0
+#define ucnv_extInitialMatchToU ucnv_extInitialMatchToU_4_0
+#define ucnv_extSimpleMatchFromU ucnv_extSimpleMatchFromU_4_0
+#define ucnv_extSimpleMatchToU ucnv_extSimpleMatchToU_4_0
+#define ucnv_fixFileSeparator ucnv_fixFileSeparator_4_0
+#define ucnv_flushCache ucnv_flushCache_4_0
+#define ucnv_fromAlgorithmic ucnv_fromAlgorithmic_4_0
+#define ucnv_fromUChars ucnv_fromUChars_4_0
+#define ucnv_fromUCountPending ucnv_fromUCountPending_4_0
+#define ucnv_fromUWriteBytes ucnv_fromUWriteBytes_4_0
+#define ucnv_fromUnicode ucnv_fromUnicode_4_0
+#define ucnv_fromUnicode_UTF8 ucnv_fromUnicode_UTF8_4_0
+#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC ucnv_fromUnicode_UTF8_OFFSETS_LOGIC_4_0
+#define ucnv_getAlias ucnv_getAlias_4_0
+#define ucnv_getAliases ucnv_getAliases_4_0
+#define ucnv_getAvailableName ucnv_getAvailableName_4_0
+#define ucnv_getCCSID ucnv_getCCSID_4_0
+#define ucnv_getCanonicalName ucnv_getCanonicalName_4_0
+#define ucnv_getCompleteUnicodeSet ucnv_getCompleteUnicodeSet_4_0
+#define ucnv_getDefaultName ucnv_getDefaultName_4_0
+#define ucnv_getDisplayName ucnv_getDisplayName_4_0
+#define ucnv_getFromUCallBack ucnv_getFromUCallBack_4_0
+#define ucnv_getInvalidChars ucnv_getInvalidChars_4_0
+#define ucnv_getInvalidUChars ucnv_getInvalidUChars_4_0
+#define ucnv_getMaxCharSize ucnv_getMaxCharSize_4_0
+#define ucnv_getMinCharSize ucnv_getMinCharSize_4_0
+#define ucnv_getName ucnv_getName_4_0
+#define ucnv_getNextUChar ucnv_getNextUChar_4_0
+#define ucnv_getNonSurrogateUnicodeSet ucnv_getNonSurrogateUnicodeSet_4_0
+#define ucnv_getPlatform ucnv_getPlatform_4_0
+#define ucnv_getStandard ucnv_getStandard_4_0
+#define ucnv_getStandardName ucnv_getStandardName_4_0
+#define ucnv_getStarters ucnv_getStarters_4_0
+#define ucnv_getSubstChars ucnv_getSubstChars_4_0
+#define ucnv_getToUCallBack ucnv_getToUCallBack_4_0
+#define ucnv_getType ucnv_getType_4_0
+#define ucnv_getUnicodeSet ucnv_getUnicodeSet_4_0
+#define ucnv_incrementRefCount ucnv_incrementRefCount_4_0
+#define ucnv_io_countKnownConverters ucnv_io_countKnownConverters_4_0
+#define ucnv_io_getConverterName ucnv_io_getConverterName_4_0
+#define ucnv_io_stripASCIIForCompare ucnv_io_stripASCIIForCompare_4_0
+#define ucnv_io_stripEBCDICForCompare ucnv_io_stripEBCDICForCompare_4_0
+#define ucnv_isAmbiguous ucnv_isAmbiguous_4_0
+#define ucnv_load ucnv_load_4_0
+#define ucnv_loadSharedData ucnv_loadSharedData_4_0
+#define ucnv_open ucnv_open_4_0
+#define ucnv_openAllNames ucnv_openAllNames_4_0
+#define ucnv_openCCSID ucnv_openCCSID_4_0
+#define ucnv_openPackage ucnv_openPackage_4_0
+#define ucnv_openStandardNames ucnv_openStandardNames_4_0
+#define ucnv_openU ucnv_openU_4_0
+#define ucnv_reset ucnv_reset_4_0
+#define ucnv_resetFromUnicode ucnv_resetFromUnicode_4_0
+#define ucnv_resetToUnicode ucnv_resetToUnicode_4_0
+#define ucnv_safeClone ucnv_safeClone_4_0
+#define ucnv_setDefaultName ucnv_setDefaultName_4_0
+#define ucnv_setFallback ucnv_setFallback_4_0
+#define ucnv_setFromUCallBack ucnv_setFromUCallBack_4_0
+#define ucnv_setSubstChars ucnv_setSubstChars_4_0
+#define ucnv_setSubstString ucnv_setSubstString_4_0
+#define ucnv_setToUCallBack ucnv_setToUCallBack_4_0
+#define ucnv_swap ucnv_swap_4_0
+#define ucnv_swapAliases ucnv_swapAliases_4_0
+#define ucnv_toAlgorithmic ucnv_toAlgorithmic_4_0
+#define ucnv_toUChars ucnv_toUChars_4_0
+#define ucnv_toUCountPending ucnv_toUCountPending_4_0
+#define ucnv_toUWriteCodePoint ucnv_toUWriteCodePoint_4_0
+#define ucnv_toUWriteUChars ucnv_toUWriteUChars_4_0
+#define ucnv_toUnicode ucnv_toUnicode_4_0
+#define ucnv_unload ucnv_unload_4_0
+#define ucnv_unloadSharedDataIfReady ucnv_unloadSharedDataIfReady_4_0
+#define ucnv_usesFallback ucnv_usesFallback_4_0
+#define ucol_allocWeights ucol_allocWeights_4_0
+#define ucol_assembleTailoringTable ucol_assembleTailoringTable_4_0
+#define ucol_calcSortKey ucol_calcSortKey_4_0
+#define ucol_calcSortKeySimpleTertiary ucol_calcSortKeySimpleTertiary_4_0
+#define ucol_cloneBinary ucol_cloneBinary_4_0
+#define ucol_cloneRuleData ucol_cloneRuleData_4_0
+#define ucol_close ucol_close_4_0
+#define ucol_closeElements ucol_closeElements_4_0
+#define ucol_countAvailable ucol_countAvailable_4_0
+#define ucol_createElements ucol_createElements_4_0
+#define ucol_doCE ucol_doCE_4_0
+#define ucol_equal ucol_equal_4_0
+#define ucol_equals ucol_equals_4_0
+#define ucol_forgetUCA ucol_forgetUCA_4_0
+#define ucol_getAttribute ucol_getAttribute_4_0
+#define ucol_getAttributeOrDefault ucol_getAttributeOrDefault_4_0
+#define ucol_getAvailable ucol_getAvailable_4_0
+#define ucol_getBound ucol_getBound_4_0
+#define ucol_getCEStrengthDifference ucol_getCEStrengthDifference_4_0
+#define ucol_getContractions ucol_getContractions_4_0
+#define ucol_getContractionsAndExpansions ucol_getContractionsAndExpansions_4_0
+#define ucol_getDisplayName ucol_getDisplayName_4_0
+#define ucol_getFirstCE ucol_getFirstCE_4_0
+#define ucol_getFunctionalEquivalent ucol_getFunctionalEquivalent_4_0
+#define ucol_getKeywordValues ucol_getKeywordValues_4_0
+#define ucol_getKeywords ucol_getKeywords_4_0
+#define ucol_getLocale ucol_getLocale_4_0
+#define ucol_getLocaleByType ucol_getLocaleByType_4_0
+#define ucol_getMaxExpansion ucol_getMaxExpansion_4_0
+#define ucol_getNextCE ucol_getNextCE_4_0
+#define ucol_getOffset ucol_getOffset_4_0
+#define ucol_getPrevCE ucol_getPrevCE_4_0
+#define ucol_getRules ucol_getRules_4_0
+#define ucol_getRulesEx ucol_getRulesEx_4_0
+#define ucol_getShortDefinitionString ucol_getShortDefinitionString_4_0
+#define ucol_getSortKey ucol_getSortKey_4_0
+#define ucol_getSortKeySize ucol_getSortKeySize_4_0
+#define ucol_getSortKeyWithAllocation ucol_getSortKeyWithAllocation_4_0
+#define ucol_getStrength ucol_getStrength_4_0
+#define ucol_getTailoredSet ucol_getTailoredSet_4_0
+#define ucol_getUCAVersion ucol_getUCAVersion_4_0
+#define ucol_getUnsafeSet ucol_getUnsafeSet_4_0
+#define ucol_getVariableTop ucol_getVariableTop_4_0
+#define ucol_getVersion ucol_getVersion_4_0
+#define ucol_greater ucol_greater_4_0
+#define ucol_greaterOrEqual ucol_greaterOrEqual_4_0
+#define ucol_initBuffers ucol_initBuffers_4_0
+#define ucol_initCollator ucol_initCollator_4_0
+#define ucol_initInverseUCA ucol_initInverseUCA_4_0
+#define ucol_initUCA ucol_initUCA_4_0
+#define ucol_inv_getNextCE ucol_inv_getNextCE_4_0
+#define ucol_inv_getPrevCE ucol_inv_getPrevCE_4_0
+#define ucol_isTailored ucol_isTailored_4_0
+#define ucol_keyHashCode ucol_keyHashCode_4_0
+#define ucol_mergeSortkeys ucol_mergeSortkeys_4_0
+#define ucol_next ucol_next_4_0
+#define ucol_nextProcessed ucol_nextProcessed_4_0
+#define ucol_nextSortKeyPart ucol_nextSortKeyPart_4_0
+#define ucol_nextWeight ucol_nextWeight_4_0
+#define ucol_normalizeShortDefinitionString ucol_normalizeShortDefinitionString_4_0
+#define ucol_open ucol_open_4_0
+#define ucol_openAvailableLocales ucol_openAvailableLocales_4_0
+#define ucol_openBinary ucol_openBinary_4_0
+#define ucol_openElements ucol_openElements_4_0
+#define ucol_openFromShortString ucol_openFromShortString_4_0
+#define ucol_openRules ucol_openRules_4_0
+#define ucol_open_internal ucol_open_internal_4_0
+#define ucol_prepareShortStringOpen ucol_prepareShortStringOpen_4_0
+#define ucol_previous ucol_previous_4_0
+#define ucol_previousProcessed ucol_previousProcessed_4_0
+#define ucol_primaryOrder ucol_primaryOrder_4_0
+#define ucol_prv_getSpecialCE ucol_prv_getSpecialCE_4_0
+#define ucol_prv_getSpecialPrevCE ucol_prv_getSpecialPrevCE_4_0
+#define ucol_reset ucol_reset_4_0
+#define ucol_restoreVariableTop ucol_restoreVariableTop_4_0
+#define ucol_safeClone ucol_safeClone_4_0
+#define ucol_secondaryOrder ucol_secondaryOrder_4_0
+#define ucol_setAttribute ucol_setAttribute_4_0
+#define ucol_setOffset ucol_setOffset_4_0
+#define ucol_setOptionsFromHeader ucol_setOptionsFromHeader_4_0
+#define ucol_setReqValidLocales ucol_setReqValidLocales_4_0
+#define ucol_setStrength ucol_setStrength_4_0
+#define ucol_setText ucol_setText_4_0
+#define ucol_setVariableTop ucol_setVariableTop_4_0
+#define ucol_strcoll ucol_strcoll_4_0
+#define ucol_strcollIter ucol_strcollIter_4_0
+#define ucol_swap ucol_swap_4_0
+#define ucol_swapBinary ucol_swapBinary_4_0
+#define ucol_swapInverseUCA ucol_swapInverseUCA_4_0
+#define ucol_tertiaryOrder ucol_tertiaryOrder_4_0
+#define ucol_tok_assembleTokenList ucol_tok_assembleTokenList_4_0
+#define ucol_tok_closeTokenList ucol_tok_closeTokenList_4_0
+#define ucol_tok_getNextArgument ucol_tok_getNextArgument_4_0
+#define ucol_tok_initTokenList ucol_tok_initTokenList_4_0
+#define ucol_tok_parseNextToken ucol_tok_parseNextToken_4_0
+#define ucol_updateInternalState ucol_updateInternalState_4_0
+#define ucsdet_close ucsdet_close_4_0
+#define ucsdet_detect ucsdet_detect_4_0
+#define ucsdet_detectAll ucsdet_detectAll_4_0
+#define ucsdet_enableInputFilter ucsdet_enableInputFilter_4_0
+#define ucsdet_getAllDetectableCharsets ucsdet_getAllDetectableCharsets_4_0
+#define ucsdet_getConfidence ucsdet_getConfidence_4_0
+#define ucsdet_getLanguage ucsdet_getLanguage_4_0
+#define ucsdet_getName ucsdet_getName_4_0
+#define ucsdet_getUChars ucsdet_getUChars_4_0
+#define ucsdet_isInputFilterEnabled ucsdet_isInputFilterEnabled_4_0
+#define ucsdet_open ucsdet_open_4_0
+#define ucsdet_setDeclaredEncoding ucsdet_setDeclaredEncoding_4_0
+#define ucsdet_setText ucsdet_setText_4_0
+#define ucurr_countCurrencies ucurr_countCurrencies_4_0
+#define ucurr_forLocale ucurr_forLocale_4_0
+#define ucurr_forLocaleAndDate ucurr_forLocaleAndDate_4_0
+#define ucurr_getDefaultFractionDigits ucurr_getDefaultFractionDigits_4_0
+#define ucurr_getName ucurr_getName_4_0
+#define ucurr_getRoundingIncrement ucurr_getRoundingIncrement_4_0
+#define ucurr_openISOCurrencies ucurr_openISOCurrencies_4_0
+#define ucurr_register ucurr_register_4_0
+#define ucurr_unregister ucurr_unregister_4_0
+#define udat_applyPattern udat_applyPattern_4_0
+#define udat_clone udat_clone_4_0
+#define udat_close udat_close_4_0
+#define udat_countAvailable udat_countAvailable_4_0
+#define udat_countSymbols udat_countSymbols_4_0
+#define udat_format udat_format_4_0
+#define udat_get2DigitYearStart udat_get2DigitYearStart_4_0
+#define udat_getAvailable udat_getAvailable_4_0
+#define udat_getCalendar udat_getCalendar_4_0
+#define udat_getLocaleByType udat_getLocaleByType_4_0
+#define udat_getNumberFormat udat_getNumberFormat_4_0
+#define udat_getSymbols udat_getSymbols_4_0
+#define udat_isLenient udat_isLenient_4_0
+#define udat_open udat_open_4_0
+#define udat_parse udat_parse_4_0
+#define udat_parseCalendar udat_parseCalendar_4_0
+#define udat_set2DigitYearStart udat_set2DigitYearStart_4_0
+#define udat_setCalendar udat_setCalendar_4_0
+#define udat_setLenient udat_setLenient_4_0
+#define udat_setNumberFormat udat_setNumberFormat_4_0
+#define udat_setSymbols udat_setSymbols_4_0
+#define udat_toPattern udat_toPattern_4_0
+#define udata_checkCommonData udata_checkCommonData_4_0
+#define udata_close udata_close_4_0
+#define udata_closeSwapper udata_closeSwapper_4_0
+#define udata_getHeaderSize udata_getHeaderSize_4_0
+#define udata_getInfo udata_getInfo_4_0
+#define udata_getInfoSize udata_getInfoSize_4_0
+#define udata_getLength udata_getLength_4_0
+#define udata_getMemory udata_getMemory_4_0
+#define udata_getRawMemory udata_getRawMemory_4_0
+#define udata_open udata_open_4_0
+#define udata_openChoice udata_openChoice_4_0
+#define udata_openSwapper udata_openSwapper_4_0
+#define udata_openSwapperForInputData udata_openSwapperForInputData_4_0
+#define udata_printError udata_printError_4_0
+#define udata_readInt16 udata_readInt16_4_0
+#define udata_readInt32 udata_readInt32_4_0
+#define udata_setAppData udata_setAppData_4_0
+#define udata_setCommonData udata_setCommonData_4_0
+#define udata_setFileAccess udata_setFileAccess_4_0
+#define udata_swapDataHeader udata_swapDataHeader_4_0
+#define udata_swapInvStringBlock udata_swapInvStringBlock_4_0
+#define udatpg_addPattern udatpg_addPattern_4_0
+#define udatpg_clone udatpg_clone_4_0
+#define udatpg_close udatpg_close_4_0
+#define udatpg_getAppendItemFormat udatpg_getAppendItemFormat_4_0
+#define udatpg_getAppendItemName udatpg_getAppendItemName_4_0
+#define udatpg_getBaseSkeleton udatpg_getBaseSkeleton_4_0
+#define udatpg_getBestPattern udatpg_getBestPattern_4_0
+#define udatpg_getDateTimeFormat udatpg_getDateTimeFormat_4_0
+#define udatpg_getDecimal udatpg_getDecimal_4_0
+#define udatpg_getPatternForSkeleton udatpg_getPatternForSkeleton_4_0
+#define udatpg_getSkeleton udatpg_getSkeleton_4_0
+#define udatpg_open udatpg_open_4_0
+#define udatpg_openBaseSkeletons udatpg_openBaseSkeletons_4_0
+#define udatpg_openEmpty udatpg_openEmpty_4_0
+#define udatpg_openSkeletons udatpg_openSkeletons_4_0
+#define udatpg_replaceFieldTypes udatpg_replaceFieldTypes_4_0
+#define udatpg_setAppendItemFormat udatpg_setAppendItemFormat_4_0
+#define udatpg_setAppendItemName udatpg_setAppendItemName_4_0
+#define udatpg_setDateTimeFormat udatpg_setDateTimeFormat_4_0
+#define udatpg_setDecimal udatpg_setDecimal_4_0
+#define uenum_close uenum_close_4_0
+#define uenum_count uenum_count_4_0
+#define uenum_next uenum_next_4_0
+#define uenum_nextDefault uenum_nextDefault_4_0
+#define uenum_openCharStringsEnumeration uenum_openCharStringsEnumeration_4_0
+#define uenum_openStringEnumeration uenum_openStringEnumeration_4_0
+#define uenum_reset uenum_reset_4_0
+#define uenum_unext uenum_unext_4_0
+#define uenum_unextDefault uenum_unextDefault_4_0
+#define ufile_close_translit ufile_close_translit_4_0
+#define ufile_fill_uchar_buffer ufile_fill_uchar_buffer_4_0
+#define ufile_flush_translit ufile_flush_translit_4_0
+#define ufile_getch ufile_getch_4_0
+#define ufile_getch32 ufile_getch32_4_0
+#define ufmt_64tou ufmt_64tou_4_0
+#define ufmt_defaultCPToUnicode ufmt_defaultCPToUnicode_4_0
+#define ufmt_digitvalue ufmt_digitvalue_4_0
+#define ufmt_isdigit ufmt_isdigit_4_0
+#define ufmt_ptou ufmt_ptou_4_0
+#define ufmt_uto64 ufmt_uto64_4_0
+#define ufmt_utop ufmt_utop_4_0
+#define uhash_close uhash_close_4_0
+#define uhash_compareCaselessUnicodeString uhash_compareCaselessUnicodeString_4_0
+#define uhash_compareChars uhash_compareChars_4_0
+#define uhash_compareIChars uhash_compareIChars_4_0
+#define uhash_compareLong uhash_compareLong_4_0
+#define uhash_compareUChars uhash_compareUChars_4_0
+#define uhash_compareUnicodeString uhash_compareUnicodeString_4_0
+#define uhash_count uhash_count_4_0
+#define uhash_deleteHashtable uhash_deleteHashtable_4_0
+#define uhash_deleteUVector uhash_deleteUVector_4_0
+#define uhash_deleteUnicodeString uhash_deleteUnicodeString_4_0
+#define uhash_equals uhash_equals_4_0
+#define uhash_find uhash_find_4_0
+#define uhash_freeBlock uhash_freeBlock_4_0
+#define uhash_get uhash_get_4_0
+#define uhash_geti uhash_geti_4_0
+#define uhash_hashCaselessUnicodeString uhash_hashCaselessUnicodeString_4_0
+#define uhash_hashChars uhash_hashChars_4_0
+#define uhash_hashIChars uhash_hashIChars_4_0
+#define uhash_hashLong uhash_hashLong_4_0
+#define uhash_hashUChars uhash_hashUChars_4_0
+#define uhash_hashUCharsN uhash_hashUCharsN_4_0
+#define uhash_hashUnicodeString uhash_hashUnicodeString_4_0
+#define uhash_iget uhash_iget_4_0
+#define uhash_igeti uhash_igeti_4_0
+#define uhash_init uhash_init_4_0
+#define uhash_iput uhash_iput_4_0
+#define uhash_iputi uhash_iputi_4_0
+#define uhash_iremove uhash_iremove_4_0
+#define uhash_iremovei uhash_iremovei_4_0
+#define uhash_nextElement uhash_nextElement_4_0
+#define uhash_open uhash_open_4_0
+#define uhash_openSize uhash_openSize_4_0
+#define uhash_put uhash_put_4_0
+#define uhash_puti uhash_puti_4_0
+#define uhash_remove uhash_remove_4_0
+#define uhash_removeAll uhash_removeAll_4_0
+#define uhash_removeElement uhash_removeElement_4_0
+#define uhash_removei uhash_removei_4_0
+#define uhash_setKeyComparator uhash_setKeyComparator_4_0
+#define uhash_setKeyDeleter uhash_setKeyDeleter_4_0
+#define uhash_setKeyHasher uhash_setKeyHasher_4_0
+#define uhash_setResizePolicy uhash_setResizePolicy_4_0
+#define uhash_setValueComparator uhash_setValueComparator_4_0
+#define uhash_setValueDeleter uhash_setValueDeleter_4_0
+#define uhst_addPropertyStarts uhst_addPropertyStarts_4_0
+#define uidna_IDNToASCII uidna_IDNToASCII_4_0
+#define uidna_IDNToUnicode uidna_IDNToUnicode_4_0
+#define uidna_compare uidna_compare_4_0
+#define uidna_toASCII uidna_toASCII_4_0
+#define uidna_toUnicode uidna_toUnicode_4_0
+#define uiter_current32 uiter_current32_4_0
+#define uiter_getState uiter_getState_4_0
+#define uiter_next32 uiter_next32_4_0
+#define uiter_previous32 uiter_previous32_4_0
+#define uiter_setCharacterIterator uiter_setCharacterIterator_4_0
+#define uiter_setReplaceable uiter_setReplaceable_4_0
+#define uiter_setState uiter_setState_4_0
+#define uiter_setString uiter_setString_4_0
+#define uiter_setUTF16BE uiter_setUTF16BE_4_0
+#define uiter_setUTF8 uiter_setUTF8_4_0
+#define uloc_acceptLanguage uloc_acceptLanguage_4_0
+#define uloc_acceptLanguageFromHTTP uloc_acceptLanguageFromHTTP_4_0
+#define uloc_addLikelySubtags uloc_addLikelySubtags_4_0
+#define uloc_canonicalize uloc_canonicalize_4_0
+#define uloc_countAvailable uloc_countAvailable_4_0
+#define uloc_getAvailable uloc_getAvailable_4_0
+#define uloc_getBaseName uloc_getBaseName_4_0
+#define uloc_getCharacterOrientation uloc_getCharacterOrientation_4_0
+#define uloc_getCountry uloc_getCountry_4_0
+#define uloc_getDefault uloc_getDefault_4_0
+#define uloc_getDisplayCountry uloc_getDisplayCountry_4_0
+#define uloc_getDisplayKeyword uloc_getDisplayKeyword_4_0
+#define uloc_getDisplayKeywordValue uloc_getDisplayKeywordValue_4_0
+#define uloc_getDisplayLanguage uloc_getDisplayLanguage_4_0
+#define uloc_getDisplayName uloc_getDisplayName_4_0
+#define uloc_getDisplayScript uloc_getDisplayScript_4_0
+#define uloc_getDisplayVariant uloc_getDisplayVariant_4_0
+#define uloc_getISO3Country uloc_getISO3Country_4_0
+#define uloc_getISO3Language uloc_getISO3Language_4_0
+#define uloc_getISOCountries uloc_getISOCountries_4_0
+#define uloc_getISOLanguages uloc_getISOLanguages_4_0
+#define uloc_getKeywordValue uloc_getKeywordValue_4_0
+#define uloc_getLCID uloc_getLCID_4_0
+#define uloc_getLanguage uloc_getLanguage_4_0
+#define uloc_getLineOrientation uloc_getLineOrientation_4_0
+#define uloc_getLocaleForLCID uloc_getLocaleForLCID_4_0
+#define uloc_getName uloc_getName_4_0
+#define uloc_getParent uloc_getParent_4_0
+#define uloc_getScript uloc_getScript_4_0
+#define uloc_getVariant uloc_getVariant_4_0
+#define uloc_minimizeSubtags uloc_minimizeSubtags_4_0
+#define uloc_openKeywordList uloc_openKeywordList_4_0
+#define uloc_openKeywords uloc_openKeywords_4_0
+#define uloc_setDefault uloc_setDefault_4_0
+#define uloc_setKeywordValue uloc_setKeywordValue_4_0
+#define ulocdata_close ulocdata_close_4_0
+#define ulocdata_getDelimiter ulocdata_getDelimiter_4_0
+#define ulocdata_getExemplarSet ulocdata_getExemplarSet_4_0
+#define ulocdata_getMeasurementSystem ulocdata_getMeasurementSystem_4_0
+#define ulocdata_getNoSubstitute ulocdata_getNoSubstitute_4_0
+#define ulocdata_getPaperSize ulocdata_getPaperSize_4_0
+#define ulocdata_open ulocdata_open_4_0
+#define ulocdata_setNoSubstitute ulocdata_setNoSubstitute_4_0
+#define umsg_applyPattern umsg_applyPattern_4_0
+#define umsg_autoQuoteApostrophe umsg_autoQuoteApostrophe_4_0
+#define umsg_clone umsg_clone_4_0
+#define umsg_close umsg_close_4_0
+#define umsg_format umsg_format_4_0
+#define umsg_getLocale umsg_getLocale_4_0
+#define umsg_open umsg_open_4_0
+#define umsg_parse umsg_parse_4_0
+#define umsg_setLocale umsg_setLocale_4_0
+#define umsg_toPattern umsg_toPattern_4_0
+#define umsg_vformat umsg_vformat_4_0
+#define umsg_vparse umsg_vparse_4_0
+#define umtx_atomic_dec umtx_atomic_dec_4_0
+#define umtx_atomic_inc umtx_atomic_inc_4_0
+#define umtx_cleanup umtx_cleanup_4_0
+#define umtx_destroy umtx_destroy_4_0
+#define umtx_init umtx_init_4_0
+#define umtx_lock umtx_lock_4_0
+#define umtx_unlock umtx_unlock_4_0
+#define unorm_addPropertyStarts unorm_addPropertyStarts_4_0
+#define unorm_closeIter unorm_closeIter_4_0
+#define unorm_compare unorm_compare_4_0
+#define unorm_compose unorm_compose_4_0
+#define unorm_concatenate unorm_concatenate_4_0
+#define unorm_decompose unorm_decompose_4_0
+#define unorm_getCanonStartSet unorm_getCanonStartSet_4_0
+#define unorm_getCanonicalDecomposition unorm_getCanonicalDecomposition_4_0
+#define unorm_getDecomposition unorm_getDecomposition_4_0
+#define unorm_getFCD16FromCodePoint unorm_getFCD16FromCodePoint_4_0
+#define unorm_getFCDTrie unorm_getFCDTrie_4_0
+#define unorm_getNX unorm_getNX_4_0
+#define unorm_getQuickCheck unorm_getQuickCheck_4_0
+#define unorm_getUnicodeVersion unorm_getUnicodeVersion_4_0
+#define unorm_haveData unorm_haveData_4_0
+#define unorm_internalIsFullCompositionExclusion unorm_internalIsFullCompositionExclusion_4_0
+#define unorm_internalNormalize unorm_internalNormalize_4_0
+#define unorm_internalNormalizeWithNX unorm_internalNormalizeWithNX_4_0
+#define unorm_internalQuickCheck unorm_internalQuickCheck_4_0
+#define unorm_isCanonSafeStart unorm_isCanonSafeStart_4_0
+#define unorm_isNFSkippable unorm_isNFSkippable_4_0
+#define unorm_isNormalized unorm_isNormalized_4_0
+#define unorm_isNormalizedWithOptions unorm_isNormalizedWithOptions_4_0
+#define unorm_next unorm_next_4_0
+#define unorm_normalize unorm_normalize_4_0
+#define unorm_openIter unorm_openIter_4_0
+#define unorm_previous unorm_previous_4_0
+#define unorm_quickCheck unorm_quickCheck_4_0
+#define unorm_quickCheckWithOptions unorm_quickCheckWithOptions_4_0
+#define unorm_setIter unorm_setIter_4_0
+#define unum_applyPattern unum_applyPattern_4_0
+#define unum_clone unum_clone_4_0
+#define unum_close unum_close_4_0
+#define unum_countAvailable unum_countAvailable_4_0
+#define unum_format unum_format_4_0
+#define unum_formatDouble unum_formatDouble_4_0
+#define unum_formatDoubleCurrency unum_formatDoubleCurrency_4_0
+#define unum_formatInt64 unum_formatInt64_4_0
+#define unum_getAttribute unum_getAttribute_4_0
+#define unum_getAvailable unum_getAvailable_4_0
+#define unum_getDoubleAttribute unum_getDoubleAttribute_4_0
+#define unum_getLocaleByType unum_getLocaleByType_4_0
+#define unum_getSymbol unum_getSymbol_4_0
+#define unum_getTextAttribute unum_getTextAttribute_4_0
+#define unum_open unum_open_4_0
+#define unum_parse unum_parse_4_0
+#define unum_parseDouble unum_parseDouble_4_0
+#define unum_parseDoubleCurrency unum_parseDoubleCurrency_4_0
+#define unum_parseInt64 unum_parseInt64_4_0
+#define unum_setAttribute unum_setAttribute_4_0
+#define unum_setDoubleAttribute unum_setDoubleAttribute_4_0
+#define unum_setSymbol unum_setSymbol_4_0
+#define unum_setTextAttribute unum_setTextAttribute_4_0
+#define unum_toPattern unum_toPattern_4_0
+#define upname_swap upname_swap_4_0
+#define uprops_getSource uprops_getSource_4_0
+#define upropsvec_addPropertyStarts upropsvec_addPropertyStarts_4_0
+#define uprv_asciiFromEbcdic uprv_asciiFromEbcdic_4_0
+#define uprv_asciitolower uprv_asciitolower_4_0
+#define uprv_ceil uprv_ceil_4_0
+#define uprv_cnttab_addContraction uprv_cnttab_addContraction_4_0
+#define uprv_cnttab_changeContraction uprv_cnttab_changeContraction_4_0
+#define uprv_cnttab_changeLastCE uprv_cnttab_changeLastCE_4_0
+#define uprv_cnttab_clone uprv_cnttab_clone_4_0
+#define uprv_cnttab_close uprv_cnttab_close_4_0
+#define uprv_cnttab_constructTable uprv_cnttab_constructTable_4_0
+#define uprv_cnttab_findCE uprv_cnttab_findCE_4_0
+#define uprv_cnttab_findCP uprv_cnttab_findCP_4_0
+#define uprv_cnttab_getCE uprv_cnttab_getCE_4_0
+#define uprv_cnttab_insertContraction uprv_cnttab_insertContraction_4_0
+#define uprv_cnttab_isTailored uprv_cnttab_isTailored_4_0
+#define uprv_cnttab_open uprv_cnttab_open_4_0
+#define uprv_cnttab_setContraction uprv_cnttab_setContraction_4_0
+#define uprv_compareASCIIPropertyNames uprv_compareASCIIPropertyNames_4_0
+#define uprv_compareEBCDICPropertyNames uprv_compareEBCDICPropertyNames_4_0
+#define uprv_compareInvAscii uprv_compareInvAscii_4_0
+#define uprv_compareInvEbcdic uprv_compareInvEbcdic_4_0
+#define uprv_convertToLCID uprv_convertToLCID_4_0
+#define uprv_convertToPosix uprv_convertToPosix_4_0
+#define uprv_copyAscii uprv_copyAscii_4_0
+#define uprv_copyEbcdic uprv_copyEbcdic_4_0
+#define uprv_ebcdicFromAscii uprv_ebcdicFromAscii_4_0
+#define uprv_ebcdictolower uprv_ebcdictolower_4_0
+#define uprv_fabs uprv_fabs_4_0
+#define uprv_floor uprv_floor_4_0
+#define uprv_fmax uprv_fmax_4_0
+#define uprv_fmin uprv_fmin_4_0
+#define uprv_fmod uprv_fmod_4_0
+#define uprv_free uprv_free_4_0
+#define uprv_getCharNameCharacters uprv_getCharNameCharacters_4_0
+#define uprv_getDefaultCodepage uprv_getDefaultCodepage_4_0
+#define uprv_getDefaultLocaleID uprv_getDefaultLocaleID_4_0
+#define uprv_getInfinity uprv_getInfinity_4_0
+#define uprv_getMaxCharNameLength uprv_getMaxCharNameLength_4_0
+#define uprv_getMaxValues uprv_getMaxValues_4_0
+#define uprv_getNaN uprv_getNaN_4_0
+#define uprv_getStaticCurrencyName uprv_getStaticCurrencyName_4_0
+#define uprv_getUTCtime uprv_getUTCtime_4_0
+#define uprv_haveProperties uprv_haveProperties_4_0
+#define uprv_init_collIterate uprv_init_collIterate_4_0
+#define uprv_init_pce uprv_init_pce_4_0
+#define uprv_int32Comparator uprv_int32Comparator_4_0
+#define uprv_isInfinite uprv_isInfinite_4_0
+#define uprv_isInvariantString uprv_isInvariantString_4_0
+#define uprv_isInvariantUString uprv_isInvariantUString_4_0
+#define uprv_isNaN uprv_isNaN_4_0
+#define uprv_isNegativeInfinity uprv_isNegativeInfinity_4_0
+#define uprv_isPositiveInfinity uprv_isPositiveInfinity_4_0
+#define uprv_isRuleWhiteSpace uprv_isRuleWhiteSpace_4_0
+#define uprv_itou uprv_itou_4_0
+#define uprv_log uprv_log_4_0
+#define uprv_malloc uprv_malloc_4_0
+#define uprv_mapFile uprv_mapFile_4_0
+#define uprv_max uprv_max_4_0
+#define uprv_maxMantissa uprv_maxMantissa_4_0
+#define uprv_maximumPtr uprv_maximumPtr_4_0
+#define uprv_min uprv_min_4_0
+#define uprv_modf uprv_modf_4_0
+#define uprv_openRuleWhiteSpaceSet uprv_openRuleWhiteSpaceSet_4_0
+#define uprv_parseCurrency uprv_parseCurrency_4_0
+#define uprv_pathIsAbsolute uprv_pathIsAbsolute_4_0
+#define uprv_pow uprv_pow_4_0
+#define uprv_pow10 uprv_pow10_4_0
+#define uprv_realloc uprv_realloc_4_0
+#define uprv_round uprv_round_4_0
+#define uprv_sortArray uprv_sortArray_4_0
+#define uprv_strCompare uprv_strCompare_4_0
+#define uprv_strdup uprv_strdup_4_0
+#define uprv_strndup uprv_strndup_4_0
+#define uprv_syntaxError uprv_syntaxError_4_0
+#define uprv_timezone uprv_timezone_4_0
+#define uprv_toupper uprv_toupper_4_0
+#define uprv_trunc uprv_trunc_4_0
+#define uprv_tzname uprv_tzname_4_0
+#define uprv_tzset uprv_tzset_4_0
+#define uprv_uca_addAnElement uprv_uca_addAnElement_4_0
+#define uprv_uca_assembleTable uprv_uca_assembleTable_4_0
+#define uprv_uca_canonicalClosure uprv_uca_canonicalClosure_4_0
+#define uprv_uca_closeTempTable uprv_uca_closeTempTable_4_0
+#define uprv_uca_getCodePointFromRaw uprv_uca_getCodePointFromRaw_4_0
+#define uprv_uca_getImplicitFromRaw uprv_uca_getImplicitFromRaw_4_0
+#define uprv_uca_getRawFromCodePoint uprv_uca_getRawFromCodePoint_4_0
+#define uprv_uca_getRawFromImplicit uprv_uca_getRawFromImplicit_4_0
+#define uprv_uca_initImplicitConstants uprv_uca_initImplicitConstants_4_0
+#define uprv_uca_initTempTable uprv_uca_initTempTable_4_0
+#define uprv_uint16Comparator uprv_uint16Comparator_4_0
+#define uprv_uint32Comparator uprv_uint32Comparator_4_0
+#define uprv_unmapFile uprv_unmapFile_4_0
+#define uregex_appendReplacement uregex_appendReplacement_4_0
+#define uregex_appendTail uregex_appendTail_4_0
+#define uregex_clone uregex_clone_4_0
+#define uregex_close uregex_close_4_0
+#define uregex_end uregex_end_4_0
+#define uregex_find uregex_find_4_0
+#define uregex_findNext uregex_findNext_4_0
+#define uregex_flags uregex_flags_4_0
+#define uregex_getMatchCallback uregex_getMatchCallback_4_0
+#define uregex_getStackLimit uregex_getStackLimit_4_0
+#define uregex_getText uregex_getText_4_0
+#define uregex_getTimeLimit uregex_getTimeLimit_4_0
+#define uregex_group uregex_group_4_0
+#define uregex_groupCount uregex_groupCount_4_0
+#define uregex_hasAnchoringBounds uregex_hasAnchoringBounds_4_0
+#define uregex_hasTransparentBounds uregex_hasTransparentBounds_4_0
+#define uregex_hitEnd uregex_hitEnd_4_0
+#define uregex_lookingAt uregex_lookingAt_4_0
+#define uregex_matches uregex_matches_4_0
+#define uregex_open uregex_open_4_0
+#define uregex_openC uregex_openC_4_0
+#define uregex_pattern uregex_pattern_4_0
+#define uregex_regionEnd uregex_regionEnd_4_0
+#define uregex_regionStart uregex_regionStart_4_0
+#define uregex_replaceAll uregex_replaceAll_4_0
+#define uregex_replaceFirst uregex_replaceFirst_4_0
+#define uregex_requireEnd uregex_requireEnd_4_0
+#define uregex_reset uregex_reset_4_0
+#define uregex_setMatchCallback uregex_setMatchCallback_4_0
+#define uregex_setRegion uregex_setRegion_4_0
+#define uregex_setStackLimit uregex_setStackLimit_4_0
+#define uregex_setText uregex_setText_4_0
+#define uregex_setTimeLimit uregex_setTimeLimit_4_0
+#define uregex_split uregex_split_4_0
+#define uregex_start uregex_start_4_0
+#define uregex_useAnchoringBounds uregex_useAnchoringBounds_4_0
+#define uregex_useTransparentBounds uregex_useTransparentBounds_4_0
+#define ures_close ures_close_4_0
+#define ures_copyResb ures_copyResb_4_0
+#define ures_countArrayItems ures_countArrayItems_4_0
+#define ures_findResource ures_findResource_4_0
+#define ures_findSubResource ures_findSubResource_4_0
+#define ures_getBinary ures_getBinary_4_0
+#define ures_getByIndex ures_getByIndex_4_0
+#define ures_getByKey ures_getByKey_4_0
+#define ures_getByKeyWithFallback ures_getByKeyWithFallback_4_0
+#define ures_getFunctionalEquivalent ures_getFunctionalEquivalent_4_0
+#define ures_getInt ures_getInt_4_0
+#define ures_getIntVector ures_getIntVector_4_0
+#define ures_getKey ures_getKey_4_0
+#define ures_getKeywordValues ures_getKeywordValues_4_0
+#define ures_getLocale ures_getLocale_4_0
+#define ures_getLocaleByType ures_getLocaleByType_4_0
+#define ures_getName ures_getName_4_0
+#define ures_getNextResource ures_getNextResource_4_0
+#define ures_getNextString ures_getNextString_4_0
+#define ures_getSize ures_getSize_4_0
+#define ures_getString ures_getString_4_0
+#define ures_getStringByIndex ures_getStringByIndex_4_0
+#define ures_getStringByKey ures_getStringByKey_4_0
+#define ures_getStringByKeyWithFallback ures_getStringByKeyWithFallback_4_0
+#define ures_getType ures_getType_4_0
+#define ures_getUInt ures_getUInt_4_0
+#define ures_getUTF8String ures_getUTF8String_4_0
+#define ures_getUTF8StringByIndex ures_getUTF8StringByIndex_4_0
+#define ures_getUTF8StringByKey ures_getUTF8StringByKey_4_0
+#define ures_getVersion ures_getVersion_4_0
+#define ures_getVersionNumber ures_getVersionNumber_4_0
+#define ures_hasNext ures_hasNext_4_0
+#define ures_initStackObject ures_initStackObject_4_0
+#define ures_open ures_open_4_0
+#define ures_openAvailableLocales ures_openAvailableLocales_4_0
+#define ures_openDirect ures_openDirect_4_0
+#define ures_openFillIn ures_openFillIn_4_0
+#define ures_openU ures_openU_4_0
+#define ures_resetIterator ures_resetIterator_4_0
+#define ures_swap ures_swap_4_0
+#define uscript_closeRun uscript_closeRun_4_0
+#define uscript_getCode uscript_getCode_4_0
+#define uscript_getName uscript_getName_4_0
+#define uscript_getScript uscript_getScript_4_0
+#define uscript_getShortName uscript_getShortName_4_0
+#define uscript_nextRun uscript_nextRun_4_0
+#define uscript_openRun uscript_openRun_4_0
+#define uscript_resetRun uscript_resetRun_4_0
+#define uscript_setRunText uscript_setRunText_4_0
+#define usearch_close usearch_close_4_0
+#define usearch_first usearch_first_4_0
+#define usearch_following usearch_following_4_0
+#define usearch_getAttribute usearch_getAttribute_4_0
+#define usearch_getBreakIterator usearch_getBreakIterator_4_0
+#define usearch_getCollator usearch_getCollator_4_0
+#define usearch_getMatchedLength usearch_getMatchedLength_4_0
+#define usearch_getMatchedStart usearch_getMatchedStart_4_0
+#define usearch_getMatchedText usearch_getMatchedText_4_0
+#define usearch_getOffset usearch_getOffset_4_0
+#define usearch_getPattern usearch_getPattern_4_0
+#define usearch_getText usearch_getText_4_0
+#define usearch_handleNextCanonical usearch_handleNextCanonical_4_0
+#define usearch_handleNextExact usearch_handleNextExact_4_0
+#define usearch_handlePreviousCanonical usearch_handlePreviousCanonical_4_0
+#define usearch_handlePreviousExact usearch_handlePreviousExact_4_0
+#define usearch_last usearch_last_4_0
+#define usearch_next usearch_next_4_0
+#define usearch_open usearch_open_4_0
+#define usearch_openFromCollator usearch_openFromCollator_4_0
+#define usearch_preceding usearch_preceding_4_0
+#define usearch_previous usearch_previous_4_0
+#define usearch_reset usearch_reset_4_0
+#define usearch_search usearch_search_4_0
+#define usearch_searchBackwards usearch_searchBackwards_4_0
+#define usearch_setAttribute usearch_setAttribute_4_0
+#define usearch_setBreakIterator usearch_setBreakIterator_4_0
+#define usearch_setCollator usearch_setCollator_4_0
+#define usearch_setOffset usearch_setOffset_4_0
+#define usearch_setPattern usearch_setPattern_4_0
+#define usearch_setText usearch_setText_4_0
+#define uset_add uset_add_4_0
+#define uset_addAll uset_addAll_4_0
+#define uset_addAllCodePoints uset_addAllCodePoints_4_0
+#define uset_addRange uset_addRange_4_0
+#define uset_addString uset_addString_4_0
+#define uset_applyIntPropertyValue uset_applyIntPropertyValue_4_0
+#define uset_applyPattern uset_applyPattern_4_0
+#define uset_applyPropertyAlias uset_applyPropertyAlias_4_0
+#define uset_charAt uset_charAt_4_0
+#define uset_clear uset_clear_4_0
+#define uset_clone uset_clone_4_0
+#define uset_cloneAsThawed uset_cloneAsThawed_4_0
+#define uset_close uset_close_4_0
+#define uset_compact uset_compact_4_0
+#define uset_complement uset_complement_4_0
+#define uset_complementAll uset_complementAll_4_0
+#define uset_contains uset_contains_4_0
+#define uset_containsAll uset_containsAll_4_0
+#define uset_containsAllCodePoints uset_containsAllCodePoints_4_0
+#define uset_containsNone uset_containsNone_4_0
+#define uset_containsRange uset_containsRange_4_0
+#define uset_containsSome uset_containsSome_4_0
+#define uset_containsString uset_containsString_4_0
+#define uset_equals uset_equals_4_0
+#define uset_freeze uset_freeze_4_0
+#define uset_getItem uset_getItem_4_0
+#define uset_getItemCount uset_getItemCount_4_0
+#define uset_getSerializedRange uset_getSerializedRange_4_0
+#define uset_getSerializedRangeCount uset_getSerializedRangeCount_4_0
+#define uset_getSerializedSet uset_getSerializedSet_4_0
+#define uset_indexOf uset_indexOf_4_0
+#define uset_isEmpty uset_isEmpty_4_0
+#define uset_isFrozen uset_isFrozen_4_0
+#define uset_open uset_open_4_0
+#define uset_openPattern uset_openPattern_4_0
+#define uset_openPatternOptions uset_openPatternOptions_4_0
+#define uset_remove uset_remove_4_0
+#define uset_removeAll uset_removeAll_4_0
+#define uset_removeRange uset_removeRange_4_0
+#define uset_removeString uset_removeString_4_0
+#define uset_resemblesPattern uset_resemblesPattern_4_0
+#define uset_retain uset_retain_4_0
+#define uset_retainAll uset_retainAll_4_0
+#define uset_serialize uset_serialize_4_0
+#define uset_serializedContains uset_serializedContains_4_0
+#define uset_set uset_set_4_0
+#define uset_setSerializedToOne uset_setSerializedToOne_4_0
+#define uset_size uset_size_4_0
+#define uset_span uset_span_4_0
+#define uset_spanBack uset_spanBack_4_0
+#define uset_spanBackUTF8 uset_spanBackUTF8_4_0
+#define uset_spanUTF8 uset_spanUTF8_4_0
+#define uset_toPattern uset_toPattern_4_0
+#define usprep_close usprep_close_4_0
+#define usprep_open usprep_open_4_0
+#define usprep_prepare usprep_prepare_4_0
+#define usprep_swap usprep_swap_4_0
+#define ustr_foldCase ustr_foldCase_4_0
+#define ustr_toLower ustr_toLower_4_0
+#define ustr_toTitle ustr_toTitle_4_0
+#define ustr_toUpper ustr_toUpper_4_0
+#define utext_char32At utext_char32At_4_0
+#define utext_clone utext_clone_4_0
+#define utext_close utext_close_4_0
+#define utext_copy utext_copy_4_0
+#define utext_current32 utext_current32_4_0
+#define utext_equals utext_equals_4_0
+#define utext_extract utext_extract_4_0
+#define utext_freeze utext_freeze_4_0
+#define utext_getNativeIndex utext_getNativeIndex_4_0
+#define utext_getPreviousNativeIndex utext_getPreviousNativeIndex_4_0
+#define utext_hasMetaData utext_hasMetaData_4_0
+#define utext_isLengthExpensive utext_isLengthExpensive_4_0
+#define utext_isWritable utext_isWritable_4_0
+#define utext_moveIndex32 utext_moveIndex32_4_0
+#define utext_nativeLength utext_nativeLength_4_0
+#define utext_next32 utext_next32_4_0
+#define utext_next32From utext_next32From_4_0
+#define utext_openCharacterIterator utext_openCharacterIterator_4_0
+#define utext_openConstUnicodeString utext_openConstUnicodeString_4_0
+#define utext_openReplaceable utext_openReplaceable_4_0
+#define utext_openUChars utext_openUChars_4_0
+#define utext_openUTF8 utext_openUTF8_4_0
+#define utext_openUnicodeString utext_openUnicodeString_4_0
+#define utext_previous32 utext_previous32_4_0
+#define utext_previous32From utext_previous32From_4_0
+#define utext_replace utext_replace_4_0
+#define utext_setNativeIndex utext_setNativeIndex_4_0
+#define utext_setup utext_setup_4_0
+#define utf8_appendCharSafeBody utf8_appendCharSafeBody_4_0
+#define utf8_back1SafeBody utf8_back1SafeBody_4_0
+#define utf8_countTrailBytes utf8_countTrailBytes_4_0
+#define utf8_nextCharSafeBody utf8_nextCharSafeBody_4_0
+#define utf8_prevCharSafeBody utf8_prevCharSafeBody_4_0
+#define utmscale_fromInt64 utmscale_fromInt64_4_0
+#define utmscale_getTimeScaleValue utmscale_getTimeScaleValue_4_0
+#define utmscale_toInt64 utmscale_toInt64_4_0
+#define utrace_cleanup utrace_cleanup_4_0
+#define utrace_data utrace_data_4_0
+#define utrace_entry utrace_entry_4_0
+#define utrace_exit utrace_exit_4_0
+#define utrace_format utrace_format_4_0
+#define utrace_functionName utrace_functionName_4_0
+#define utrace_getFunctions utrace_getFunctions_4_0
+#define utrace_getLevel utrace_getLevel_4_0
+#define utrace_level utrace_level_4_0
+#define utrace_setFunctions utrace_setFunctions_4_0
+#define utrace_setLevel utrace_setLevel_4_0
+#define utrace_vformat utrace_vformat_4_0
+#define utrans_clone utrans_clone_4_0
+#define utrans_close utrans_close_4_0
+#define utrans_countAvailableIDs utrans_countAvailableIDs_4_0
+#define utrans_getAvailableID utrans_getAvailableID_4_0
+#define utrans_getID utrans_getID_4_0
+#define utrans_getUnicodeID utrans_getUnicodeID_4_0
+#define utrans_open utrans_open_4_0
+#define utrans_openIDs utrans_openIDs_4_0
+#define utrans_openInverse utrans_openInverse_4_0
+#define utrans_openU utrans_openU_4_0
+#define utrans_register utrans_register_4_0
+#define utrans_rep_caseContextIterator utrans_rep_caseContextIterator_4_0
+#define utrans_setFilter utrans_setFilter_4_0
+#define utrans_stripRules utrans_stripRules_4_0
+#define utrans_trans utrans_trans_4_0
+#define utrans_transIncremental utrans_transIncremental_4_0
+#define utrans_transIncrementalUChars utrans_transIncrementalUChars_4_0
+#define utrans_transUChars utrans_transUChars_4_0
+#define utrans_unregister utrans_unregister_4_0
+#define utrans_unregisterID utrans_unregisterID_4_0
+#define utrie_clone utrie_clone_4_0
+#define utrie_close utrie_close_4_0
+#define utrie_defaultGetFoldingOffset utrie_defaultGetFoldingOffset_4_0
+#define utrie_enum utrie_enum_4_0
+#define utrie_get32 utrie_get32_4_0
+#define utrie_getData utrie_getData_4_0
+#define utrie_open utrie_open_4_0
+#define utrie_serialize utrie_serialize_4_0
+#define utrie_set32 utrie_set32_4_0
+#define utrie_setRange32 utrie_setRange32_4_0
+#define utrie_swap utrie_swap_4_0
+#define utrie_unserialize utrie_unserialize_4_0
+#define utrie_unserializeDummy utrie_unserializeDummy_4_0
+/* C++ class names renaming defines */
+
+#ifdef XP_CPLUSPLUS
+#if !U_HAVE_NAMESPACE
+
+#define AbsoluteValueSubstitution AbsoluteValueSubstitution_4_0
+#define AlternateSubstitutionSubtable AlternateSubstitutionSubtable_4_0
+#define AnchorTable AnchorTable_4_0
+#define AndConstraint AndConstraint_4_0
+#define AnnualTimeZoneRule AnnualTimeZoneRule_4_0
+#define AnyTransliterator AnyTransliterator_4_0
+#define ArabicOpenTypeLayoutEngine ArabicOpenTypeLayoutEngine_4_0
+#define ArabicShaping ArabicShaping_4_0
+#define BMPSet BMPSet_4_0
+#define BasicCalendarFactory BasicCalendarFactory_4_0
+#define BasicTimeZone BasicTimeZone_4_0
+#define BinarySearchLookupTable BinarySearchLookupTable_4_0
+#define BreakIterator BreakIterator_4_0
+#define BreakTransliterator BreakTransliterator_4_0
+#define BuddhistCalendar BuddhistCalendar_4_0
+#define BuildCompactTrieHorizontalNode BuildCompactTrieHorizontalNode_4_0
+#define BuildCompactTrieNode BuildCompactTrieNode_4_0
+#define BuildCompactTrieVerticalNode BuildCompactTrieVerticalNode_4_0
+#define CEBuffer CEBuffer_4_0
+#define CECalendar CECalendar_4_0
+#define CFactory CFactory_4_0
+#define Calendar Calendar_4_0
+#define CalendarAstronomer CalendarAstronomer_4_0
+#define CalendarCache CalendarCache_4_0
+#define CalendarData CalendarData_4_0
+#define CalendarService CalendarService_4_0
+#define CanonMarkFilter CanonMarkFilter_4_0
+#define CanonShaping CanonShaping_4_0
+#define CanonicalIterator CanonicalIterator_4_0
+#define CaseMapTransliterator CaseMapTransliterator_4_0
+#define ChainingContextualSubstitutionFormat1Subtable ChainingContextualSubstitutionFormat1Subtable_4_0
+#define ChainingContextualSubstitutionFormat2Subtable ChainingContextualSubstitutionFormat2Subtable_4_0
+#define ChainingContextualSubstitutionFormat3Subtable ChainingContextualSubstitutionFormat3Subtable_4_0
+#define ChainingContextualSubstitutionSubtable ChainingContextualSubstitutionSubtable_4_0
+#define CharSubstitutionFilter CharSubstitutionFilter_4_0
+#define CharacterIterator CharacterIterator_4_0
+#define CharacterNode CharacterNode_4_0
+#define CharsetDetector CharsetDetector_4_0
+#define CharsetMatch CharsetMatch_4_0
+#define CharsetRecog_2022 CharsetRecog_2022_4_0
+#define CharsetRecog_2022CN CharsetRecog_2022CN_4_0
+#define CharsetRecog_2022JP CharsetRecog_2022JP_4_0
+#define CharsetRecog_2022KR CharsetRecog_2022KR_4_0
+#define CharsetRecog_8859_1 CharsetRecog_8859_1_4_0
+#define CharsetRecog_8859_1_da CharsetRecog_8859_1_da_4_0
+#define CharsetRecog_8859_1_de CharsetRecog_8859_1_de_4_0
+#define CharsetRecog_8859_1_en CharsetRecog_8859_1_en_4_0
+#define CharsetRecog_8859_1_es CharsetRecog_8859_1_es_4_0
+#define CharsetRecog_8859_1_fr CharsetRecog_8859_1_fr_4_0
+#define CharsetRecog_8859_1_it CharsetRecog_8859_1_it_4_0
+#define CharsetRecog_8859_1_nl CharsetRecog_8859_1_nl_4_0
+#define CharsetRecog_8859_1_no CharsetRecog_8859_1_no_4_0
+#define CharsetRecog_8859_1_pt CharsetRecog_8859_1_pt_4_0
+#define CharsetRecog_8859_1_sv CharsetRecog_8859_1_sv_4_0
+#define CharsetRecog_8859_2 CharsetRecog_8859_2_4_0
+#define CharsetRecog_8859_2_cs CharsetRecog_8859_2_cs_4_0
+#define CharsetRecog_8859_2_hu CharsetRecog_8859_2_hu_4_0
+#define CharsetRecog_8859_2_pl CharsetRecog_8859_2_pl_4_0
+#define CharsetRecog_8859_2_ro CharsetRecog_8859_2_ro_4_0
+#define CharsetRecog_8859_5 CharsetRecog_8859_5_4_0
+#define CharsetRecog_8859_5_ru CharsetRecog_8859_5_ru_4_0
+#define CharsetRecog_8859_6 CharsetRecog_8859_6_4_0
+#define CharsetRecog_8859_6_ar CharsetRecog_8859_6_ar_4_0
+#define CharsetRecog_8859_7 CharsetRecog_8859_7_4_0
+#define CharsetRecog_8859_7_el CharsetRecog_8859_7_el_4_0
+#define CharsetRecog_8859_8 CharsetRecog_8859_8_4_0
+#define CharsetRecog_8859_8_I_he CharsetRecog_8859_8_I_he_4_0
+#define CharsetRecog_8859_8_he CharsetRecog_8859_8_he_4_0
+#define CharsetRecog_8859_9 CharsetRecog_8859_9_4_0
+#define CharsetRecog_8859_9_tr CharsetRecog_8859_9_tr_4_0
+#define CharsetRecog_KOI8_R CharsetRecog_KOI8_R_4_0
+#define CharsetRecog_UTF8 CharsetRecog_UTF8_4_0
+#define CharsetRecog_UTF_16_BE CharsetRecog_UTF_16_BE_4_0
+#define CharsetRecog_UTF_16_LE CharsetRecog_UTF_16_LE_4_0
+#define CharsetRecog_UTF_32 CharsetRecog_UTF_32_4_0
+#define CharsetRecog_UTF_32_BE CharsetRecog_UTF_32_BE_4_0
+#define CharsetRecog_UTF_32_LE CharsetRecog_UTF_32_LE_4_0
+#define CharsetRecog_Unicode CharsetRecog_Unicode_4_0
+#define CharsetRecog_big5 CharsetRecog_big5_4_0
+#define CharsetRecog_euc CharsetRecog_euc_4_0
+#define CharsetRecog_euc_jp CharsetRecog_euc_jp_4_0
+#define CharsetRecog_euc_kr CharsetRecog_euc_kr_4_0
+#define CharsetRecog_gb_18030 CharsetRecog_gb_18030_4_0
+#define CharsetRecog_mbcs CharsetRecog_mbcs_4_0
+#define CharsetRecog_sbcs CharsetRecog_sbcs_4_0
+#define CharsetRecog_sjis CharsetRecog_sjis_4_0
+#define CharsetRecog_windows_1251 CharsetRecog_windows_1251_4_0
+#define CharsetRecog_windows_1256 CharsetRecog_windows_1256_4_0
+#define CharsetRecognizer CharsetRecognizer_4_0
+#define ChineseCalendar ChineseCalendar_4_0
+#define ChoiceFormat ChoiceFormat_4_0
+#define ClassDefFormat1Table ClassDefFormat1Table_4_0
+#define ClassDefFormat2Table ClassDefFormat2Table_4_0
+#define ClassDefinitionTable ClassDefinitionTable_4_0
+#define CollationElementIterator CollationElementIterator_4_0
+#define CollationKey CollationKey_4_0
+#define CollationLocaleListEnumeration CollationLocaleListEnumeration_4_0
+#define Collator Collator_4_0
+#define CollatorFactory CollatorFactory_4_0
+#define CompactTrieDictionary CompactTrieDictionary_4_0
+#define CompactTrieEnumeration CompactTrieEnumeration_4_0
+#define CompoundTransliterator CompoundTransliterator_4_0
+#define ContextualGlyphSubstitutionProcessor ContextualGlyphSubstitutionProcessor_4_0
+#define ContextualSubstitutionBase ContextualSubstitutionBase_4_0
+#define ContextualSubstitutionFormat1Subtable ContextualSubstitutionFormat1Subtable_4_0
+#define ContextualSubstitutionFormat2Subtable ContextualSubstitutionFormat2Subtable_4_0
+#define ContextualSubstitutionFormat3Subtable ContextualSubstitutionFormat3Subtable_4_0
+#define ContextualSubstitutionSubtable ContextualSubstitutionSubtable_4_0
+#define CopticCalendar CopticCalendar_4_0
+#define CoverageFormat1Table CoverageFormat1Table_4_0
+#define CoverageFormat2Table CoverageFormat2Table_4_0
+#define CoverageTable CoverageTable_4_0
+#define CurrencyAmount CurrencyAmount_4_0
+#define CurrencyFormat CurrencyFormat_4_0
+#define CurrencyUnit CurrencyUnit_4_0
+#define CursiveAttachmentSubtable CursiveAttachmentSubtable_4_0
+#define DTRedundantEnumeration DTRedundantEnumeration_4_0
+#define DTSkeletonEnumeration DTSkeletonEnumeration_4_0
+#define DateFormat DateFormat_4_0
+#define DateFormatSymbols DateFormatSymbols_4_0
+#define DateInterval DateInterval_4_0
+#define DateIntervalFormat DateIntervalFormat_4_0
+#define DateIntervalInfo DateIntervalInfo_4_0
+#define DateTimeMatcher DateTimeMatcher_4_0
+#define DateTimePatternGenerator DateTimePatternGenerator_4_0
+#define DateTimeRule DateTimeRule_4_0
+#define DecimalFormat DecimalFormat_4_0
+#define DecimalFormatSymbols DecimalFormatSymbols_4_0
+#define DefaultCalendarFactory DefaultCalendarFactory_4_0
+#define DefaultCharMapper DefaultCharMapper_4_0
+#define DeviceTable DeviceTable_4_0
+#define DictionaryBreakEngine DictionaryBreakEngine_4_0
+#define DigitList DigitList_4_0
+#define DistanceInfo DistanceInfo_4_0
+#define Entry Entry_4_0
+#define EnumToOffset EnumToOffset_4_0
+#define EscapeTransliterator EscapeTransliterator_4_0
+#define EthiopicCalendar EthiopicCalendar_4_0
+#define EventListener EventListener_4_0
+#define ExtensionSubtable ExtensionSubtable_4_0
+#define FeatureListTable FeatureListTable_4_0
+#define FieldPosition FieldPosition_4_0
+#define FontRuns FontRuns_4_0
+#define Format Format_4_0
+#define Format1AnchorTable Format1AnchorTable_4_0
+#define Format2AnchorTable Format2AnchorTable_4_0
+#define Format3AnchorTable Format3AnchorTable_4_0
+#define FormatNameEnumeration FormatNameEnumeration_4_0
+#define FormatParser FormatParser_4_0
+#define Formattable Formattable_4_0
+#define ForwardCharacterIterator ForwardCharacterIterator_4_0
+#define FractionalPartSubstitution FractionalPartSubstitution_4_0
+#define FunctionReplacer FunctionReplacer_4_0
+#define GDEFMarkFilter GDEFMarkFilter_4_0
+#define GXLayoutEngine GXLayoutEngine_4_0
+#define GlyphDefinitionTableHeader GlyphDefinitionTableHeader_4_0
+#define GlyphIterator GlyphIterator_4_0
+#define GlyphLookupTableHeader GlyphLookupTableHeader_4_0
+#define GlyphPositionAdjustments GlyphPositionAdjustments_4_0
+#define GlyphPositioningLookupProcessor GlyphPositioningLookupProcessor_4_0
+#define GlyphPositioningTableHeader GlyphPositioningTableHeader_4_0
+#define GlyphSubstitutionLookupProcessor GlyphSubstitutionLookupProcessor_4_0
+#define GlyphSubstitutionTableHeader GlyphSubstitutionTableHeader_4_0
+#define Grego Grego_4_0
+#define GregorianCalendar GregorianCalendar_4_0
+#define HanOpenTypeLayoutEngine HanOpenTypeLayoutEngine_4_0
+#define HangulOpenTypeLayoutEngine HangulOpenTypeLayoutEngine_4_0
+#define HebrewCalendar HebrewCalendar_4_0
+#define ICUBreakIteratorFactory ICUBreakIteratorFactory_4_0
+#define ICUBreakIteratorService ICUBreakIteratorService_4_0
+#define ICUCollatorFactory ICUCollatorFactory_4_0
+#define ICUCollatorService ICUCollatorService_4_0
+#define ICULanguageBreakFactory ICULanguageBreakFactory_4_0
+#define ICULocaleService ICULocaleService_4_0
+#define ICUNotifier ICUNotifier_4_0
+#define ICUNumberFormatFactory ICUNumberFormatFactory_4_0
+#define ICUNumberFormatService ICUNumberFormatService_4_0
+#define ICUResourceBundleFactory ICUResourceBundleFactory_4_0
+#define ICUService ICUService_4_0
+#define ICUServiceFactory ICUServiceFactory_4_0
+#define ICUServiceKey ICUServiceKey_4_0
+#define ICU_Utility ICU_Utility_4_0
+#define IndianCalendar IndianCalendar_4_0
+#define IndicClassTable IndicClassTable_4_0
+#define IndicOpenTypeLayoutEngine IndicOpenTypeLayoutEngine_4_0
+#define IndicRearrangementProcessor IndicRearrangementProcessor_4_0
+#define IndicReordering IndicReordering_4_0
+#define InitialTimeZoneRule InitialTimeZoneRule_4_0
+#define InputText InputText_4_0
+#define IntegralPartSubstitution IntegralPartSubstitution_4_0
+#define IslamicCalendar IslamicCalendar_4_0
+#define IteratedChar IteratedChar_4_0
+#define JapaneseCalendar JapaneseCalendar_4_0
+#define KernTable KernTable_4_0
+#define KeywordEnumeration KeywordEnumeration_4_0
+#define KhmerClassTable KhmerClassTable_4_0
+#define KhmerOpenTypeLayoutEngine KhmerOpenTypeLayoutEngine_4_0
+#define KhmerReordering KhmerReordering_4_0
+#define LECharMapper LECharMapper_4_0
+#define LEFontInstance LEFontInstance_4_0
+#define LEGlyphFilter LEGlyphFilter_4_0
+#define LEGlyphStorage LEGlyphStorage_4_0
+#define LEInsertionCallback LEInsertionCallback_4_0
+#define LEInsertionList LEInsertionList_4_0
+#define LXUtilities LXUtilities_4_0
+#define LanguageBreakEngine LanguageBreakEngine_4_0
+#define LanguageBreakFactory LanguageBreakFactory_4_0
+#define LayoutEngine LayoutEngine_4_0
+#define LigatureSubstitutionProcessor LigatureSubstitutionProcessor_4_0
+#define LigatureSubstitutionSubtable LigatureSubstitutionSubtable_4_0
+#define LocDataParser LocDataParser_4_0
+#define Locale Locale_4_0
+#define LocaleBased LocaleBased_4_0
+#define LocaleKey LocaleKey_4_0
+#define LocaleKeyFactory LocaleKeyFactory_4_0
+#define LocaleRuns LocaleRuns_4_0
+#define LocaleUtility LocaleUtility_4_0
+#define LocalizationInfo LocalizationInfo_4_0
+#define LookupListTable LookupListTable_4_0
+#define LookupProcessor LookupProcessor_4_0
+#define LookupSubtable LookupSubtable_4_0
+#define LookupTable LookupTable_4_0
+#define LowercaseTransliterator LowercaseTransliterator_4_0
+#define MPreFixups MPreFixups_4_0
+#define MarkArray MarkArray_4_0
+#define MarkToBasePositioningSubtable MarkToBasePositioningSubtable_4_0
+#define MarkToLigaturePositioningSubtable MarkToLigaturePositioningSubtable_4_0
+#define MarkToMarkPositioningSubtable MarkToMarkPositioningSubtable_4_0
+#define Math Math_4_0
+#define Measure Measure_4_0
+#define MeasureFormat MeasureFormat_4_0
+#define MeasureUnit MeasureUnit_4_0
+#define MessageFormat MessageFormat_4_0
+#define MessageFormatAdapter MessageFormatAdapter_4_0
+#define ModulusSubstitution ModulusSubstitution_4_0
+#define MoonRiseSetCoordFunc MoonRiseSetCoordFunc_4_0
+#define MoonTimeAngleFunc MoonTimeAngleFunc_4_0
+#define MorphSubtableHeader MorphSubtableHeader_4_0
+#define MorphTableHeader MorphTableHeader_4_0
+#define MultipleSubstitutionSubtable MultipleSubstitutionSubtable_4_0
+#define MultiplierSubstitution MultiplierSubstitution_4_0
+#define MutableTrieDictionary MutableTrieDictionary_4_0
+#define MutableTrieEnumeration MutableTrieEnumeration_4_0
+#define NFFactory NFFactory_4_0
+#define NFRule NFRule_4_0
+#define NFRuleSet NFRuleSet_4_0
+#define NFSubstitution NFSubstitution_4_0
+#define NGramParser NGramParser_4_0
+#define NameToEnum NameToEnum_4_0
+#define NameUnicodeTransliterator NameUnicodeTransliterator_4_0
+#define NonContextualGlyphSubstitutionProcessor NonContextualGlyphSubstitutionProcessor_4_0
+#define NonContiguousEnumToOffset NonContiguousEnumToOffset_4_0
+#define NormalizationTransliterator NormalizationTransliterator_4_0
+#define Normalizer Normalizer_4_0
+#define NullSubstitution NullSubstitution_4_0
+#define NullTransliterator NullTransliterator_4_0
+#define NumberFormat NumberFormat_4_0
+#define NumberFormatFactory NumberFormatFactory_4_0
+#define NumeratorSubstitution NumeratorSubstitution_4_0
+#define OlsonTimeZone OlsonTimeZone_4_0
+#define OpenTypeLayoutEngine OpenTypeLayoutEngine_4_0
+#define OpenTypeUtilities OpenTypeUtilities_4_0
+#define OrConstraint OrConstraint_4_0
+#define PCEBuffer PCEBuffer_4_0
+#define PairPositioningFormat1Subtable PairPositioningFormat1Subtable_4_0
+#define PairPositioningFormat2Subtable PairPositioningFormat2Subtable_4_0
+#define PairPositioningSubtable PairPositioningSubtable_4_0
+#define ParagraphLayout ParagraphLayout_4_0
+#define ParseData ParseData_4_0
+#define ParsePosition ParsePosition_4_0
+#define PatternMap PatternMap_4_0
+#define PatternMapIterator PatternMapIterator_4_0
+#define PersianCalendar PersianCalendar_4_0
+#define PluralFormat PluralFormat_4_0
+#define PluralKeywordEnumeration PluralKeywordEnumeration_4_0
+#define PluralRules PluralRules_4_0
+#define PropertyAliases PropertyAliases_4_0
+#define PtnElem PtnElem_4_0
+#define PtnSkeleton PtnSkeleton_4_0
+#define Quantifier Quantifier_4_0
+#define RBBIDataWrapper RBBIDataWrapper_4_0
+#define RBBINode RBBINode_4_0
+#define RBBIRuleBuilder RBBIRuleBuilder_4_0
+#define RBBIRuleScanner RBBIRuleScanner_4_0
+#define RBBISetBuilder RBBISetBuilder_4_0
+#define RBBIStateDescriptor RBBIStateDescriptor_4_0
+#define RBBISymbolTable RBBISymbolTable_4_0
+#define RBBISymbolTableEntry RBBISymbolTableEntry_4_0
+#define RBBITableBuilder RBBITableBuilder_4_0
+#define RCEBuffer RCEBuffer_4_0
+#define RangeDescriptor RangeDescriptor_4_0
+#define RegexCImpl RegexCImpl_4_0
+#define RegexCompile RegexCompile_4_0
+#define RegexMatcher RegexMatcher_4_0
+#define RegexPattern RegexPattern_4_0
+#define RegexStaticSets RegexStaticSets_4_0
+#define RelativeDateFormat RelativeDateFormat_4_0
+#define RemoveTransliterator RemoveTransliterator_4_0
+#define Replaceable Replaceable_4_0
+#define ReplaceableGlue ReplaceableGlue_4_0
+#define ResourceBundle ResourceBundle_4_0
+#define RiseSetCoordFunc RiseSetCoordFunc_4_0
+#define RuleBasedBreakIterator RuleBasedBreakIterator_4_0
+#define RuleBasedCollator RuleBasedCollator_4_0
+#define RuleBasedNumberFormat RuleBasedNumberFormat_4_0
+#define RuleBasedTimeZone RuleBasedTimeZone_4_0
+#define RuleBasedTransliterator RuleBasedTransliterator_4_0
+#define RuleChain RuleChain_4_0
+#define RuleCharacterIterator RuleCharacterIterator_4_0
+#define RuleHalf RuleHalf_4_0
+#define RuleParser RuleParser_4_0
+#define RunArray RunArray_4_0
+#define SafeZoneStringFormatPtr SafeZoneStringFormatPtr_4_0
+#define SameValueSubstitution SameValueSubstitution_4_0
+#define ScriptListTable ScriptListTable_4_0
+#define ScriptRunIterator ScriptRunIterator_4_0
+#define ScriptTable ScriptTable_4_0
+#define SearchIterator SearchIterator_4_0
+#define SegmentArrayProcessor SegmentArrayProcessor_4_0
+#define SegmentSingleProcessor SegmentSingleProcessor_4_0
+#define ServiceEnumeration ServiceEnumeration_4_0
+#define ServiceListener ServiceListener_4_0
+#define SimpleArrayProcessor SimpleArrayProcessor_4_0
+#define SimpleDateFormat SimpleDateFormat_4_0
+#define SimpleFactory SimpleFactory_4_0
+#define SimpleLocaleKeyFactory SimpleLocaleKeyFactory_4_0
+#define SimpleNumberFormatFactory SimpleNumberFormatFactory_4_0
+#define SimpleTimeZone SimpleTimeZone_4_0
+#define SinglePositioningFormat1Subtable SinglePositioningFormat1Subtable_4_0
+#define SinglePositioningFormat2Subtable SinglePositioningFormat2Subtable_4_0
+#define SinglePositioningSubtable SinglePositioningSubtable_4_0
+#define SingleSubstitutionFormat1Subtable SingleSubstitutionFormat1Subtable_4_0
+#define SingleSubstitutionFormat2Subtable SingleSubstitutionFormat2Subtable_4_0
+#define SingleSubstitutionSubtable SingleSubstitutionSubtable_4_0
+#define SingleTableProcessor SingleTableProcessor_4_0
+#define Spec Spec_4_0
+#define StateTableProcessor StateTableProcessor_4_0
+#define StringCharacterIterator StringCharacterIterator_4_0
+#define StringEnumeration StringEnumeration_4_0
+#define StringLocalizationInfo StringLocalizationInfo_4_0
+#define StringMatcher StringMatcher_4_0
+#define StringPair StringPair_4_0
+#define StringReplacer StringReplacer_4_0
+#define StringSearch StringSearch_4_0
+#define StyleRuns StyleRuns_4_0
+#define SubstitutionLookup SubstitutionLookup_4_0
+#define SubtableProcessor SubtableProcessor_4_0
+#define SunTimeAngleFunc SunTimeAngleFunc_4_0
+#define SymbolTable SymbolTable_4_0
+#define TZEnumeration TZEnumeration_4_0
+#define TaiwanCalendar TaiwanCalendar_4_0
+#define TernaryNode TernaryNode_4_0
+#define TextTrieMap TextTrieMap_4_0
+#define TextTrieMapSearchResultHandler TextTrieMapSearchResultHandler_4_0
+#define ThaiBreakEngine ThaiBreakEngine_4_0
+#define ThaiLayoutEngine ThaiLayoutEngine_4_0
+#define ThaiShaping ThaiShaping_4_0
+#define TibetanClassTable TibetanClassTable_4_0
+#define TibetanOpenTypeLayoutEngine TibetanOpenTypeLayoutEngine_4_0
+#define TibetanReordering TibetanReordering_4_0
+#define TimeArrayTimeZoneRule TimeArrayTimeZoneRule_4_0
+#define TimeZone TimeZone_4_0
+#define TimeZoneRule TimeZoneRule_4_0
+#define TimeZoneTransition TimeZoneTransition_4_0
+#define TitlecaseTransliterator TitlecaseTransliterator_4_0
+#define TransliterationRule TransliterationRule_4_0
+#define TransliterationRuleData TransliterationRuleData_4_0
+#define TransliterationRuleSet TransliterationRuleSet_4_0
+#define Transliterator Transliterator_4_0
+#define TransliteratorAlias TransliteratorAlias_4_0
+#define TransliteratorIDParser TransliteratorIDParser_4_0
+#define TransliteratorParser TransliteratorParser_4_0
+#define TransliteratorRegistry TransliteratorRegistry_4_0
+#define TrieWordDictionary TrieWordDictionary_4_0
+#define TrimmedArrayProcessor TrimmedArrayProcessor_4_0
+#define UCharCharacterIterator UCharCharacterIterator_4_0
+#define UCollationPCE UCollationPCE_4_0
+#define ULocRuns ULocRuns_4_0
+#define UMemory UMemory_4_0
+#define UObject UObject_4_0
+#define URegularExpression URegularExpression_4_0
+#define UStack UStack_4_0
+#define UStringEnumeration UStringEnumeration_4_0
+#define UVector UVector_4_0
+#define UVector32 UVector32_4_0
+#define UnescapeTransliterator UnescapeTransliterator_4_0
+#define UnhandledEngine UnhandledEngine_4_0
+#define UnicodeArabicOpenTypeLayoutEngine UnicodeArabicOpenTypeLayoutEngine_4_0
+#define UnicodeFilter UnicodeFilter_4_0
+#define UnicodeFunctor UnicodeFunctor_4_0
+#define UnicodeMatcher UnicodeMatcher_4_0
+#define UnicodeNameTransliterator UnicodeNameTransliterator_4_0
+#define UnicodeReplacer UnicodeReplacer_4_0
+#define UnicodeSet UnicodeSet_4_0
+#define UnicodeSetIterator UnicodeSetIterator_4_0
+#define UnicodeSetStringSpan UnicodeSetStringSpan_4_0
+#define UnicodeString UnicodeString_4_0
+#define UppercaseTransliterator UppercaseTransliterator_4_0
+#define VTZReader VTZReader_4_0
+#define VTZWriter VTZWriter_4_0
+#define VTimeZone VTimeZone_4_0
+#define ValueRecord ValueRecord_4_0
+#define ValueRuns ValueRuns_4_0
+#define ZSFCache ZSFCache_4_0
+#define ZSFCacheEntry ZSFCacheEntry_4_0
+#define ZoneMeta ZoneMeta_4_0
+#define ZoneStringFormat ZoneStringFormat_4_0
+#define ZoneStringInfo ZoneStringInfo_4_0
+#define ZoneStringSearchResultHandler ZoneStringSearchResultHandler_4_0
+#define ZoneStrings ZoneStrings_4_0
+#define locale_set_default_internal locale_set_default_internal_4_0
+#define util64_fromDouble util64_fromDouble_4_0
+#define util64_pow util64_pow_4_0
+#define util64_tou util64_tou_4_0
+
+#endif
+#endif
+
+#endif
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/urep.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/urep.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/urep.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,155 +0,0 @@
-/*
-******************************************************************************
-*   Copyright (C) 1997-2005, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-******************************************************************************
-*   Date        Name        Description
-*   06/23/00    aliu        Creation.
-******************************************************************************
-*/
-
-#ifndef __UREP_H
-#define __UREP_H
-
-#include "unicode/utypes.h"
-
-U_CDECL_BEGIN
-
-/********************************************************************
- * General Notes
- ********************************************************************
- * TODO
- * Add usage scenario
- * Add test code
- * Talk about pinning
- * Talk about "can truncate result if out of memory"
- */
-
-/********************************************************************
- * Data Structures
- ********************************************************************/
-/**
- * \file
- * \brief C API: Callbacks for UReplacebale
- */
-/**
- * An opaque replaceable text object.  This will be manipulated only
- * through the caller-supplied UReplaceableFunctor struct.  Related
- * to the C++ class Replaceable.
- * This is currently only used in the Transliterator C API, see utrans.h .
- * @stable ICU 2.0
- */
-typedef void* UReplaceable;
-
-/**
- * A set of function pointers that transliterators use to manipulate a
- * UReplaceable.  The caller should supply the required functions to
- * manipulate their text appropriately.  Related to the C++ class
- * Replaceable.
- * @stable ICU 2.0
- */
-typedef struct UReplaceableCallbacks {
-
-    /**
-     * Function pointer that returns the number of UChar code units in
-     * this text.
-     *
-     * @param rep A pointer to "this" UReplaceable object.
-     * @return The length of the text.
-     * @stable ICU 2.0
-     */
-    int32_t (*length)(const UReplaceable* rep);
-
-    /**
-     * Function pointer that returns a UChar code units at the given
-     * offset into this text; 0 <= offset < n, where n is the value
-     * returned by (*length)(rep).  See unistr.h for a description of
-     * charAt() vs. char32At().
-     *
-     * @param rep A pointer to "this" UReplaceable object.
-     * @param offset The index at which to fetch the UChar (code unit).
-     * @return The UChar (code unit) at offset, or U+FFFF if the offset is out of bounds.
-     * @stable ICU 2.0
-     */
-    UChar   (*charAt)(const UReplaceable* rep,
-                      int32_t offset);
-
-    /**
-     * Function pointer that returns a UChar32 code point at the given
-     * offset into this text.  See unistr.h for a description of
-     * charAt() vs. char32At().
-     *
-     * @param rep A pointer to "this" UReplaceable object.
-     * @param offset The index at which to fetch the UChar32 (code point).
-     * @return The UChar32 (code point) at offset, or U+FFFF if the offset is out of bounds.
-     * @stable ICU 2.0
-     */
-    UChar32 (*char32At)(const UReplaceable* rep,
-                        int32_t offset);
-    
-    /**
-     * Function pointer that replaces text between start and limit in
-     * this text with the given text.  Attributes (out of band info)
-     * should be retained.
-     *
-     * @param rep A pointer to "this" UReplaceable object.
-     * @param start the starting index of the text to be replaced,
-     * inclusive.
-     * @param limit the ending index of the text to be replaced,
-     * exclusive.
-     * @param text the new text to replace the UChars from
-     * start..limit-1.
-     * @param textLength the number of UChars at text, or -1 if text
-     * is null-terminated.
-     * @stable ICU 2.0
-     */
-    void    (*replace)(UReplaceable* rep,
-                       int32_t start,
-                       int32_t limit,
-                       const UChar* text,
-                       int32_t textLength);
-    
-    /**
-     * Function pointer that copies the characters in the range
-     * [<tt>start</tt>, <tt>limit</tt>) into the array <tt>dst</tt>.
-     *
-     * @param rep A pointer to "this" UReplaceable object.
-     * @param start offset of first character which will be copied
-     * into the array
-     * @param limit offset immediately following the last character to
-     * be copied
-     * @param dst array in which to copy characters.  The length of
-     * <tt>dst</tt> must be at least <tt>(limit - start)</tt>.
-     * @stable ICU 2.1
-     */
-    void    (*extract)(UReplaceable* rep,
-                       int32_t start,
-                       int32_t limit,
-                       UChar* dst);
-
-    /**
-     * Function pointer that copies text between start and limit in
-     * this text to another index in the text.  Attributes (out of
-     * band info) should be retained.  After this call, there will be
-     * (at least) two copies of the characters originally located at
-     * start..limit-1.
-     *
-     * @param rep A pointer to "this" UReplaceable object.
-     * @param start the starting index of the text to be copied,
-     * inclusive.
-     * @param limit the ending index of the text to be copied,
-     * exclusive.
-     * @param dest the index at which the copy of the UChars should be
-     * inserted.
-     * @stable ICU 2.0
-     */
-    void    (*copy)(UReplaceable* rep,
-                    int32_t start,
-                    int32_t limit,
-                    int32_t dest);    
-
-} UReplaceableCallbacks;
-
-U_CDECL_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/urep.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/urep.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/urep.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/urep.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,155 @@
+/*
+******************************************************************************
+*   Copyright (C) 1997-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+******************************************************************************
+*   Date        Name        Description
+*   06/23/00    aliu        Creation.
+******************************************************************************
+*/
+
+#ifndef __UREP_H
+#define __UREP_H
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/********************************************************************
+ * General Notes
+ ********************************************************************
+ * TODO
+ * Add usage scenario
+ * Add test code
+ * Talk about pinning
+ * Talk about "can truncate result if out of memory"
+ */
+
+/********************************************************************
+ * Data Structures
+ ********************************************************************/
+/**
+ * \file
+ * \brief C API: Callbacks for UReplacebale
+ */
+/**
+ * An opaque replaceable text object.  This will be manipulated only
+ * through the caller-supplied UReplaceableFunctor struct.  Related
+ * to the C++ class Replaceable.
+ * This is currently only used in the Transliterator C API, see utrans.h .
+ * @stable ICU 2.0
+ */
+typedef void* UReplaceable;
+
+/**
+ * A set of function pointers that transliterators use to manipulate a
+ * UReplaceable.  The caller should supply the required functions to
+ * manipulate their text appropriately.  Related to the C++ class
+ * Replaceable.
+ * @stable ICU 2.0
+ */
+typedef struct UReplaceableCallbacks {
+
+    /**
+     * Function pointer that returns the number of UChar code units in
+     * this text.
+     *
+     * @param rep A pointer to "this" UReplaceable object.
+     * @return The length of the text.
+     * @stable ICU 2.0
+     */
+    int32_t (*length)(const UReplaceable* rep);
+
+    /**
+     * Function pointer that returns a UChar code units at the given
+     * offset into this text; 0 <= offset < n, where n is the value
+     * returned by (*length)(rep).  See unistr.h for a description of
+     * charAt() vs. char32At().
+     *
+     * @param rep A pointer to "this" UReplaceable object.
+     * @param offset The index at which to fetch the UChar (code unit).
+     * @return The UChar (code unit) at offset, or U+FFFF if the offset is out of bounds.
+     * @stable ICU 2.0
+     */
+    UChar   (*charAt)(const UReplaceable* rep,
+                      int32_t offset);
+
+    /**
+     * Function pointer that returns a UChar32 code point at the given
+     * offset into this text.  See unistr.h for a description of
+     * charAt() vs. char32At().
+     *
+     * @param rep A pointer to "this" UReplaceable object.
+     * @param offset The index at which to fetch the UChar32 (code point).
+     * @return The UChar32 (code point) at offset, or U+FFFF if the offset is out of bounds.
+     * @stable ICU 2.0
+     */
+    UChar32 (*char32At)(const UReplaceable* rep,
+                        int32_t offset);
+    
+    /**
+     * Function pointer that replaces text between start and limit in
+     * this text with the given text.  Attributes (out of band info)
+     * should be retained.
+     *
+     * @param rep A pointer to "this" UReplaceable object.
+     * @param start the starting index of the text to be replaced,
+     * inclusive.
+     * @param limit the ending index of the text to be replaced,
+     * exclusive.
+     * @param text the new text to replace the UChars from
+     * start..limit-1.
+     * @param textLength the number of UChars at text, or -1 if text
+     * is null-terminated.
+     * @stable ICU 2.0
+     */
+    void    (*replace)(UReplaceable* rep,
+                       int32_t start,
+                       int32_t limit,
+                       const UChar* text,
+                       int32_t textLength);
+    
+    /**
+     * Function pointer that copies the characters in the range
+     * [<tt>start</tt>, <tt>limit</tt>) into the array <tt>dst</tt>.
+     *
+     * @param rep A pointer to "this" UReplaceable object.
+     * @param start offset of first character which will be copied
+     * into the array
+     * @param limit offset immediately following the last character to
+     * be copied
+     * @param dst array in which to copy characters.  The length of
+     * <tt>dst</tt> must be at least <tt>(limit - start)</tt>.
+     * @stable ICU 2.1
+     */
+    void    (*extract)(UReplaceable* rep,
+                       int32_t start,
+                       int32_t limit,
+                       UChar* dst);
+
+    /**
+     * Function pointer that copies text between start and limit in
+     * this text to another index in the text.  Attributes (out of
+     * band info) should be retained.  After this call, there will be
+     * (at least) two copies of the characters originally located at
+     * start..limit-1.
+     *
+     * @param rep A pointer to "this" UReplaceable object.
+     * @param start the starting index of the text to be copied,
+     * inclusive.
+     * @param limit the ending index of the text to be copied,
+     * exclusive.
+     * @param dest the index at which the copy of the UChars should be
+     * inserted.
+     * @stable ICU 2.0
+     */
+    void    (*copy)(UReplaceable* rep,
+                    int32_t start,
+                    int32_t limit,
+                    int32_t dest);    
+
+} UReplaceableCallbacks;
+
+U_CDECL_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/ures.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ures.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ures.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,871 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1997-2007, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*
-* File URES.H (formerly CRESBUND.H)
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   04/01/97    aliu        Creation.
-*   02/22/99    damiba      overhaul.
-*   04/04/99    helena      Fixed internal header inclusion.
-*   04/15/99    Madhu       Updated Javadoc  
-*   06/14/99    stephen     Removed functions taking a filename suffix.
-*   07/20/99    stephen     Language-independent ypedef to void*
-*   11/09/99    weiv        Added ures_getLocale()
-*   06/24/02    weiv        Added support for resource sharing
-******************************************************************************
-*/
-
-#ifndef URES_H
-#define URES_H
-
-#include "unicode/utypes.h"
-#include "unicode/uloc.h"
-
-/**
- * \file
- * \brief C API: Resource Bundle 
- *
- * <h2>C API: Resource Bundle</h2>
- *
- * C API representing a collection of resource information pertaining to a given
- * locale. A resource bundle provides a way of accessing locale- specific information in
- * a data file. You create a resource bundle that manages the resources for a given
- * locale and then ask it for individual resources.
- * <P>
- * Resource bundles in ICU4C are currently defined using text files which conform to the following
- * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/bnf_rb.txt">BNF definition</a>.
- * More on resource bundle concepts and syntax can be found in the 
- * <a href="http://icu-project.org/userguide/ResourceManagement.html">Users Guide</a>.
- * <P>
- */
-
-/**
- * UResourceBundle is an opaque type for handles for resource bundles in C APIs.
- * @stable ICU 2.0
- */
-struct UResourceBundle;
-
-/**
- * @stable ICU 2.0
- */
-typedef struct UResourceBundle UResourceBundle;
-
-/**
- * Numeric constants for types of resource items.
- * @see ures_getType
- * @stable ICU 2.0
- */
-typedef enum {
-    /** Resource type constant for "no resource". @stable ICU 2.6 */
-    URES_NONE=-1,
-
-    /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */
-    URES_STRING=0,
-
-    /** Resource type constant for binary data. @stable ICU 2.6 */
-    URES_BINARY=1,
-
-    /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */
-    URES_TABLE=2,
-
-    /**
-     * Resource type constant for aliases;
-     * internally stores a string which identifies the actual resource
-     * storing the data (can be in a different resource bundle).
-     * Resolved internally before delivering the actual resource through the API.
-     * @stable ICU 2.6
-     */
-    URES_ALIAS=3,
-
-#ifndef U_HIDE_INTERNAL_API
-
-    /**
-     * Internal use only.
-     * Alternative resource type constant for tables of key-value pairs.
-     * Never returned by ures_getType().
-     * @internal
-     */
-    URES_TABLE32=4,
-
-#endif /* U_HIDE_INTERNAL_API */
-
-    /**
-     * Resource type constant for a single 28-bit integer, interpreted as
-     * signed or unsigned by the ures_getInt() or ures_getUInt() function.
-     * @see ures_getInt
-     * @see ures_getUInt
-     * @stable ICU 2.6
-     */
-    URES_INT=7,
-
-    /** Resource type constant for arrays of resources. @stable ICU 2.6 */
-    URES_ARRAY=8,
-
-    /**
-     * Resource type constant for vectors of 32-bit integers.
-     * @see ures_getIntVector
-     * @stable ICU 2.6
-     */
-    URES_INT_VECTOR = 14,
-#ifndef U_HIDE_DEPRECATED_API
-    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
-    RES_NONE=URES_NONE,
-    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
-    RES_STRING=URES_STRING,
-    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
-    RES_BINARY=URES_BINARY,
-    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
-    RES_TABLE=URES_TABLE,
-    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
-    RES_ALIAS=URES_ALIAS,
-    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
-    RES_INT=URES_INT,
-    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
-    RES_ARRAY=URES_ARRAY,
-    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
-    RES_INT_VECTOR=URES_INT_VECTOR,
-    /** @deprecated ICU 2.6 Not used. */
-    RES_RESERVED=15, 
-#endif /* U_HIDE_DEPRECATED_API */
-
-    URES_LIMIT = 16
-} UResType;
-
-/*
- * Functions to create and destroy resource bundles.
- */
-
-/**
- * Opens a UResourceBundle, from which users can extract strings by using
- * their corresponding keys.
- * Note that the caller is responsible of calling <TT>ures_close</TT> on each succesfully
- * opened resource bundle.
- * @param packageName   The packageName and locale together point to an ICU udata object, 
- *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
- *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
- *                      a package registered with udata_setAppData(). Using a full file or directory
- *                      pathname for packageName is deprecated. If NULL, ICU data will be used.
- * @param locale  specifies the locale for which we want to open the resource
- *                if NULL, the default locale will be used. If strlen(locale) == 0
- *                root locale will be used.
- *                
- * @param status  fills in the outgoing error code.
- * The UErrorCode err parameter is used to return status information to the user. To
- * check whether the construction succeeded or not, you should check the value of
- * U_SUCCESS(err). If you wish more detailed information, you can check for
- * informational status results which still indicate success. U_USING_FALLBACK_WARNING
- * indicates that a fall back locale was used. For example, 'de_CH' was requested,
- * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
- * the default locale data or root locale data was used; neither the requested locale 
- * nor any of its fall back locales could be found. Please see the users guide for more 
- * information on this topic.
- * @return      a newly allocated resource bundle.
- * @see ures_close
- * @stable ICU 2.0
- */
-U_STABLE UResourceBundle*  U_EXPORT2 
-ures_open(const char*    packageName,
-          const char*  locale, 
-          UErrorCode*     status);
-
-
-/** This function does not care what kind of localeID is passed in. It simply opens a bundle with 
- *  that name. Fallback mechanism is disabled for the new bundle. If the requested bundle contains
- *  an %%ALIAS directive, the results are undefined.
- * @param packageName   The packageName and locale together point to an ICU udata object, 
- *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
- *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
- *                      a package registered with udata_setAppData(). Using a full file or directory
- *                      pathname for packageName is deprecated. If NULL, ICU data will be used.
- * @param locale  specifies the locale for which we want to open the resource
- *                if NULL, the default locale will be used. If strlen(locale) == 0
- *                root locale will be used.
- *                
- * @param status fills in the outgoing error code. Either U_ZERO_ERROR or U_MISSING_RESOURCE_ERROR
- * @return      a newly allocated resource bundle or NULL if it doesn't exist.
- * @see ures_close
- * @stable ICU 2.0
- */
-U_STABLE UResourceBundle* U_EXPORT2 
-ures_openDirect(const char* packageName, 
-                const char* locale, 
-                UErrorCode* status);
-
-/**
- * Same as ures_open() but takes a const UChar *path.
- * This path will be converted to char * using the default converter,
- * then ures_open() is called.
- *
- * @param packageName   The packageName and locale together point to an ICU udata object, 
- *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
- *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
- *                      a package registered with udata_setAppData(). Using a full file or directory
- *                      pathname for packageName is deprecated. If NULL, ICU data will be used.
- * @param locale  specifies the locale for which we want to open the resource
- *                if NULL, the default locale will be used. If strlen(locale) == 0
- *                root locale will be used.
- * @param status  fills in the outgoing error code.
- * @return      a newly allocated resource bundle.
- * @see ures_open
- * @stable ICU 2.0
- */
-U_STABLE UResourceBundle* U_EXPORT2 
-ures_openU(const UChar* packageName, 
-           const char* locale, 
-           UErrorCode* status);
-
-/**
- * Returns the number of strings/arrays in resource bundles.
- * Better to use ures_getSize, as this function will be deprecated. 
- *
- *@param resourceBundle resource bundle containing the desired strings
- *@param resourceKey key tagging the resource
- *@param err fills in the outgoing error code
- *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
- *                could be a non-failing error 
- *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_FALLBACK_WARNING </TT>
- *@return: for    <STRONG>Arrays</STRONG>: returns the number of resources in the array
- *                <STRONG>Tables</STRONG>: returns the number of resources in the table
- *                <STRONG>single string</STRONG>: returns 1
- *@see ures_getSize
- * @deprecated ICU 2.8 User ures_getSize instead
- */
-U_DEPRECATED int32_t U_EXPORT2 
-ures_countArrayItems(const UResourceBundle* resourceBundle,
-                     const char* resourceKey,
-                     UErrorCode* err);
-/**
- * Close a resource bundle, all pointers returned from the various ures_getXXX calls
- * on this particular bundle should be considered invalid henceforth.
- *
- * @param resourceBundle a pointer to a resourceBundle struct. Can be NULL.
- * @see ures_open
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ures_close(UResourceBundle* resourceBundle);
-
-/**
- * Return the version number associated with this ResourceBundle as a string. Please
- * use ures_getVersion as this function is going to be deprecated.
- *
- * @param resourceBundle The resource bundle for which the version is checked.
- * @return  A version number string as specified in the resource bundle or its parent.
- *          The caller does not own this string.
- * @see ures_getVersion
- * @deprecated ICU 2.8 Use ures_getVersion instead.
- */
-U_DEPRECATED const char* U_EXPORT2 
-ures_getVersionNumber(const UResourceBundle*   resourceBundle);
-
-/**
- * Return the version number associated with this ResourceBundle as an 
- * UVersionInfo array.
- *
- * @param resB The resource bundle for which the version is checked.
- * @param versionInfo A UVersionInfo array that is filled with the version number
- *                    as specified in the resource bundle or its parent.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ures_getVersion(const UResourceBundle* resB, 
-                UVersionInfo versionInfo);
-
-/**
- * Return the name of the Locale associated with this ResourceBundle. This API allows
- * you to query for the real locale of the resource. For example, if you requested 
- * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned. 
- * For subresources, the locale where this resource comes from will be returned.
- * If fallback has occured, getLocale will reflect this.
- *
- * @param resourceBundle resource bundle in question
- * @param status just for catching illegal arguments
- * @return  A Locale name
- * @deprecated ICU 2.8 Use ures_getLocaleByType instead.
- */
-U_DEPRECATED const char* U_EXPORT2 
-ures_getLocale(const UResourceBundle* resourceBundle, 
-               UErrorCode* status);
-
-
-/**
- * Return the name of the Locale associated with this ResourceBundle. 
- * You can choose between requested, valid and real locale.
- *
- * @param resourceBundle resource bundle in question
- * @param type You can choose between requested, valid and actual
- *             locale. For description see the definition of
- *             ULocDataLocaleType in uloc.h
- * @param status just for catching illegal arguments
- * @return  A Locale name
- * @stable ICU 2.8
- */
-U_STABLE const char* U_EXPORT2 
-ures_getLocaleByType(const UResourceBundle* resourceBundle, 
-                     ULocDataLocaleType type, 
-                     UErrorCode* status);
-
-
-/**
- * Same as ures_open() but uses the fill-in parameter instead of allocating
- * a bundle, if r!=NULL.
- * TODO need to revisit usefulness of this function
- *      and usage model for fillIn parameters without knowing sizeof(UResourceBundle)
- * @param r The resourcebundle to open
- * @param packageName   The packageName and locale together point to an ICU udata object, 
- *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
- *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
- *                      a package registered with udata_setAppData(). Using a full file or directory
- *                      pathname for packageName is deprecated. If NULL, ICU data will be used.
- * @param localeID specifies the locale for which we want to open the resource
- * @param status The error code
- * @return a newly allocated resource bundle or NULL if it doesn't exist.
- * @internal
- */
-U_INTERNAL void U_EXPORT2 
-ures_openFillIn(UResourceBundle *r, 
-                const char* packageName,
-                const char* localeID, 
-                UErrorCode* status);
-
-/**
- * Returns a string from a string resource type
- *
- * @param resourceBundle a string resource
- * @param len    fills in the length of resulting string
- * @param status fills in the outgoing error code
- *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
- *                Always check the value of status. Don't count on returning NULL.
- *                could be a non-failing error 
- *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
- * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
- * @see ures_getBinary
- * @see ures_getIntVector
- * @see ures_getInt
- * @see ures_getUInt
- * @stable ICU 2.0
- */
-U_STABLE const UChar* U_EXPORT2 
-ures_getString(const UResourceBundle* resourceBundle, 
-               int32_t* len, 
-               UErrorCode* status);
-
-/**
- * Returns a UTF-8 string from a string resource.
- * The UTF-8 string may be returnable directly as a pointer, or
- * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
- * or equivalent.
- *
- * If forceCopy==TRUE, then the string is always written to the dest buffer
- * and dest is returned.
- *
- * If forceCopy==FALSE, then the string is returned as a pointer if possible,
- * without needing a dest buffer (it can be NULL). If the string needs to be
- * copied or transformed, then it may be placed into dest at an arbitrary offset.
- *
- * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
- * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
- *
- * If the string is transformed from UTF-16, then a conversion error may occur
- * if an unpaired surrogate is encountered. If the function is successful, then
- * the output UTF-8 string is always well-formed.
- *
- * @param resB Resource bundle.
- * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
- * @param length Input: Capacity of destination buffer.
- *               Output: Actual length of the UTF-8 string, not counting the
- *               terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
- *               Can be NULL, meaning capacity=0 and the string length is not
- *               returned to the caller.
- * @param forceCopy If TRUE, then the output string will always be written to
- *                  dest, with U_BUFFER_OVERFLOW_ERROR and
- *                  U_STRING_NOT_TERMINATED_WARNING set if appropriate.
- *                  If FALSE, then the dest buffer may or may not contain a
- *                  copy of the string. dest may or may not be modified.
- *                  If a copy needs to be written, then the UErrorCode parameter
- *                  indicates overflow etc. as usual.
- * @param status Pointer to a standard ICU error code. Its input value must
- *               pass the U_SUCCESS() test, or else the function returns
- *               immediately. Check for U_FAILURE() on output or use with
- *               function chaining. (See User Guide for details.)
- * @return The pointer to the UTF-8 string. It may be dest, or at some offset
- *         from dest (only if !forceCopy), or in unrelated memory.
- *         Always NUL-terminated unless the string was written to dest and
- *         length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
- *
- * @see ures_getString
- * @see u_strToUTF8
- * @stable ICU 3.6
- */
-U_STABLE const char * U_EXPORT2
-ures_getUTF8String(const UResourceBundle *resB,
-                   char *dest, int32_t *length,
-                   UBool forceCopy,
-                   UErrorCode *status);
-
-/**
- * Returns a binary data from a binary resource. 
- *
- * @param resourceBundle a string resource
- * @param len    fills in the length of resulting byte chunk
- * @param status fills in the outgoing error code
- *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
- *                Always check the value of status. Don't count on returning NULL.
- *                could be a non-failing error 
- *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
- * @return a pointer to a chuck of unsigned bytes which live in a memory mapped/DLL file.
- * @see ures_getString
- * @see ures_getIntVector
- * @see ures_getInt
- * @see ures_getUInt
- * @stable ICU 2.0
- */
-U_STABLE const uint8_t* U_EXPORT2 
-ures_getBinary(const UResourceBundle* resourceBundle, 
-               int32_t* len, 
-               UErrorCode* status);
-
-/**
- * Returns a 32 bit integer array from a resource. 
- *
- * @param resourceBundle an int vector resource
- * @param len    fills in the length of resulting byte chunk
- * @param status fills in the outgoing error code
- *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
- *                Always check the value of status. Don't count on returning NULL.
- *                could be a non-failing error 
- *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
- * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
- * @see ures_getBinary
- * @see ures_getString
- * @see ures_getInt
- * @see ures_getUInt
- * @stable ICU 2.0
- */
-U_STABLE const int32_t* U_EXPORT2 
-ures_getIntVector(const UResourceBundle* resourceBundle, 
-                  int32_t* len, 
-                  UErrorCode* status);
-
-/**
- * Returns an unsigned integer from a resource. 
- * This integer is originally 28 bits.
- *
- * @param resourceBundle a string resource
- * @param status fills in the outgoing error code
- *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
- *                could be a non-failing error 
- *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
- * @return an integer value
- * @see ures_getInt
- * @see ures_getIntVector
- * @see ures_getBinary
- * @see ures_getString
- * @stable ICU 2.0
- */
-U_STABLE uint32_t U_EXPORT2 
-ures_getUInt(const UResourceBundle* resourceBundle, 
-             UErrorCode *status);
-
-/**
- * Returns a signed integer from a resource. 
- * This integer is originally 28 bit and the sign gets propagated.
- *
- * @param resourceBundle a string resource
- * @param status  fills in the outgoing error code
- *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
- *                could be a non-failing error 
- *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
- * @return an integer value
- * @see ures_getUInt
- * @see ures_getIntVector
- * @see ures_getBinary
- * @see ures_getString
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ures_getInt(const UResourceBundle* resourceBundle, 
-            UErrorCode *status);
-
-/**
- * Returns the size of a resource. Size for scalar types is always 1, 
- * and for vector/table types is the number of child resources.
- * @warning Integer array is treated as a scalar type. There are no 
- *          APIs to access individual members of an integer array. It
- *          is always returned as a whole.
- * @param resourceBundle a resource
- * @return number of resources in a given resource.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-ures_getSize(const UResourceBundle *resourceBundle);
-
-/**
- * Returns the type of a resource. Available types are defined in enum UResType
- *
- * @param resourceBundle a resource
- * @return type of the given resource.
- * @see UResType
- * @stable ICU 2.0
- */
-U_STABLE UResType U_EXPORT2 
-ures_getType(const UResourceBundle *resourceBundle);
-
-/**
- * Returns the key associated with a given resource. Not all the resources have a key - only 
- * those that are members of a table.
- *
- * @param resourceBundle a resource
- * @return a key associated to this resource, or NULL if it doesn't have a key
- * @stable ICU 2.0
- */
-U_STABLE const char * U_EXPORT2 
-ures_getKey(const UResourceBundle *resourceBundle);
-
-/* ITERATION API 
-    This API provides means for iterating through a resource
-*/
-
-/**
- * Resets the internal context of a resource so that iteration starts from the first element.
- *
- * @param resourceBundle a resource
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-ures_resetIterator(UResourceBundle *resourceBundle);
-
-/**
- * Checks whether the given resource has another element to iterate over.
- *
- * @param resourceBundle a resource
- * @return TRUE if there are more elements, FALSE if there is no more elements
- * @stable ICU 2.0
- */
-U_STABLE UBool U_EXPORT2 
-ures_hasNext(const UResourceBundle *resourceBundle);
-
-/**
- * Returns the next resource in a given resource or NULL if there are no more resources 
- * to iterate over. Features a fill-in parameter. 
- *
- * @param resourceBundle    a resource
- * @param fillIn            if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
- *                          Alternatively, you can supply a struct to be filled by this function.
- * @param status            fills in the outgoing error code. You may still get a non NULL result even if an
- *                          error occured. Check status instead.
- * @return                  a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
- * @stable ICU 2.0
- */
-U_STABLE UResourceBundle* U_EXPORT2 
-ures_getNextResource(UResourceBundle *resourceBundle, 
-                     UResourceBundle *fillIn, 
-                     UErrorCode *status);
-
-/**
- * Returns the next string in a given resource or NULL if there are no more resources 
- * to iterate over. 
- *
- * @param resourceBundle    a resource
- * @param len               fill in length of the string
- * @param key               fill in for key associated with this string. NULL if no key
- * @param status            fills in the outgoing error code. If an error occured, we may return NULL, but don't
- *                          count on it. Check status instead!
- * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
- * @stable ICU 2.0
- */
-U_STABLE const UChar* U_EXPORT2 
-ures_getNextString(UResourceBundle *resourceBundle, 
-                   int32_t* len, 
-                   const char ** key, 
-                   UErrorCode *status);
-
-/**
- * Returns the resource in a given resource at the specified index. Features a fill-in parameter. 
- *
- * @param resourceBundle    the resource bundle from which to get a sub-resource
- * @param indexR            an index to the wanted resource.
- * @param fillIn            if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
- *                          Alternatively, you can supply a struct to be filled by this function.
- * @param status            fills in the outgoing error code. Don't count on NULL being returned if an error has
- *                          occured. Check status instead.
- * @return                  a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
- * @stable ICU 2.0
- */
-U_STABLE UResourceBundle* U_EXPORT2 
-ures_getByIndex(const UResourceBundle *resourceBundle, 
-                int32_t indexR, 
-                UResourceBundle *fillIn, 
-                UErrorCode *status);
-
-/**
- * Returns the string in a given resource at the specified index.
- *
- * @param resourceBundle    a resource
- * @param indexS            an index to the wanted string.
- * @param len               fill in length of the string
- * @param status            fills in the outgoing error code. If an error occured, we may return NULL, but don't
- *                          count on it. Check status instead!
- * @return                  a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
- * @stable ICU 2.0
- */
-U_STABLE const UChar* U_EXPORT2 
-ures_getStringByIndex(const UResourceBundle *resourceBundle, 
-                      int32_t indexS, 
-                      int32_t* len, 
-                      UErrorCode *status);
-
-/**
- * Returns a UTF-8 string from a resource at the specified index.
- * The UTF-8 string may be returnable directly as a pointer, or
- * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
- * or equivalent.
- *
- * If forceCopy==TRUE, then the string is always written to the dest buffer
- * and dest is returned.
- *
- * If forceCopy==FALSE, then the string is returned as a pointer if possible,
- * without needing a dest buffer (it can be NULL). If the string needs to be
- * copied or transformed, then it may be placed into dest at an arbitrary offset.
- *
- * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
- * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
- *
- * If the string is transformed from UTF-16, then a conversion error may occur
- * if an unpaired surrogate is encountered. If the function is successful, then
- * the output UTF-8 string is always well-formed.
- *
- * @param resB Resource bundle.
- * @param index An index to the wanted string.
- * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
- * @param pLength Input: Capacity of destination buffer.
- *               Output: Actual length of the UTF-8 string, not counting the
- *               terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
- *               Can be NULL, meaning capacity=0 and the string length is not
- *               returned to the caller.
- * @param forceCopy If TRUE, then the output string will always be written to
- *                  dest, with U_BUFFER_OVERFLOW_ERROR and
- *                  U_STRING_NOT_TERMINATED_WARNING set if appropriate.
- *                  If FALSE, then the dest buffer may or may not contain a
- *                  copy of the string. dest may or may not be modified.
- *                  If a copy needs to be written, then the UErrorCode parameter
- *                  indicates overflow etc. as usual.
- * @param status Pointer to a standard ICU error code. Its input value must
- *               pass the U_SUCCESS() test, or else the function returns
- *               immediately. Check for U_FAILURE() on output or use with
- *               function chaining. (See User Guide for details.)
- * @return The pointer to the UTF-8 string. It may be dest, or at some offset
- *         from dest (only if !forceCopy), or in unrelated memory.
- *         Always NUL-terminated unless the string was written to dest and
- *         length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
- *
- * @see ures_getStringByIndex
- * @see u_strToUTF8
- * @stable ICU 3.6
- */
-U_STABLE const char * U_EXPORT2
-ures_getUTF8StringByIndex(const UResourceBundle *resB,
-                          int32_t index,
-                          char *dest, int32_t *pLength,
-                          UBool forceCopy,
-                          UErrorCode *status);
-
-/**
- * Returns a resource in a given resource that has a given key. This procedure works only with table
- * resources. Features a fill-in parameter. 
- *
- * @param resourceBundle    a resource
- * @param key               a key associated with the wanted resource
- * @param fillIn            if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
- *                          Alternatively, you can supply a struct to be filled by this function.
- * @param status            fills in the outgoing error code.
- * @return                  a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
- * @stable ICU 2.0
- */
-U_STABLE UResourceBundle* U_EXPORT2 
-ures_getByKey(const UResourceBundle *resourceBundle, 
-              const char* key, 
-              UResourceBundle *fillIn, 
-              UErrorCode *status);
-
-/**
- * Returns a string in a given resource that has a given key. This procedure works only with table
- * resources. 
- *
- * @param resB              a resource
- * @param key               a key associated with the wanted string
- * @param len               fill in length of the string
- * @param status            fills in the outgoing error code. If an error occured, we may return NULL, but don't
- *                          count on it. Check status instead!
- * @return                  a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
- * @stable ICU 2.0
- */
-U_STABLE const UChar* U_EXPORT2 
-ures_getStringByKey(const UResourceBundle *resB, 
-                    const char* key, 
-                    int32_t* len, 
-                    UErrorCode *status);
-
-/**
- * Returns a UTF-8 string from a resource and a key.
- * This function works only with table resources.
- *
- * The UTF-8 string may be returnable directly as a pointer, or
- * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
- * or equivalent.
- *
- * If forceCopy==TRUE, then the string is always written to the dest buffer
- * and dest is returned.
- *
- * If forceCopy==FALSE, then the string is returned as a pointer if possible,
- * without needing a dest buffer (it can be NULL). If the string needs to be
- * copied or transformed, then it may be placed into dest at an arbitrary offset.
- *
- * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
- * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
- *
- * If the string is transformed from UTF-16, then a conversion error may occur
- * if an unpaired surrogate is encountered. If the function is successful, then
- * the output UTF-8 string is always well-formed.
- *
- * @param resB Resource bundle.
- * @param key  A key associated with the wanted resource
- * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
- * @param pLength Input: Capacity of destination buffer.
- *               Output: Actual length of the UTF-8 string, not counting the
- *               terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
- *               Can be NULL, meaning capacity=0 and the string length is not
- *               returned to the caller.
- * @param forceCopy If TRUE, then the output string will always be written to
- *                  dest, with U_BUFFER_OVERFLOW_ERROR and
- *                  U_STRING_NOT_TERMINATED_WARNING set if appropriate.
- *                  If FALSE, then the dest buffer may or may not contain a
- *                  copy of the string. dest may or may not be modified.
- *                  If a copy needs to be written, then the UErrorCode parameter
- *                  indicates overflow etc. as usual.
- * @param status Pointer to a standard ICU error code. Its input value must
- *               pass the U_SUCCESS() test, or else the function returns
- *               immediately. Check for U_FAILURE() on output or use with
- *               function chaining. (See User Guide for details.)
- * @return The pointer to the UTF-8 string. It may be dest, or at some offset
- *         from dest (only if !forceCopy), or in unrelated memory.
- *         Always NUL-terminated unless the string was written to dest and
- *         length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
- *
- * @see ures_getStringByKey
- * @see u_strToUTF8
- * @stable ICU 3.6
- */
-U_STABLE const char * U_EXPORT2
-ures_getUTF8StringByKey(const UResourceBundle *resB,
-                        const char *key,
-                        char *dest, int32_t *pLength,
-                        UBool forceCopy,
-                        UErrorCode *status);
-
-#ifdef XP_CPLUSPLUS
-#include "unicode/unistr.h"
-
-U_NAMESPACE_BEGIN
-/**
- * returns a string from a string resource type
- *
- * @param resB    a resource
- * @param status: fills in the outgoing error code
- *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
- *                could be a non-failing error 
- *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
- * @return        a UnicodeString object. If there is an error, string is bogus
- * @stable ICU 2.0
- */
-inline UnicodeString 
-ures_getUnicodeString(const UResourceBundle *resB, 
-                      UErrorCode* status) 
-{
-    int32_t len = 0;
-    const UChar *r = ures_getString(resB, &len, status);
-    return UnicodeString(TRUE, r, len);
-}
-
-/**
- * Returns the next string in a resource or NULL if there are no more resources 
- * to iterate over. 
- *
- * @param resB              a resource
- * @param key               fill in for key associated with this string
- * @param status            fills in the outgoing error code
- * @return an UnicodeString object.
- * @stable ICU 2.0
- */
-inline UnicodeString 
-ures_getNextUnicodeString(UResourceBundle *resB, 
-                          const char ** key, 
-                          UErrorCode* status) 
-{
-    int32_t len = 0;
-    const UChar* r = ures_getNextString(resB, &len, key, status);
-    return UnicodeString(TRUE, r, len);
-}
-
-/**
- * Returns the string in a given resource at the specified index.
- *
- * @param resB              a resource
- * @param index             an index to the wanted string.
- * @param status            fills in the outgoing error code
- * @return                  an UnicodeString object. If there is an error, string is bogus
- * @stable ICU 2.0
- */
-inline UnicodeString 
-ures_getUnicodeStringByIndex(const UResourceBundle *resB, 
-                             int32_t indexS, 
-                             UErrorCode* status) 
-{
-    int32_t len = 0;
-    const UChar* r = ures_getStringByIndex(resB, indexS, &len, status);
-    return UnicodeString(TRUE, r, len);
-}
-
-/**
- * Returns a string in a resource that has a given key. This procedure works only with table
- * resources. 
- *
- * @param resB              a resource
- * @param key               a key associated with the wanted string
- * @param status            fills in the outgoing error code
- * @return                  an UnicodeString object. If there is an error, string is bogus
- * @stable ICU 2.0
- */
-inline UnicodeString 
-ures_getUnicodeStringByKey(const UResourceBundle *resB, 
-                           const char* key, 
-                           UErrorCode* status) 
-{
-    int32_t len = 0;
-    const UChar* r = ures_getStringByKey(resB, key, &len, status);
-    return UnicodeString(TRUE, r, len);
-}
-
-U_NAMESPACE_END
-
-#endif
-
-/**
- * Create a string enumerator, owned by the caller, of all locales located within 
- * the specified resource tree.
- * @param packageName name of the tree, such as (NULL) or U_ICUDATA_ALIAS or  or "ICUDATA-coll"
- * This call is similar to uloc_getAvailable().
- * @param status error code
- * @stable ICU 3.2
- */
-U_STABLE UEnumeration* U_EXPORT2
-ures_openAvailableLocales(const char *packageName, UErrorCode *status);
-
-
-#endif /*_URES*/
-/*eof*/

Copied: MacRuby/trunk/icu-1060/unicode/ures.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ures.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ures.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ures.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,871 @@
+/*
+**********************************************************************
+*   Copyright (C) 1997-2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+* File URES.H (formerly CRESBUND.H)
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   04/01/97    aliu        Creation.
+*   02/22/99    damiba      overhaul.
+*   04/04/99    helena      Fixed internal header inclusion.
+*   04/15/99    Madhu       Updated Javadoc  
+*   06/14/99    stephen     Removed functions taking a filename suffix.
+*   07/20/99    stephen     Language-independent ypedef to void*
+*   11/09/99    weiv        Added ures_getLocale()
+*   06/24/02    weiv        Added support for resource sharing
+******************************************************************************
+*/
+
+#ifndef URES_H
+#define URES_H
+
+#include "unicode/utypes.h"
+#include "unicode/uloc.h"
+
+/**
+ * \file
+ * \brief C API: Resource Bundle 
+ *
+ * <h2>C API: Resource Bundle</h2>
+ *
+ * C API representing a collection of resource information pertaining to a given
+ * locale. A resource bundle provides a way of accessing locale- specific information in
+ * a data file. You create a resource bundle that manages the resources for a given
+ * locale and then ask it for individual resources.
+ * <P>
+ * Resource bundles in ICU4C are currently defined using text files which conform to the following
+ * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/bnf_rb.txt">BNF definition</a>.
+ * More on resource bundle concepts and syntax can be found in the 
+ * <a href="http://icu-project.org/userguide/ResourceManagement.html">Users Guide</a>.
+ * <P>
+ */
+
+/**
+ * UResourceBundle is an opaque type for handles for resource bundles in C APIs.
+ * @stable ICU 2.0
+ */
+struct UResourceBundle;
+
+/**
+ * @stable ICU 2.0
+ */
+typedef struct UResourceBundle UResourceBundle;
+
+/**
+ * Numeric constants for types of resource items.
+ * @see ures_getType
+ * @stable ICU 2.0
+ */
+typedef enum {
+    /** Resource type constant for "no resource". @stable ICU 2.6 */
+    URES_NONE=-1,
+
+    /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */
+    URES_STRING=0,
+
+    /** Resource type constant for binary data. @stable ICU 2.6 */
+    URES_BINARY=1,
+
+    /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */
+    URES_TABLE=2,
+
+    /**
+     * Resource type constant for aliases;
+     * internally stores a string which identifies the actual resource
+     * storing the data (can be in a different resource bundle).
+     * Resolved internally before delivering the actual resource through the API.
+     * @stable ICU 2.6
+     */
+    URES_ALIAS=3,
+
+#ifndef U_HIDE_INTERNAL_API
+
+    /**
+     * Internal use only.
+     * Alternative resource type constant for tables of key-value pairs.
+     * Never returned by ures_getType().
+     * @internal
+     */
+    URES_TABLE32=4,
+
+#endif /* U_HIDE_INTERNAL_API */
+
+    /**
+     * Resource type constant for a single 28-bit integer, interpreted as
+     * signed or unsigned by the ures_getInt() or ures_getUInt() function.
+     * @see ures_getInt
+     * @see ures_getUInt
+     * @stable ICU 2.6
+     */
+    URES_INT=7,
+
+    /** Resource type constant for arrays of resources. @stable ICU 2.6 */
+    URES_ARRAY=8,
+
+    /**
+     * Resource type constant for vectors of 32-bit integers.
+     * @see ures_getIntVector
+     * @stable ICU 2.6
+     */
+    URES_INT_VECTOR = 14,
+#ifndef U_HIDE_DEPRECATED_API
+    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+    RES_NONE=URES_NONE,
+    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+    RES_STRING=URES_STRING,
+    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+    RES_BINARY=URES_BINARY,
+    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+    RES_TABLE=URES_TABLE,
+    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+    RES_ALIAS=URES_ALIAS,
+    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+    RES_INT=URES_INT,
+    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+    RES_ARRAY=URES_ARRAY,
+    /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+    RES_INT_VECTOR=URES_INT_VECTOR,
+    /** @deprecated ICU 2.6 Not used. */
+    RES_RESERVED=15, 
+#endif /* U_HIDE_DEPRECATED_API */
+
+    URES_LIMIT = 16
+} UResType;
+
+/*
+ * Functions to create and destroy resource bundles.
+ */
+
+/**
+ * Opens a UResourceBundle, from which users can extract strings by using
+ * their corresponding keys.
+ * Note that the caller is responsible of calling <TT>ures_close</TT> on each succesfully
+ * opened resource bundle.
+ * @param packageName   The packageName and locale together point to an ICU udata object, 
+ *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
+ *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
+ *                      a package registered with udata_setAppData(). Using a full file or directory
+ *                      pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale  specifies the locale for which we want to open the resource
+ *                if NULL, the default locale will be used. If strlen(locale) == 0
+ *                root locale will be used.
+ *                
+ * @param status  fills in the outgoing error code.
+ * The UErrorCode err parameter is used to return status information to the user. To
+ * check whether the construction succeeded or not, you should check the value of
+ * U_SUCCESS(err). If you wish more detailed information, you can check for
+ * informational status results which still indicate success. U_USING_FALLBACK_WARNING
+ * indicates that a fall back locale was used. For example, 'de_CH' was requested,
+ * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
+ * the default locale data or root locale data was used; neither the requested locale 
+ * nor any of its fall back locales could be found. Please see the users guide for more 
+ * information on this topic.
+ * @return      a newly allocated resource bundle.
+ * @see ures_close
+ * @stable ICU 2.0
+ */
+U_STABLE UResourceBundle*  U_EXPORT2 
+ures_open(const char*    packageName,
+          const char*  locale, 
+          UErrorCode*     status);
+
+
+/** This function does not care what kind of localeID is passed in. It simply opens a bundle with 
+ *  that name. Fallback mechanism is disabled for the new bundle. If the requested bundle contains
+ *  an %%ALIAS directive, the results are undefined.
+ * @param packageName   The packageName and locale together point to an ICU udata object, 
+ *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
+ *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
+ *                      a package registered with udata_setAppData(). Using a full file or directory
+ *                      pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale  specifies the locale for which we want to open the resource
+ *                if NULL, the default locale will be used. If strlen(locale) == 0
+ *                root locale will be used.
+ *                
+ * @param status fills in the outgoing error code. Either U_ZERO_ERROR or U_MISSING_RESOURCE_ERROR
+ * @return      a newly allocated resource bundle or NULL if it doesn't exist.
+ * @see ures_close
+ * @stable ICU 2.0
+ */
+U_STABLE UResourceBundle* U_EXPORT2 
+ures_openDirect(const char* packageName, 
+                const char* locale, 
+                UErrorCode* status);
+
+/**
+ * Same as ures_open() but takes a const UChar *path.
+ * This path will be converted to char * using the default converter,
+ * then ures_open() is called.
+ *
+ * @param packageName   The packageName and locale together point to an ICU udata object, 
+ *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
+ *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
+ *                      a package registered with udata_setAppData(). Using a full file or directory
+ *                      pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale  specifies the locale for which we want to open the resource
+ *                if NULL, the default locale will be used. If strlen(locale) == 0
+ *                root locale will be used.
+ * @param status  fills in the outgoing error code.
+ * @return      a newly allocated resource bundle.
+ * @see ures_open
+ * @stable ICU 2.0
+ */
+U_STABLE UResourceBundle* U_EXPORT2 
+ures_openU(const UChar* packageName, 
+           const char* locale, 
+           UErrorCode* status);
+
+/**
+ * Returns the number of strings/arrays in resource bundles.
+ * Better to use ures_getSize, as this function will be deprecated. 
+ *
+ *@param resourceBundle resource bundle containing the desired strings
+ *@param resourceKey key tagging the resource
+ *@param err fills in the outgoing error code
+ *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ *                could be a non-failing error 
+ *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_FALLBACK_WARNING </TT>
+ *@return: for    <STRONG>Arrays</STRONG>: returns the number of resources in the array
+ *                <STRONG>Tables</STRONG>: returns the number of resources in the table
+ *                <STRONG>single string</STRONG>: returns 1
+ *@see ures_getSize
+ * @deprecated ICU 2.8 User ures_getSize instead
+ */
+U_DEPRECATED int32_t U_EXPORT2 
+ures_countArrayItems(const UResourceBundle* resourceBundle,
+                     const char* resourceKey,
+                     UErrorCode* err);
+/**
+ * Close a resource bundle, all pointers returned from the various ures_getXXX calls
+ * on this particular bundle should be considered invalid henceforth.
+ *
+ * @param resourceBundle a pointer to a resourceBundle struct. Can be NULL.
+ * @see ures_open
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ures_close(UResourceBundle* resourceBundle);
+
+/**
+ * Return the version number associated with this ResourceBundle as a string. Please
+ * use ures_getVersion as this function is going to be deprecated.
+ *
+ * @param resourceBundle The resource bundle for which the version is checked.
+ * @return  A version number string as specified in the resource bundle or its parent.
+ *          The caller does not own this string.
+ * @see ures_getVersion
+ * @deprecated ICU 2.8 Use ures_getVersion instead.
+ */
+U_DEPRECATED const char* U_EXPORT2 
+ures_getVersionNumber(const UResourceBundle*   resourceBundle);
+
+/**
+ * Return the version number associated with this ResourceBundle as an 
+ * UVersionInfo array.
+ *
+ * @param resB The resource bundle for which the version is checked.
+ * @param versionInfo A UVersionInfo array that is filled with the version number
+ *                    as specified in the resource bundle or its parent.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ures_getVersion(const UResourceBundle* resB, 
+                UVersionInfo versionInfo);
+
+/**
+ * Return the name of the Locale associated with this ResourceBundle. This API allows
+ * you to query for the real locale of the resource. For example, if you requested 
+ * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned. 
+ * For subresources, the locale where this resource comes from will be returned.
+ * If fallback has occured, getLocale will reflect this.
+ *
+ * @param resourceBundle resource bundle in question
+ * @param status just for catching illegal arguments
+ * @return  A Locale name
+ * @deprecated ICU 2.8 Use ures_getLocaleByType instead.
+ */
+U_DEPRECATED const char* U_EXPORT2 
+ures_getLocale(const UResourceBundle* resourceBundle, 
+               UErrorCode* status);
+
+
+/**
+ * Return the name of the Locale associated with this ResourceBundle. 
+ * You can choose between requested, valid and real locale.
+ *
+ * @param resourceBundle resource bundle in question
+ * @param type You can choose between requested, valid and actual
+ *             locale. For description see the definition of
+ *             ULocDataLocaleType in uloc.h
+ * @param status just for catching illegal arguments
+ * @return  A Locale name
+ * @stable ICU 2.8
+ */
+U_STABLE const char* U_EXPORT2 
+ures_getLocaleByType(const UResourceBundle* resourceBundle, 
+                     ULocDataLocaleType type, 
+                     UErrorCode* status);
+
+
+/**
+ * Same as ures_open() but uses the fill-in parameter instead of allocating
+ * a bundle, if r!=NULL.
+ * TODO need to revisit usefulness of this function
+ *      and usage model for fillIn parameters without knowing sizeof(UResourceBundle)
+ * @param r The resourcebundle to open
+ * @param packageName   The packageName and locale together point to an ICU udata object, 
+ *                      as defined by <code> udata_open( packageName, "res", locale, err) </code> 
+ *                      or equivalent.  Typically, packageName will refer to a (.dat) file, or to
+ *                      a package registered with udata_setAppData(). Using a full file or directory
+ *                      pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param localeID specifies the locale for which we want to open the resource
+ * @param status The error code
+ * @return a newly allocated resource bundle or NULL if it doesn't exist.
+ * @internal
+ */
+U_INTERNAL void U_EXPORT2 
+ures_openFillIn(UResourceBundle *r, 
+                const char* packageName,
+                const char* localeID, 
+                UErrorCode* status);
+
+/**
+ * Returns a string from a string resource type
+ *
+ * @param resourceBundle a string resource
+ * @param len    fills in the length of resulting string
+ * @param status fills in the outgoing error code
+ *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ *                Always check the value of status. Don't count on returning NULL.
+ *                could be a non-failing error 
+ *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @see ures_getBinary
+ * @see ures_getIntVector
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.0
+ */
+U_STABLE const UChar* U_EXPORT2 
+ures_getString(const UResourceBundle* resourceBundle, 
+               int32_t* len, 
+               UErrorCode* status);
+
+/**
+ * Returns a UTF-8 string from a string resource.
+ * The UTF-8 string may be returnable directly as a pointer, or
+ * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
+ * or equivalent.
+ *
+ * If forceCopy==TRUE, then the string is always written to the dest buffer
+ * and dest is returned.
+ *
+ * If forceCopy==FALSE, then the string is returned as a pointer if possible,
+ * without needing a dest buffer (it can be NULL). If the string needs to be
+ * copied or transformed, then it may be placed into dest at an arbitrary offset.
+ *
+ * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
+ *
+ * If the string is transformed from UTF-16, then a conversion error may occur
+ * if an unpaired surrogate is encountered. If the function is successful, then
+ * the output UTF-8 string is always well-formed.
+ *
+ * @param resB Resource bundle.
+ * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
+ * @param length Input: Capacity of destination buffer.
+ *               Output: Actual length of the UTF-8 string, not counting the
+ *               terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
+ *               Can be NULL, meaning capacity=0 and the string length is not
+ *               returned to the caller.
+ * @param forceCopy If TRUE, then the output string will always be written to
+ *                  dest, with U_BUFFER_OVERFLOW_ERROR and
+ *                  U_STRING_NOT_TERMINATED_WARNING set if appropriate.
+ *                  If FALSE, then the dest buffer may or may not contain a
+ *                  copy of the string. dest may or may not be modified.
+ *                  If a copy needs to be written, then the UErrorCode parameter
+ *                  indicates overflow etc. as usual.
+ * @param status Pointer to a standard ICU error code. Its input value must
+ *               pass the U_SUCCESS() test, or else the function returns
+ *               immediately. Check for U_FAILURE() on output or use with
+ *               function chaining. (See User Guide for details.)
+ * @return The pointer to the UTF-8 string. It may be dest, or at some offset
+ *         from dest (only if !forceCopy), or in unrelated memory.
+ *         Always NUL-terminated unless the string was written to dest and
+ *         length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
+ *
+ * @see ures_getString
+ * @see u_strToUTF8
+ * @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ures_getUTF8String(const UResourceBundle *resB,
+                   char *dest, int32_t *length,
+                   UBool forceCopy,
+                   UErrorCode *status);
+
+/**
+ * Returns a binary data from a binary resource. 
+ *
+ * @param resourceBundle a string resource
+ * @param len    fills in the length of resulting byte chunk
+ * @param status fills in the outgoing error code
+ *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ *                Always check the value of status. Don't count on returning NULL.
+ *                could be a non-failing error 
+ *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a chuck of unsigned bytes which live in a memory mapped/DLL file.
+ * @see ures_getString
+ * @see ures_getIntVector
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.0
+ */
+U_STABLE const uint8_t* U_EXPORT2 
+ures_getBinary(const UResourceBundle* resourceBundle, 
+               int32_t* len, 
+               UErrorCode* status);
+
+/**
+ * Returns a 32 bit integer array from a resource. 
+ *
+ * @param resourceBundle an int vector resource
+ * @param len    fills in the length of resulting byte chunk
+ * @param status fills in the outgoing error code
+ *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ *                Always check the value of status. Don't count on returning NULL.
+ *                could be a non-failing error 
+ *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
+ * @see ures_getBinary
+ * @see ures_getString
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.0
+ */
+U_STABLE const int32_t* U_EXPORT2 
+ures_getIntVector(const UResourceBundle* resourceBundle, 
+                  int32_t* len, 
+                  UErrorCode* status);
+
+/**
+ * Returns an unsigned integer from a resource. 
+ * This integer is originally 28 bits.
+ *
+ * @param resourceBundle a string resource
+ * @param status fills in the outgoing error code
+ *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ *                could be a non-failing error 
+ *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return an integer value
+ * @see ures_getInt
+ * @see ures_getIntVector
+ * @see ures_getBinary
+ * @see ures_getString
+ * @stable ICU 2.0
+ */
+U_STABLE uint32_t U_EXPORT2 
+ures_getUInt(const UResourceBundle* resourceBundle, 
+             UErrorCode *status);
+
+/**
+ * Returns a signed integer from a resource. 
+ * This integer is originally 28 bit and the sign gets propagated.
+ *
+ * @param resourceBundle a string resource
+ * @param status  fills in the outgoing error code
+ *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ *                could be a non-failing error 
+ *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return an integer value
+ * @see ures_getUInt
+ * @see ures_getIntVector
+ * @see ures_getBinary
+ * @see ures_getString
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ures_getInt(const UResourceBundle* resourceBundle, 
+            UErrorCode *status);
+
+/**
+ * Returns the size of a resource. Size for scalar types is always 1, 
+ * and for vector/table types is the number of child resources.
+ * @warning Integer array is treated as a scalar type. There are no 
+ *          APIs to access individual members of an integer array. It
+ *          is always returned as a whole.
+ * @param resourceBundle a resource
+ * @return number of resources in a given resource.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+ures_getSize(const UResourceBundle *resourceBundle);
+
+/**
+ * Returns the type of a resource. Available types are defined in enum UResType
+ *
+ * @param resourceBundle a resource
+ * @return type of the given resource.
+ * @see UResType
+ * @stable ICU 2.0
+ */
+U_STABLE UResType U_EXPORT2 
+ures_getType(const UResourceBundle *resourceBundle);
+
+/**
+ * Returns the key associated with a given resource. Not all the resources have a key - only 
+ * those that are members of a table.
+ *
+ * @param resourceBundle a resource
+ * @return a key associated to this resource, or NULL if it doesn't have a key
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2 
+ures_getKey(const UResourceBundle *resourceBundle);
+
+/* ITERATION API 
+    This API provides means for iterating through a resource
+*/
+
+/**
+ * Resets the internal context of a resource so that iteration starts from the first element.
+ *
+ * @param resourceBundle a resource
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+ures_resetIterator(UResourceBundle *resourceBundle);
+
+/**
+ * Checks whether the given resource has another element to iterate over.
+ *
+ * @param resourceBundle a resource
+ * @return TRUE if there are more elements, FALSE if there is no more elements
+ * @stable ICU 2.0
+ */
+U_STABLE UBool U_EXPORT2 
+ures_hasNext(const UResourceBundle *resourceBundle);
+
+/**
+ * Returns the next resource in a given resource or NULL if there are no more resources 
+ * to iterate over. Features a fill-in parameter. 
+ *
+ * @param resourceBundle    a resource
+ * @param fillIn            if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
+ *                          Alternatively, you can supply a struct to be filled by this function.
+ * @param status            fills in the outgoing error code. You may still get a non NULL result even if an
+ *                          error occured. Check status instead.
+ * @return                  a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
+ * @stable ICU 2.0
+ */
+U_STABLE UResourceBundle* U_EXPORT2 
+ures_getNextResource(UResourceBundle *resourceBundle, 
+                     UResourceBundle *fillIn, 
+                     UErrorCode *status);
+
+/**
+ * Returns the next string in a given resource or NULL if there are no more resources 
+ * to iterate over. 
+ *
+ * @param resourceBundle    a resource
+ * @param len               fill in length of the string
+ * @param key               fill in for key associated with this string. NULL if no key
+ * @param status            fills in the outgoing error code. If an error occured, we may return NULL, but don't
+ *                          count on it. Check status instead!
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+U_STABLE const UChar* U_EXPORT2 
+ures_getNextString(UResourceBundle *resourceBundle, 
+                   int32_t* len, 
+                   const char ** key, 
+                   UErrorCode *status);
+
+/**
+ * Returns the resource in a given resource at the specified index. Features a fill-in parameter. 
+ *
+ * @param resourceBundle    the resource bundle from which to get a sub-resource
+ * @param indexR            an index to the wanted resource.
+ * @param fillIn            if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
+ *                          Alternatively, you can supply a struct to be filled by this function.
+ * @param status            fills in the outgoing error code. Don't count on NULL being returned if an error has
+ *                          occured. Check status instead.
+ * @return                  a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
+ * @stable ICU 2.0
+ */
+U_STABLE UResourceBundle* U_EXPORT2 
+ures_getByIndex(const UResourceBundle *resourceBundle, 
+                int32_t indexR, 
+                UResourceBundle *fillIn, 
+                UErrorCode *status);
+
+/**
+ * Returns the string in a given resource at the specified index.
+ *
+ * @param resourceBundle    a resource
+ * @param indexS            an index to the wanted string.
+ * @param len               fill in length of the string
+ * @param status            fills in the outgoing error code. If an error occured, we may return NULL, but don't
+ *                          count on it. Check status instead!
+ * @return                  a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+U_STABLE const UChar* U_EXPORT2 
+ures_getStringByIndex(const UResourceBundle *resourceBundle, 
+                      int32_t indexS, 
+                      int32_t* len, 
+                      UErrorCode *status);
+
+/**
+ * Returns a UTF-8 string from a resource at the specified index.
+ * The UTF-8 string may be returnable directly as a pointer, or
+ * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
+ * or equivalent.
+ *
+ * If forceCopy==TRUE, then the string is always written to the dest buffer
+ * and dest is returned.
+ *
+ * If forceCopy==FALSE, then the string is returned as a pointer if possible,
+ * without needing a dest buffer (it can be NULL). If the string needs to be
+ * copied or transformed, then it may be placed into dest at an arbitrary offset.
+ *
+ * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
+ *
+ * If the string is transformed from UTF-16, then a conversion error may occur
+ * if an unpaired surrogate is encountered. If the function is successful, then
+ * the output UTF-8 string is always well-formed.
+ *
+ * @param resB Resource bundle.
+ * @param index An index to the wanted string.
+ * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
+ * @param pLength Input: Capacity of destination buffer.
+ *               Output: Actual length of the UTF-8 string, not counting the
+ *               terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
+ *               Can be NULL, meaning capacity=0 and the string length is not
+ *               returned to the caller.
+ * @param forceCopy If TRUE, then the output string will always be written to
+ *                  dest, with U_BUFFER_OVERFLOW_ERROR and
+ *                  U_STRING_NOT_TERMINATED_WARNING set if appropriate.
+ *                  If FALSE, then the dest buffer may or may not contain a
+ *                  copy of the string. dest may or may not be modified.
+ *                  If a copy needs to be written, then the UErrorCode parameter
+ *                  indicates overflow etc. as usual.
+ * @param status Pointer to a standard ICU error code. Its input value must
+ *               pass the U_SUCCESS() test, or else the function returns
+ *               immediately. Check for U_FAILURE() on output or use with
+ *               function chaining. (See User Guide for details.)
+ * @return The pointer to the UTF-8 string. It may be dest, or at some offset
+ *         from dest (only if !forceCopy), or in unrelated memory.
+ *         Always NUL-terminated unless the string was written to dest and
+ *         length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
+ *
+ * @see ures_getStringByIndex
+ * @see u_strToUTF8
+ * @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ures_getUTF8StringByIndex(const UResourceBundle *resB,
+                          int32_t index,
+                          char *dest, int32_t *pLength,
+                          UBool forceCopy,
+                          UErrorCode *status);
+
+/**
+ * Returns a resource in a given resource that has a given key. This procedure works only with table
+ * resources. Features a fill-in parameter. 
+ *
+ * @param resourceBundle    a resource
+ * @param key               a key associated with the wanted resource
+ * @param fillIn            if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
+ *                          Alternatively, you can supply a struct to be filled by this function.
+ * @param status            fills in the outgoing error code.
+ * @return                  a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
+ * @stable ICU 2.0
+ */
+U_STABLE UResourceBundle* U_EXPORT2 
+ures_getByKey(const UResourceBundle *resourceBundle, 
+              const char* key, 
+              UResourceBundle *fillIn, 
+              UErrorCode *status);
+
+/**
+ * Returns a string in a given resource that has a given key. This procedure works only with table
+ * resources. 
+ *
+ * @param resB              a resource
+ * @param key               a key associated with the wanted string
+ * @param len               fill in length of the string
+ * @param status            fills in the outgoing error code. If an error occured, we may return NULL, but don't
+ *                          count on it. Check status instead!
+ * @return                  a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+U_STABLE const UChar* U_EXPORT2 
+ures_getStringByKey(const UResourceBundle *resB, 
+                    const char* key, 
+                    int32_t* len, 
+                    UErrorCode *status);
+
+/**
+ * Returns a UTF-8 string from a resource and a key.
+ * This function works only with table resources.
+ *
+ * The UTF-8 string may be returnable directly as a pointer, or
+ * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
+ * or equivalent.
+ *
+ * If forceCopy==TRUE, then the string is always written to the dest buffer
+ * and dest is returned.
+ *
+ * If forceCopy==FALSE, then the string is returned as a pointer if possible,
+ * without needing a dest buffer (it can be NULL). If the string needs to be
+ * copied or transformed, then it may be placed into dest at an arbitrary offset.
+ *
+ * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
+ *
+ * If the string is transformed from UTF-16, then a conversion error may occur
+ * if an unpaired surrogate is encountered. If the function is successful, then
+ * the output UTF-8 string is always well-formed.
+ *
+ * @param resB Resource bundle.
+ * @param key  A key associated with the wanted resource
+ * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
+ * @param pLength Input: Capacity of destination buffer.
+ *               Output: Actual length of the UTF-8 string, not counting the
+ *               terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
+ *               Can be NULL, meaning capacity=0 and the string length is not
+ *               returned to the caller.
+ * @param forceCopy If TRUE, then the output string will always be written to
+ *                  dest, with U_BUFFER_OVERFLOW_ERROR and
+ *                  U_STRING_NOT_TERMINATED_WARNING set if appropriate.
+ *                  If FALSE, then the dest buffer may or may not contain a
+ *                  copy of the string. dest may or may not be modified.
+ *                  If a copy needs to be written, then the UErrorCode parameter
+ *                  indicates overflow etc. as usual.
+ * @param status Pointer to a standard ICU error code. Its input value must
+ *               pass the U_SUCCESS() test, or else the function returns
+ *               immediately. Check for U_FAILURE() on output or use with
+ *               function chaining. (See User Guide for details.)
+ * @return The pointer to the UTF-8 string. It may be dest, or at some offset
+ *         from dest (only if !forceCopy), or in unrelated memory.
+ *         Always NUL-terminated unless the string was written to dest and
+ *         length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
+ *
+ * @see ures_getStringByKey
+ * @see u_strToUTF8
+ * @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ures_getUTF8StringByKey(const UResourceBundle *resB,
+                        const char *key,
+                        char *dest, int32_t *pLength,
+                        UBool forceCopy,
+                        UErrorCode *status);
+
+#ifdef XP_CPLUSPLUS
+#include "unicode/unistr.h"
+
+U_NAMESPACE_BEGIN
+/**
+ * returns a string from a string resource type
+ *
+ * @param resB    a resource
+ * @param status: fills in the outgoing error code
+ *                could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ *                could be a non-failing error 
+ *                e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return        a UnicodeString object. If there is an error, string is bogus
+ * @stable ICU 2.0
+ */
+inline UnicodeString 
+ures_getUnicodeString(const UResourceBundle *resB, 
+                      UErrorCode* status) 
+{
+    int32_t len = 0;
+    const UChar *r = ures_getString(resB, &len, status);
+    return UnicodeString(TRUE, r, len);
+}
+
+/**
+ * Returns the next string in a resource or NULL if there are no more resources 
+ * to iterate over. 
+ *
+ * @param resB              a resource
+ * @param key               fill in for key associated with this string
+ * @param status            fills in the outgoing error code
+ * @return an UnicodeString object.
+ * @stable ICU 2.0
+ */
+inline UnicodeString 
+ures_getNextUnicodeString(UResourceBundle *resB, 
+                          const char ** key, 
+                          UErrorCode* status) 
+{
+    int32_t len = 0;
+    const UChar* r = ures_getNextString(resB, &len, key, status);
+    return UnicodeString(TRUE, r, len);
+}
+
+/**
+ * Returns the string in a given resource at the specified index.
+ *
+ * @param resB              a resource
+ * @param index             an index to the wanted string.
+ * @param status            fills in the outgoing error code
+ * @return                  an UnicodeString object. If there is an error, string is bogus
+ * @stable ICU 2.0
+ */
+inline UnicodeString 
+ures_getUnicodeStringByIndex(const UResourceBundle *resB, 
+                             int32_t indexS, 
+                             UErrorCode* status) 
+{
+    int32_t len = 0;
+    const UChar* r = ures_getStringByIndex(resB, indexS, &len, status);
+    return UnicodeString(TRUE, r, len);
+}
+
+/**
+ * Returns a string in a resource that has a given key. This procedure works only with table
+ * resources. 
+ *
+ * @param resB              a resource
+ * @param key               a key associated with the wanted string
+ * @param status            fills in the outgoing error code
+ * @return                  an UnicodeString object. If there is an error, string is bogus
+ * @stable ICU 2.0
+ */
+inline UnicodeString 
+ures_getUnicodeStringByKey(const UResourceBundle *resB, 
+                           const char* key, 
+                           UErrorCode* status) 
+{
+    int32_t len = 0;
+    const UChar* r = ures_getStringByKey(resB, key, &len, status);
+    return UnicodeString(TRUE, r, len);
+}
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Create a string enumerator, owned by the caller, of all locales located within 
+ * the specified resource tree.
+ * @param packageName name of the tree, such as (NULL) or U_ICUDATA_ALIAS or  or "ICUDATA-coll"
+ * This call is similar to uloc_getAvailable().
+ * @param status error code
+ * @stable ICU 3.2
+ */
+U_STABLE UEnumeration* U_EXPORT2
+ures_openAvailableLocales(const char *packageName, UErrorCode *status);
+
+
+#endif /*_URES*/
+/*eof*/

Deleted: MacRuby/trunk/icu-1060/unicode/uscript.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uscript.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uscript.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,254 +0,0 @@
-/*
- **********************************************************************
- *   Copyright (C) 1997-2008, International Business Machines
- *   Corporation and others.  All Rights Reserved.
- **********************************************************************
- *
- * File USCRIPT.H
- *
- * Modification History:
- *
- *   Date        Name        Description
- *   07/06/2001    Ram         Creation.
- ******************************************************************************
- */
-
-#ifndef USCRIPT_H
-#define USCRIPT_H
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief C API: Unicode Script Information
- */
- 
-/**
- * Constants for ISO 15924 script codes.
- *
- * Many of these script codes - those from Unicode's ScriptNames.txt -
- * are character property values for Unicode's Script property.
- * See UAX #24 Script Names (http://www.unicode.org/reports/tr24/).
- *
- * Starting with ICU 3.6, constants for most ISO 15924 script codes
- * are included (currently excluding private-use codes Qaaa..Qabx).
- * For scripts for which there are codes in ISO 15924 but which are not
- * used in the Unicode Character Database (UCD), there are no Unicode characters
- * associated with those scripts.
- *
- * For example, there are no characters that have a UCD script code of
- * Hans or Hant. All Han ideographs have the Hani script code.
- * The Hans and Hant script codes are used with CLDR data.
- *
- * ISO 15924 script codes are included for use with CLDR and similar.
- *
- * @stable ICU 2.2
- */
-typedef enum UScriptCode {
-      USCRIPT_INVALID_CODE = -1,
-      USCRIPT_COMMON       =  0 , /* Zyyy */
-      USCRIPT_INHERITED    =  1,  /* Qaai */
-      USCRIPT_ARABIC       =  2,  /* Arab */
-      USCRIPT_ARMENIAN     =  3,  /* Armn */
-      USCRIPT_BENGALI      =  4,  /* Beng */
-      USCRIPT_BOPOMOFO     =  5,  /* Bopo */
-      USCRIPT_CHEROKEE     =  6,  /* Cher */
-      USCRIPT_COPTIC       =  7,  /* Copt */
-      USCRIPT_CYRILLIC     =  8,  /* Cyrl */
-      USCRIPT_DESERET      =  9,  /* Dsrt */
-      USCRIPT_DEVANAGARI   = 10,  /* Deva */
-      USCRIPT_ETHIOPIC     = 11,  /* Ethi */
-      USCRIPT_GEORGIAN     = 12,  /* Geor */
-      USCRIPT_GOTHIC       = 13,  /* Goth */
-      USCRIPT_GREEK        = 14,  /* Grek */
-      USCRIPT_GUJARATI     = 15,  /* Gujr */
-      USCRIPT_GURMUKHI     = 16,  /* Guru */
-      USCRIPT_HAN          = 17,  /* Hani */
-      USCRIPT_HANGUL       = 18,  /* Hang */
-      USCRIPT_HEBREW       = 19,  /* Hebr */
-      USCRIPT_HIRAGANA     = 20,  /* Hira */
-      USCRIPT_KANNADA      = 21,  /* Knda */
-      USCRIPT_KATAKANA     = 22,  /* Kana */
-      USCRIPT_KHMER        = 23,  /* Khmr */
-      USCRIPT_LAO          = 24,  /* Laoo */
-      USCRIPT_LATIN        = 25,  /* Latn */
-      USCRIPT_MALAYALAM    = 26,  /* Mlym */
-      USCRIPT_MONGOLIAN    = 27,  /* Mong */
-      USCRIPT_MYANMAR      = 28,  /* Mymr */
-      USCRIPT_OGHAM        = 29,  /* Ogam */
-      USCRIPT_OLD_ITALIC   = 30,  /* Ital */
-      USCRIPT_ORIYA        = 31,  /* Orya */
-      USCRIPT_RUNIC        = 32,  /* Runr */
-      USCRIPT_SINHALA      = 33,  /* Sinh */
-      USCRIPT_SYRIAC       = 34,  /* Syrc */
-      USCRIPT_TAMIL        = 35,  /* Taml */
-      USCRIPT_TELUGU       = 36,  /* Telu */
-      USCRIPT_THAANA       = 37,  /* Thaa */
-      USCRIPT_THAI         = 38,  /* Thai */
-      USCRIPT_TIBETAN      = 39,  /* Tibt */
-      /** Canadian_Aboriginal script. @stable ICU 2.6 */
-      USCRIPT_CANADIAN_ABORIGINAL = 40,  /* Cans */
-      /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
-      USCRIPT_UCAS         = USCRIPT_CANADIAN_ABORIGINAL,
-      USCRIPT_YI           = 41,  /* Yiii */
-      USCRIPT_TAGALOG      = 42,  /* Tglg */
-      USCRIPT_HANUNOO      = 43,  /* Hano */
-      USCRIPT_BUHID        = 44,  /* Buhd */
-      USCRIPT_TAGBANWA     = 45,  /* Tagb */
-
-      /* New scripts in Unicode 4 @stable ICU 2.6 */
-      USCRIPT_BRAILLE      = 46,  /* Brai */
-      USCRIPT_CYPRIOT      = 47,  /* Cprt */
-      USCRIPT_LIMBU        = 48,  /* Limb */
-      USCRIPT_LINEAR_B     = 49,  /* Linb */
-      USCRIPT_OSMANYA      = 50,  /* Osma */
-      USCRIPT_SHAVIAN      = 51,  /* Shaw */
-      USCRIPT_TAI_LE       = 52,  /* Tale */
-      USCRIPT_UGARITIC     = 53,  /* Ugar */
-
-      /** New script code in Unicode 4.0.1 @stable ICU 3.0 */
-      USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */
-      
-      /* New scripts in Unicode 4.1 @stable ICU 3.4 */
-      USCRIPT_BUGINESE      = 55, /* Bugi */
-      USCRIPT_GLAGOLITIC    = 56, /* Glag */
-      USCRIPT_KHAROSHTHI    = 57, /* Khar */
-      USCRIPT_SYLOTI_NAGRI  = 58, /* Sylo */
-      USCRIPT_NEW_TAI_LUE   = 59, /* Talu */
-      USCRIPT_TIFINAGH      = 60, /* Tfng */
-      USCRIPT_OLD_PERSIAN   = 61, /* Xpeo */
-
-      /* New script codes from ISO 15924 @stable ICU 3.6 */
-      USCRIPT_BALINESE                      = 62, /* Bali */
-      USCRIPT_BATAK                         = 63, /* Batk */
-      USCRIPT_BLISSYMBOLS                   = 64, /* Blis */
-      USCRIPT_BRAHMI                        = 65, /* Brah */
-      USCRIPT_CHAM                          = 66, /* Cham */
-      USCRIPT_CIRTH                         = 67, /* Cirt */
-      USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC  = 68, /* Cyrs */
-      USCRIPT_DEMOTIC_EGYPTIAN              = 69, /* Egyd */
-      USCRIPT_HIERATIC_EGYPTIAN             = 70, /* Egyh */
-      USCRIPT_EGYPTIAN_HIEROGLYPHS          = 71, /* Egyp */
-      USCRIPT_KHUTSURI                      = 72, /* Geok */
-      USCRIPT_SIMPLIFIED_HAN                = 73, /* Hans */
-      USCRIPT_TRADITIONAL_HAN               = 74, /* Hant */
-      USCRIPT_PAHAWH_HMONG                  = 75, /* Hmng */
-      USCRIPT_OLD_HUNGARIAN                 = 76, /* Hung */
-      USCRIPT_HARAPPAN_INDUS                = 77, /* Inds */
-      USCRIPT_JAVANESE                      = 78, /* Java */
-      USCRIPT_KAYAH_LI                      = 79, /* Kali */
-      USCRIPT_LATIN_FRAKTUR                 = 80, /* Latf */
-      USCRIPT_LATIN_GAELIC                  = 81, /* Latg */
-      USCRIPT_LEPCHA                        = 82, /* Lepc */
-      USCRIPT_LINEAR_A                      = 83, /* Lina */
-      USCRIPT_MANDAEAN                      = 84, /* Mand */
-      USCRIPT_MAYAN_HIEROGLYPHS             = 85, /* Maya */
-      USCRIPT_MEROITIC                      = 86, /* Mero */
-      USCRIPT_NKO                           = 87, /* Nkoo */
-      USCRIPT_ORKHON                        = 88, /* Orkh */
-      USCRIPT_OLD_PERMIC                    = 89, /* Perm */
-      USCRIPT_PHAGS_PA                      = 90, /* Phag */
-      USCRIPT_PHOENICIAN                    = 91, /* Phnx */
-      USCRIPT_PHONETIC_POLLARD              = 92, /* Plrd */
-      USCRIPT_RONGORONGO                    = 93, /* Roro */
-      USCRIPT_SARATI                        = 94, /* Sara */
-      USCRIPT_ESTRANGELO_SYRIAC             = 95, /* Syre */
-      USCRIPT_WESTERN_SYRIAC                = 96, /* Syrj */
-      USCRIPT_EASTERN_SYRIAC                = 97, /* Syrn */
-      USCRIPT_TENGWAR                       = 98, /* Teng */
-      USCRIPT_VAI                           = 99, /* Vaii */
-      USCRIPT_VISIBLE_SPEECH                = 100, /* Visp */
-      USCRIPT_CUNEIFORM                     = 101,/* Xsux */
-      USCRIPT_UNWRITTEN_LANGUAGES           = 102,/* Zxxx */
-      USCRIPT_UNKNOWN                       = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
-
-      /* New script codes from ISO 15924 @stable ICU 4.0 */
-      USCRIPT_CARIAN                        = 104,/* Cari */
-      USCRIPT_JAPANESE                      = 105,/* Jpan */
-      USCRIPT_LANNA                         = 106,/* Lana */
-      USCRIPT_LYCIAN                        = 107,/* Lyci */
-      USCRIPT_LYDIAN                        = 108,/* Lydi */
-      USCRIPT_OL_CHIKI                      = 109,/* Olck */
-      USCRIPT_REJANG                        = 110,/* Rjng */
-      USCRIPT_SAURASHTRA                    = 111,/* Saur */
-      USCRIPT_SIGN_WRITING                  = 112,/* Sgnw */
-      USCRIPT_SUNDANESE                     = 113,/* Sund */
-      USCRIPT_MOON                          = 114,/* Moon */
-      USCRIPT_MEITEI_MAYEK                  = 115,/* Mtei */
-
-      /* New script codes from ISO 15924 @draft ICU 4.0 */
-      USCRIPT_IMPERIAL_ARAMAIC              = 116,/* Armi */
-      USCRIPT_AVESTAN                       = 117,/* Avst */
-      USCRIPT_CHAKMA                        = 118,/* Cakm */
-      USCRIPT_KOREAN                        = 119,/* Kore */
-      USCRIPT_KAITHI                        = 120,/* Kthi */
-      USCRIPT_MANICHAEAN                    = 121,/* Mani */
-      USCRIPT_INSCRIPTIONAL_PAHLAVI         = 122,/* Phli */
-      USCRIPT_PSALTER_PAHLAVI               = 123,/* Phlp */
-      USCRIPT_BOOK_PAHLAVI                  = 124,/* Phlv */
-      USCRIPT_INSCRIPTIONAL_PARTHIAN        = 125,/* Prti */
-      USCRIPT_SAMARITAN                     = 126,/* Samr */
-      USCRIPT_TAI_VIET                      = 127,/* Tavt */
-      USCRIPT_MATHEMATICAL_NOTATION         = 128,/* Zmth */
-      USCRIPT_SYMBOLS                       = 129,/* Zsym */
-
-      /* Private use codes from Qaaa - Qabx are not supported*/
-      USCRIPT_CODE_LIMIT    = 130
-} UScriptCode;
-
-/**
- * Gets script codes associated with the given locale or ISO 15924 abbreviation or name. 
- * Fills in USCRIPT_MALAYALAM given "Malayam" OR "Mlym".
- * Fills in USCRIPT_LATIN given "en" OR "en_US" 
- * If required capacity is greater than capacity of the destination buffer then the error code
- * is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned
- *
- * <p>Note: To search by short or long script alias only, use
- * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead.  This does
- * a fast lookup with no access of the locale data.
- * @param nameOrAbbrOrLocale name of the script, as given in
- * PropertyValueAliases.txt, or ISO 15924 code or locale
- * @param fillIn the UScriptCode buffer to fill in the script code
- * @param capacity the capacity (size) fo UScriptCode buffer passed in.
- * @param err the error status code.
- * @return The number of script codes filled in the buffer passed in 
- * @stable ICU 2.4
- */
-U_STABLE int32_t  U_EXPORT2 
-uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
-
-/**
- * Gets a script name associated with the given script code. 
- * Returns  "Malayam" given USCRIPT_MALAYALAM
- * @param scriptCode UScriptCode enum
- * @return script long name as given in
- * PropertyValueAliases.txt, or NULL if scriptCode is invalid
- * @stable ICU 2.4
- */
-U_STABLE const char*  U_EXPORT2 
-uscript_getName(UScriptCode scriptCode);
-
-/**
- * Gets a script name associated with the given script code. 
- * Returns  "Mlym" given USCRIPT_MALAYALAM
- * @param scriptCode UScriptCode enum
- * @return script abbreviated name as given in
- * PropertyValueAliases.txt, or NULL if scriptCode is invalid
- * @stable ICU 2.4
- */
-U_STABLE const char*  U_EXPORT2 
-uscript_getShortName(UScriptCode scriptCode);
-
-/** 
- * Gets the script code associated with the given codepoint.
- * Returns USCRIPT_MALAYALAM given 0x0D02 
- * @param codepoint UChar32 codepoint
- * @param err the error status code.
- * @return The UScriptCode, or 0 if codepoint is invalid 
- * @stable ICU 2.4
- */
-U_STABLE UScriptCode  U_EXPORT2 
-uscript_getScript(UChar32 codepoint, UErrorCode *err);
-
-#endif
-
-

Copied: MacRuby/trunk/icu-1060/unicode/uscript.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uscript.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uscript.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uscript.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,254 @@
+/*
+ **********************************************************************
+ *   Copyright (C) 1997-2008, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ *
+ * File USCRIPT.H
+ *
+ * Modification History:
+ *
+ *   Date        Name        Description
+ *   07/06/2001    Ram         Creation.
+ ******************************************************************************
+ */
+
+#ifndef USCRIPT_H
+#define USCRIPT_H
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Unicode Script Information
+ */
+ 
+/**
+ * Constants for ISO 15924 script codes.
+ *
+ * Many of these script codes - those from Unicode's ScriptNames.txt -
+ * are character property values for Unicode's Script property.
+ * See UAX #24 Script Names (http://www.unicode.org/reports/tr24/).
+ *
+ * Starting with ICU 3.6, constants for most ISO 15924 script codes
+ * are included (currently excluding private-use codes Qaaa..Qabx).
+ * For scripts for which there are codes in ISO 15924 but which are not
+ * used in the Unicode Character Database (UCD), there are no Unicode characters
+ * associated with those scripts.
+ *
+ * For example, there are no characters that have a UCD script code of
+ * Hans or Hant. All Han ideographs have the Hani script code.
+ * The Hans and Hant script codes are used with CLDR data.
+ *
+ * ISO 15924 script codes are included for use with CLDR and similar.
+ *
+ * @stable ICU 2.2
+ */
+typedef enum UScriptCode {
+      USCRIPT_INVALID_CODE = -1,
+      USCRIPT_COMMON       =  0 , /* Zyyy */
+      USCRIPT_INHERITED    =  1,  /* Qaai */
+      USCRIPT_ARABIC       =  2,  /* Arab */
+      USCRIPT_ARMENIAN     =  3,  /* Armn */
+      USCRIPT_BENGALI      =  4,  /* Beng */
+      USCRIPT_BOPOMOFO     =  5,  /* Bopo */
+      USCRIPT_CHEROKEE     =  6,  /* Cher */
+      USCRIPT_COPTIC       =  7,  /* Copt */
+      USCRIPT_CYRILLIC     =  8,  /* Cyrl */
+      USCRIPT_DESERET      =  9,  /* Dsrt */
+      USCRIPT_DEVANAGARI   = 10,  /* Deva */
+      USCRIPT_ETHIOPIC     = 11,  /* Ethi */
+      USCRIPT_GEORGIAN     = 12,  /* Geor */
+      USCRIPT_GOTHIC       = 13,  /* Goth */
+      USCRIPT_GREEK        = 14,  /* Grek */
+      USCRIPT_GUJARATI     = 15,  /* Gujr */
+      USCRIPT_GURMUKHI     = 16,  /* Guru */
+      USCRIPT_HAN          = 17,  /* Hani */
+      USCRIPT_HANGUL       = 18,  /* Hang */
+      USCRIPT_HEBREW       = 19,  /* Hebr */
+      USCRIPT_HIRAGANA     = 20,  /* Hira */
+      USCRIPT_KANNADA      = 21,  /* Knda */
+      USCRIPT_KATAKANA     = 22,  /* Kana */
+      USCRIPT_KHMER        = 23,  /* Khmr */
+      USCRIPT_LAO          = 24,  /* Laoo */
+      USCRIPT_LATIN        = 25,  /* Latn */
+      USCRIPT_MALAYALAM    = 26,  /* Mlym */
+      USCRIPT_MONGOLIAN    = 27,  /* Mong */
+      USCRIPT_MYANMAR      = 28,  /* Mymr */
+      USCRIPT_OGHAM        = 29,  /* Ogam */
+      USCRIPT_OLD_ITALIC   = 30,  /* Ital */
+      USCRIPT_ORIYA        = 31,  /* Orya */
+      USCRIPT_RUNIC        = 32,  /* Runr */
+      USCRIPT_SINHALA      = 33,  /* Sinh */
+      USCRIPT_SYRIAC       = 34,  /* Syrc */
+      USCRIPT_TAMIL        = 35,  /* Taml */
+      USCRIPT_TELUGU       = 36,  /* Telu */
+      USCRIPT_THAANA       = 37,  /* Thaa */
+      USCRIPT_THAI         = 38,  /* Thai */
+      USCRIPT_TIBETAN      = 39,  /* Tibt */
+      /** Canadian_Aboriginal script. @stable ICU 2.6 */
+      USCRIPT_CANADIAN_ABORIGINAL = 40,  /* Cans */
+      /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
+      USCRIPT_UCAS         = USCRIPT_CANADIAN_ABORIGINAL,
+      USCRIPT_YI           = 41,  /* Yiii */
+      USCRIPT_TAGALOG      = 42,  /* Tglg */
+      USCRIPT_HANUNOO      = 43,  /* Hano */
+      USCRIPT_BUHID        = 44,  /* Buhd */
+      USCRIPT_TAGBANWA     = 45,  /* Tagb */
+
+      /* New scripts in Unicode 4 @stable ICU 2.6 */
+      USCRIPT_BRAILLE      = 46,  /* Brai */
+      USCRIPT_CYPRIOT      = 47,  /* Cprt */
+      USCRIPT_LIMBU        = 48,  /* Limb */
+      USCRIPT_LINEAR_B     = 49,  /* Linb */
+      USCRIPT_OSMANYA      = 50,  /* Osma */
+      USCRIPT_SHAVIAN      = 51,  /* Shaw */
+      USCRIPT_TAI_LE       = 52,  /* Tale */
+      USCRIPT_UGARITIC     = 53,  /* Ugar */
+
+      /** New script code in Unicode 4.0.1 @stable ICU 3.0 */
+      USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */
+      
+      /* New scripts in Unicode 4.1 @stable ICU 3.4 */
+      USCRIPT_BUGINESE      = 55, /* Bugi */
+      USCRIPT_GLAGOLITIC    = 56, /* Glag */
+      USCRIPT_KHAROSHTHI    = 57, /* Khar */
+      USCRIPT_SYLOTI_NAGRI  = 58, /* Sylo */
+      USCRIPT_NEW_TAI_LUE   = 59, /* Talu */
+      USCRIPT_TIFINAGH      = 60, /* Tfng */
+      USCRIPT_OLD_PERSIAN   = 61, /* Xpeo */
+
+      /* New script codes from ISO 15924 @stable ICU 3.6 */
+      USCRIPT_BALINESE                      = 62, /* Bali */
+      USCRIPT_BATAK                         = 63, /* Batk */
+      USCRIPT_BLISSYMBOLS                   = 64, /* Blis */
+      USCRIPT_BRAHMI                        = 65, /* Brah */
+      USCRIPT_CHAM                          = 66, /* Cham */
+      USCRIPT_CIRTH                         = 67, /* Cirt */
+      USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC  = 68, /* Cyrs */
+      USCRIPT_DEMOTIC_EGYPTIAN              = 69, /* Egyd */
+      USCRIPT_HIERATIC_EGYPTIAN             = 70, /* Egyh */
+      USCRIPT_EGYPTIAN_HIEROGLYPHS          = 71, /* Egyp */
+      USCRIPT_KHUTSURI                      = 72, /* Geok */
+      USCRIPT_SIMPLIFIED_HAN                = 73, /* Hans */
+      USCRIPT_TRADITIONAL_HAN               = 74, /* Hant */
+      USCRIPT_PAHAWH_HMONG                  = 75, /* Hmng */
+      USCRIPT_OLD_HUNGARIAN                 = 76, /* Hung */
+      USCRIPT_HARAPPAN_INDUS                = 77, /* Inds */
+      USCRIPT_JAVANESE                      = 78, /* Java */
+      USCRIPT_KAYAH_LI                      = 79, /* Kali */
+      USCRIPT_LATIN_FRAKTUR                 = 80, /* Latf */
+      USCRIPT_LATIN_GAELIC                  = 81, /* Latg */
+      USCRIPT_LEPCHA                        = 82, /* Lepc */
+      USCRIPT_LINEAR_A                      = 83, /* Lina */
+      USCRIPT_MANDAEAN                      = 84, /* Mand */
+      USCRIPT_MAYAN_HIEROGLYPHS             = 85, /* Maya */
+      USCRIPT_MEROITIC                      = 86, /* Mero */
+      USCRIPT_NKO                           = 87, /* Nkoo */
+      USCRIPT_ORKHON                        = 88, /* Orkh */
+      USCRIPT_OLD_PERMIC                    = 89, /* Perm */
+      USCRIPT_PHAGS_PA                      = 90, /* Phag */
+      USCRIPT_PHOENICIAN                    = 91, /* Phnx */
+      USCRIPT_PHONETIC_POLLARD              = 92, /* Plrd */
+      USCRIPT_RONGORONGO                    = 93, /* Roro */
+      USCRIPT_SARATI                        = 94, /* Sara */
+      USCRIPT_ESTRANGELO_SYRIAC             = 95, /* Syre */
+      USCRIPT_WESTERN_SYRIAC                = 96, /* Syrj */
+      USCRIPT_EASTERN_SYRIAC                = 97, /* Syrn */
+      USCRIPT_TENGWAR                       = 98, /* Teng */
+      USCRIPT_VAI                           = 99, /* Vaii */
+      USCRIPT_VISIBLE_SPEECH                = 100, /* Visp */
+      USCRIPT_CUNEIFORM                     = 101,/* Xsux */
+      USCRIPT_UNWRITTEN_LANGUAGES           = 102,/* Zxxx */
+      USCRIPT_UNKNOWN                       = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
+
+      /* New script codes from ISO 15924 @stable ICU 4.0 */
+      USCRIPT_CARIAN                        = 104,/* Cari */
+      USCRIPT_JAPANESE                      = 105,/* Jpan */
+      USCRIPT_LANNA                         = 106,/* Lana */
+      USCRIPT_LYCIAN                        = 107,/* Lyci */
+      USCRIPT_LYDIAN                        = 108,/* Lydi */
+      USCRIPT_OL_CHIKI                      = 109,/* Olck */
+      USCRIPT_REJANG                        = 110,/* Rjng */
+      USCRIPT_SAURASHTRA                    = 111,/* Saur */
+      USCRIPT_SIGN_WRITING                  = 112,/* Sgnw */
+      USCRIPT_SUNDANESE                     = 113,/* Sund */
+      USCRIPT_MOON                          = 114,/* Moon */
+      USCRIPT_MEITEI_MAYEK                  = 115,/* Mtei */
+
+      /* New script codes from ISO 15924 @draft ICU 4.0 */
+      USCRIPT_IMPERIAL_ARAMAIC              = 116,/* Armi */
+      USCRIPT_AVESTAN                       = 117,/* Avst */
+      USCRIPT_CHAKMA                        = 118,/* Cakm */
+      USCRIPT_KOREAN                        = 119,/* Kore */
+      USCRIPT_KAITHI                        = 120,/* Kthi */
+      USCRIPT_MANICHAEAN                    = 121,/* Mani */
+      USCRIPT_INSCRIPTIONAL_PAHLAVI         = 122,/* Phli */
+      USCRIPT_PSALTER_PAHLAVI               = 123,/* Phlp */
+      USCRIPT_BOOK_PAHLAVI                  = 124,/* Phlv */
+      USCRIPT_INSCRIPTIONAL_PARTHIAN        = 125,/* Prti */
+      USCRIPT_SAMARITAN                     = 126,/* Samr */
+      USCRIPT_TAI_VIET                      = 127,/* Tavt */
+      USCRIPT_MATHEMATICAL_NOTATION         = 128,/* Zmth */
+      USCRIPT_SYMBOLS                       = 129,/* Zsym */
+
+      /* Private use codes from Qaaa - Qabx are not supported*/
+      USCRIPT_CODE_LIMIT    = 130
+} UScriptCode;
+
+/**
+ * Gets script codes associated with the given locale or ISO 15924 abbreviation or name. 
+ * Fills in USCRIPT_MALAYALAM given "Malayam" OR "Mlym".
+ * Fills in USCRIPT_LATIN given "en" OR "en_US" 
+ * If required capacity is greater than capacity of the destination buffer then the error code
+ * is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned
+ *
+ * <p>Note: To search by short or long script alias only, use
+ * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead.  This does
+ * a fast lookup with no access of the locale data.
+ * @param nameOrAbbrOrLocale name of the script, as given in
+ * PropertyValueAliases.txt, or ISO 15924 code or locale
+ * @param fillIn the UScriptCode buffer to fill in the script code
+ * @param capacity the capacity (size) fo UScriptCode buffer passed in.
+ * @param err the error status code.
+ * @return The number of script codes filled in the buffer passed in 
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t  U_EXPORT2 
+uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
+
+/**
+ * Gets a script name associated with the given script code. 
+ * Returns  "Malayam" given USCRIPT_MALAYALAM
+ * @param scriptCode UScriptCode enum
+ * @return script long name as given in
+ * PropertyValueAliases.txt, or NULL if scriptCode is invalid
+ * @stable ICU 2.4
+ */
+U_STABLE const char*  U_EXPORT2 
+uscript_getName(UScriptCode scriptCode);
+
+/**
+ * Gets a script name associated with the given script code. 
+ * Returns  "Mlym" given USCRIPT_MALAYALAM
+ * @param scriptCode UScriptCode enum
+ * @return script abbreviated name as given in
+ * PropertyValueAliases.txt, or NULL if scriptCode is invalid
+ * @stable ICU 2.4
+ */
+U_STABLE const char*  U_EXPORT2 
+uscript_getShortName(UScriptCode scriptCode);
+
+/** 
+ * Gets the script code associated with the given codepoint.
+ * Returns USCRIPT_MALAYALAM given 0x0D02 
+ * @param codepoint UChar32 codepoint
+ * @param err the error status code.
+ * @return The UScriptCode, or 0 if codepoint is invalid 
+ * @stable ICU 2.4
+ */
+U_STABLE UScriptCode  U_EXPORT2 
+uscript_getScript(UChar32 codepoint, UErrorCode *err);
+
+#endif
+
+

Deleted: MacRuby/trunk/icu-1060/unicode/usearch.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/usearch.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/usearch.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,766 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
-**********************************************************************
-*   Date        Name        Description
-*  06/28/2001   synwee      Creation.
-**********************************************************************
-*/
-#ifndef USEARCH_H
-#define USEARCH_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/ucol.h"
-#include "unicode/ucoleitr.h"
-#include "unicode/ubrk.h"
-
-/**
- * \file
- * \brief C API: StringSearch
- *
- * C Apis for an engine that provides language-sensitive text searching based 
- * on the comparison rules defined in a <tt>UCollator</tt> data struct,
- * see <tt>ucol.h</tt>. This ensures that language eccentricity can be 
- * handled, e.g. for the German collator, characters &szlig; and SS will be matched 
- * if case is chosen to be ignored. 
- * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
- * "ICU Collation Design Document"</a> for more information.
- * <p> 
- * The algorithm implemented is a modified form of the Boyer Moore's search.
- * For more information  see 
- * <a href="http://icu-project.org/docs/papers/efficient_text_searching_in_java.html">
- * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i> 
- * in February, 1999, for further information on the algorithm.
- * <p>
- * There are 2 match options for selection:<br>
- * Let S' be the sub-string of a text string S between the offsets start and 
- * end <start, end>.
- * <br>
- * A pattern string P matches a text string S at the offsets <start, end> 
- * if
- * <pre> 
- * option 1. Some canonical equivalent of P matches some canonical equivalent 
- *           of S'
- * option 2. P matches S' and if P starts or ends with a combining mark, 
- *           there exists no non-ignorable combining mark before or after S' 
- *           in S respectively. 
- * </pre>
- * Option 2. will be the default.
- * <p>
- * This search has APIs similar to that of other text iteration mechanisms 
- * such as the break iterators in <tt>ubrk.h</tt>. Using these 
- * APIs, it is easy to scan through text looking for all occurances of 
- * a given pattern. This search iterator allows changing of direction by 
- * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>. 
- * Though a direction change can occur without calling <tt>reset</tt> first,  
- * this operation comes with some speed penalty.
- * Generally, match results in the forward direction will match the result 
- * matches in the backwards direction in the reverse order
- * <p>
- * <tt>usearch.h</tt> provides APIs to specify the starting position 
- * within the text string to be searched, e.g. <tt>usearch_setOffset</tt>,
- * <tt>usearch_preceding</tt> and <tt>usearch_following</tt>. Since the 
- * starting position will be set as it is specified, please take note that 
- * there are some dangerous positions which the search may render incorrect 
- * results:
- * <ul>
- * <li> The midst of a substring that requires normalization.
- * <li> If the following match is to be found, the position should not be the
- *      second character which requires to be swapped with the preceding 
- *      character. Vice versa, if the preceding match is to be found, 
- *      position to search from should not be the first character which 
- *      requires to be swapped with the next character. E.g certain Thai and
- *      Lao characters require swapping.
- * <li> If a following pattern match is to be found, any position within a 
- *      contracting sequence except the first will fail. Vice versa if a 
- *      preceding pattern match is to be found, a invalid starting point 
- *      would be any character within a contracting sequence except the last.
- * </ul>
- * <p>
- * A breakiterator can be used if only matches at logical breaks are desired.
- * Using a breakiterator will only give you results that exactly matches the
- * boundaries given by the breakiterator. For instance the pattern "e" will
- * not be found in the string "\u00e9" if a character break iterator is used.
- * <p>
- * Options are provided to handle overlapping matches. 
- * E.g. In English, overlapping matches produces the result 0 and 2 
- * for the pattern "abab" in the text "ababab", where else mutually 
- * exclusive matches only produce the result of 0.
- * <p>
- * Though collator attributes will be taken into consideration while 
- * performing matches, there are no APIs here for setting and getting the 
- * attributes. These attributes can be set by getting the collator
- * from <tt>usearch_getCollator</tt> and using the APIs in <tt>ucol.h</tt>.
- * Lastly to update String Search to the new collator attributes, 
- * usearch_reset() has to be called.
- * <p> 
- * Restriction: <br>
- * Currently there are no composite characters that consists of a
- * character with combining class > 0 before a character with combining 
- * class == 0. However, if such a character exists in the future, the 
- * search mechanism does not guarantee the results for option 1.
- * 
- * <p>
- * Example of use:<br>
- * <pre><code>
- * char *tgtstr = "The quick brown fox jumped over the lazy fox";
- * char *patstr = "fox";
- * UChar target[64];
- * UChar pattern[16];
- * UErrorCode status = U_ZERO_ERROR;
- * u_uastrcpy(target, tgtstr);
- * u_uastrcpy(pattern, patstr);
- *
- * UStringSearch *search = usearch_open(pattern, -1, target, -1, "en_US", 
- *                                  NULL, &status);
- * if (U_SUCCESS(status)) {
- *     for (int pos = usearch_first(search, &status); 
- *          pos != USEARCH_DONE; 
- *          pos = usearch_next(search, &status))
- *     {
- *         printf("Found match at %d pos, length is %d\n", pos, 
- *                                        usearch_getMatchLength(search));
- *     }
- * }
- *
- * usearch_close(search);
- * </code></pre>
- * @stable ICU 2.4
- */
-
-/**
-* DONE is returned by previous() and next() after all valid matches have 
-* been returned, and by first() and last() if there are no matches at all.
-* @stable ICU 2.4
-*/
-#define USEARCH_DONE -1
-
-/**
-* Data structure for searching
-* @stable ICU 2.4
-*/
-struct UStringSearch;
-/**
-* Data structure for searching
-* @stable ICU 2.4
-*/
-typedef struct UStringSearch UStringSearch;
-
-/**
-* @stable ICU 2.4
-*/
-typedef enum {
-    /** Option for overlapping matches */
-    USEARCH_OVERLAP,
-    /** 
-    Option for canonical matches. option 1 in header documentation.
-    The default value will be USEARCH_OFF
-    */
-    USEARCH_CANONICAL_MATCH,
-    USEARCH_ATTRIBUTE_COUNT
-} USearchAttribute;
-
-/**
-* @stable ICU 2.4
-*/
-typedef enum {
-    /** default value for any USearchAttribute */
-    USEARCH_DEFAULT = -1,
-    /** value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH */
-    USEARCH_OFF, 
-    /** value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH */
-    USEARCH_ON,
-    USEARCH_ATTRIBUTE_VALUE_COUNT
-} USearchAttributeValue;
-
-/* open and close ------------------------------------------------------ */
-
-/**
-* Creating a search iterator data struct using the argument locale language
-* rule set. A collator will be created in the process, which will be owned by
-* this search and will be deleted in <tt>usearch_close</tt>.
-* @param pattern for matching
-* @param patternlength length of the pattern, -1 for null-termination
-* @param text text string
-* @param textlength length of the text string, -1 for null-termination
-* @param locale name of locale for the rules to be used
-* @param breakiter A BreakIterator that will be used to restrict the points
-*                  at which matches are detected. If a match is found, but 
-*                  the match's start or end index is not a boundary as 
-*                  determined by the <tt>BreakIterator</tt>, the match will 
-*                  be rejected and another will be searched for. 
-*                  If this parameter is <tt>NULL</tt>, no break detection is 
-*                  attempted.
-* @param status for errors if it occurs. If pattern or text is NULL, or if
-*               patternlength or textlength is 0 then an 
-*               U_ILLEGAL_ARGUMENT_ERROR is returned.
-* @return search iterator data structure, or NULL if there is an error.
-* @stable ICU 2.4
-*/
-U_STABLE UStringSearch * U_EXPORT2 usearch_open(const UChar          *pattern, 
-                                              int32_t         patternlength, 
-                                        const UChar          *text, 
-                                              int32_t         textlength,
-                                        const char           *locale,
-                                              UBreakIterator *breakiter,
-                                              UErrorCode     *status);
-
-/**
-* Creating a search iterator data struct using the argument collator language
-* rule set. Note, user retains the ownership of this collator, thus the 
-* responsibility of deletion lies with the user.
-* NOTE: string search cannot be instantiated from a collator that has 
-* collate digits as numbers (CODAN) turned on.
-* @param pattern for matching
-* @param patternlength length of the pattern, -1 for null-termination
-* @param text text string
-* @param textlength length of the text string, -1 for null-termination
-* @param collator used for the language rules
-* @param breakiter A BreakIterator that will be used to restrict the points
-*                  at which matches are detected. If a match is found, but 
-*                  the match's start or end index is not a boundary as 
-*                  determined by the <tt>BreakIterator</tt>, the match will 
-*                  be rejected and another will be searched for. 
-*                  If this parameter is <tt>NULL</tt>, no break detection is 
-*                  attempted.
-* @param status for errors if it occurs. If collator, pattern or text is NULL, 
-*               or if patternlength or textlength is 0 then an 
-*               U_ILLEGAL_ARGUMENT_ERROR is returned.
-* @return search iterator data structure, or NULL if there is an error.
-* @stable ICU 2.4
-*/
-U_STABLE UStringSearch * U_EXPORT2 usearch_openFromCollator(
-                                         const UChar *pattern, 
-                                               int32_t         patternlength,
-                                         const UChar          *text, 
-                                               int32_t         textlength,
-                                         const UCollator      *collator,
-                                               UBreakIterator *breakiter,
-                                               UErrorCode     *status);
-
-/**
-* Destroying and cleaning up the search iterator data struct.
-* If a collator is created in <tt>usearch_open</tt>, it will be destroyed here.
-* @param searchiter data struct to clean up
-* @stable ICU 2.4
-*/
-U_STABLE void U_EXPORT2 usearch_close(UStringSearch *searchiter);
-
-/* get and set methods -------------------------------------------------- */
-
-/**
-* Sets the current position in the text string which the next search will 
-* start from. Clears previous states. 
-* This method takes the argument index and sets the position in the text 
-* string accordingly without checking if the index is pointing to a 
-* valid starting point to begin searching. 
-* Search positions that may render incorrect results are highlighted in the
-* header comments
-* @param strsrch search iterator data struct
-* @param position position to start next search from. If position is less
-*          than or greater than the text range for searching, 
-*          an U_INDEX_OUTOFBOUNDS_ERROR will be returned
-* @param status error status if any.
-* @stable ICU 2.4
-*/
-U_STABLE void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, 
-                                        int32_t    position,
-                                        UErrorCode    *status);
-
-/**
-* Return the current index in the string text being searched.
-* If the iteration has gone past the end of the text (or past the beginning 
-* for a backwards search), <tt>USEARCH_DONE</tt> is returned.
-* @param strsrch search iterator data struct
-* @see #USEARCH_DONE
-* @stable ICU 2.4
-*/
-U_STABLE int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch);
-    
-/**
-* Sets the text searching attributes located in the enum USearchAttribute
-* with values from the enum USearchAttributeValue.
-* <tt>USEARCH_DEFAULT</tt> can be used for all attributes for resetting.
-* @param strsrch search iterator data struct
-* @param attribute text attribute to be set
-* @param value text attribute value
-* @param status for errors if it occurs
-* @see #usearch_getAttribute
-* @stable ICU 2.4
-*/
-U_STABLE void U_EXPORT2 usearch_setAttribute(UStringSearch         *strsrch, 
-                                           USearchAttribute       attribute,
-                                           USearchAttributeValue  value,
-                                           UErrorCode            *status);
-
-/**    
-* Gets the text searching attributes.
-* @param strsrch search iterator data struct
-* @param attribute text attribute to be retrieve
-* @return text attribute value
-* @see #usearch_setAttribute
-* @stable ICU 2.4
-*/
-U_STABLE USearchAttributeValue U_EXPORT2 usearch_getAttribute(
-                                         const UStringSearch    *strsrch,
-                                               USearchAttribute  attribute);
-
-/**
-* Returns the index to the match in the text string that was searched.
-* This call returns a valid result only after a successful call to 
-* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>, 
-* or <tt>usearch_last</tt>.
-* Just after construction, or after a searching method returns 
-* <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
-* <p>
-* Use <tt>usearch_getMatchedLength</tt> to get the matched string length.
-* @param strsrch search iterator data struct
-* @return index to a substring within the text string that is being 
-*         searched.
-* @see #usearch_first
-* @see #usearch_next
-* @see #usearch_previous
-* @see #usearch_last
-* @see #USEARCH_DONE
-* @stable ICU 2.4
-*/
-U_STABLE int32_t U_EXPORT2 usearch_getMatchedStart(
-                                               const UStringSearch *strsrch);
-    
-/**
-* Returns the length of text in the string which matches the search pattern. 
-* This call returns a valid result only after a successful call to 
-* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>, 
-* or <tt>usearch_last</tt>.
-* Just after construction, or after a searching method returns 
-* <tt>USEARCH_DONE</tt>, this method will return 0.
-* @param strsrch search iterator data struct
-* @return The length of the match in the string text, or 0 if there is no 
-*         match currently.
-* @see #usearch_first
-* @see #usearch_next
-* @see #usearch_previous
-* @see #usearch_last
-* @see #USEARCH_DONE
-* @stable ICU 2.4
-*/
-U_STABLE int32_t U_EXPORT2 usearch_getMatchedLength(
-                                               const UStringSearch *strsrch);
-
-/**
-* Returns the text that was matched by the most recent call to 
-* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>, 
-* or <tt>usearch_last</tt>.
-* If the iterator is not pointing at a valid match (e.g. just after 
-* construction or after <tt>USEARCH_DONE</tt> has been returned, returns
-* an empty string. If result is not large enough to store the matched text,
-* result will be filled with the partial text and an U_BUFFER_OVERFLOW_ERROR 
-* will be returned in status. result will be null-terminated whenever 
-* possible. If the buffer fits the matched text exactly, a null-termination 
-* is not possible, then a U_STRING_NOT_TERMINATED_ERROR set in status.
-* Pre-flighting can be either done with length = 0 or the API 
-* <tt>usearch_getMatchLength</tt>.
-* @param strsrch search iterator data struct
-* @param result UChar buffer to store the matched string
-* @param resultCapacity length of the result buffer
-* @param status error returned if result is not large enough
-* @return exact length of the matched text, not counting the null-termination
-* @see #usearch_first
-* @see #usearch_next
-* @see #usearch_previous
-* @see #usearch_last
-* @see #USEARCH_DONE
-* @stable ICU 2.4
-*/
-U_STABLE int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch, 
-                                            UChar         *result, 
-                                            int32_t        resultCapacity, 
-                                            UErrorCode    *status);
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-/**
-* Set the BreakIterator that will be used to restrict the points at which 
-* matches are detected.
-* @param strsrch search iterator data struct
-* @param breakiter A BreakIterator that will be used to restrict the points
-*                  at which matches are detected. If a match is found, but 
-*                  the match's start or end index is not a boundary as 
-*                  determined by the <tt>BreakIterator</tt>, the match will 
-*                  be rejected and another will be searched for. 
-*                  If this parameter is <tt>NULL</tt>, no break detection is 
-*                  attempted.
-* @param status for errors if it occurs
-* @see #usearch_getBreakIterator
-* @stable ICU 2.4
-*/
-U_STABLE void U_EXPORT2 usearch_setBreakIterator(UStringSearch  *strsrch, 
-                                               UBreakIterator *breakiter,
-                                               UErrorCode     *status);
-
-/**
-* Returns the BreakIterator that is used to restrict the points at which 
-* matches are detected. This will be the same object that was passed to the 
-* constructor or to <tt>usearch_setBreakIterator</tt>. Note that 
-* <tt>NULL</tt> 
-* is a legal value; it means that break detection should not be attempted.
-* @param strsrch search iterator data struct
-* @return break iterator used
-* @see #usearch_setBreakIterator
-* @stable ICU 2.4
-*/
-U_STABLE const UBreakIterator * U_EXPORT2 usearch_getBreakIterator(
-                                              const UStringSearch *strsrch);
-    
-#endif
-    
-/**
-* Set the string text to be searched. Text iteration will hence begin at the 
-* start of the text string. This method is useful if you want to re-use an 
-* iterator to search for the same pattern within a different body of text.
-* @param strsrch search iterator data struct
-* @param text new string to look for match
-* @param textlength length of the new string, -1 for null-termination
-* @param status for errors if it occurs. If text is NULL, or textlength is 0 
-*               then an U_ILLEGAL_ARGUMENT_ERROR is returned with no change
-*               done to strsrch.
-* @see #usearch_getText
-* @stable ICU 2.4
-*/
-U_STABLE void U_EXPORT2 usearch_setText(      UStringSearch *strsrch, 
-                                      const UChar         *text,
-                                            int32_t        textlength,
-                                            UErrorCode    *status);
-
-/**
-* Return the string text to be searched.
-* @param strsrch search iterator data struct
-* @param length returned string text length
-* @return string text 
-* @see #usearch_setText
-* @stable ICU 2.4
-*/
-U_STABLE const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch, 
-                                               int32_t       *length);
-
-/**
-* Gets the collator used for the language rules. 
-* <p>
-* Deleting the returned <tt>UCollator</tt> before calling 
-* <tt>usearch_close</tt> would cause the string search to fail.
-* <tt>usearch_close</tt> will delete the collator if this search owns it.
-* @param strsrch search iterator data struct
-* @return collator
-* @stable ICU 2.4
-*/
-U_STABLE UCollator * U_EXPORT2 usearch_getCollator(
-                                               const UStringSearch *strsrch);
-
-/**
-* Sets the collator used for the language rules. User retains the ownership 
-* of this collator, thus the responsibility of deletion lies with the user.
-* This method causes internal data such as Boyer-Moore shift tables to  
-* be recalculated, but the iterator's position is unchanged.
-* @param strsrch search iterator data struct
-* @param collator to be used
-* @param status for errors if it occurs
-* @stable ICU 2.4
-*/
-U_STABLE void U_EXPORT2 usearch_setCollator(      UStringSearch *strsrch, 
-                                          const UCollator     *collator,
-                                                UErrorCode    *status);
-
-/**
-* Sets the pattern used for matching.
-* Internal data like the Boyer Moore table will be recalculated, but the 
-* iterator's position is unchanged.
-* @param strsrch search iterator data struct
-* @param pattern string
-* @param patternlength pattern length, -1 for null-terminated string
-* @param status for errors if it occurs. If text is NULL, or textlength is 0 
-*               then an U_ILLEGAL_ARGUMENT_ERROR is returned with no change
-*               done to strsrch.
-* @stable ICU 2.4
-*/
-U_STABLE void U_EXPORT2 usearch_setPattern(      UStringSearch *strsrch, 
-                                         const UChar         *pattern,
-                                               int32_t        patternlength,
-                                               UErrorCode    *status);
-
-/**
-* Gets the search pattern
-* @param strsrch search iterator data struct
-* @param length return length of the pattern, -1 indicates that the pattern 
-*               is null-terminated
-* @return pattern string
-* @stable ICU 2.4
-*/
-U_STABLE const UChar * U_EXPORT2 usearch_getPattern(
-                                               const UStringSearch *strsrch, 
-                                                     int32_t       *length);
-
-/* methods ------------------------------------------------------------- */
-
-/**
-* Returns the first index at which the string text matches the search 
-* pattern.  
-* The iterator is adjusted so that its current index (as returned by 
-* <tt>usearch_getOffset</tt>) is the match position if one was found.
-* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
-* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
-* @param strsrch search iterator data struct
-* @param status for errors if it occurs
-* @return The character index of the first match, or 
-* <tt>USEARCH_DONE</tt> if there are no matches.
-* @see #usearch_getOffset
-* @see #USEARCH_DONE
-* @stable ICU 2.4
-*/
-U_STABLE int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch, 
-                                           UErrorCode    *status);
-
-/**
-* Returns the first index greater than <tt>position</tt> at which the string 
-* text 
-* matches the search pattern. The iterator is adjusted so that its current 
-* index (as returned by <tt>usearch_getOffset</tt>) is the match position if 
-* one was found.
-* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
-* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
-* <p>
-* Search positions that may render incorrect results are highlighted in the
-* header comments. If position is less than or greater than the text range 
-* for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned
-* @param strsrch search iterator data struct
-* @param position to start the search at
-* @param status for errors if it occurs
-* @return The character index of the first match following <tt>pos</tt>,
-*         or <tt>USEARCH_DONE</tt> if there are no matches.
-* @see #usearch_getOffset
-* @see #USEARCH_DONE
-* @stable ICU 2.4
-*/
-U_STABLE int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch, 
-                                               int32_t    position, 
-                                               UErrorCode    *status);
-    
-/**
-* Returns the last index in the target text at which it matches the search 
-* pattern. The iterator is adjusted so that its current 
-* index (as returned by <tt>usearch_getOffset</tt>) is the match position if 
-* one was found.
-* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
-* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
-* @param strsrch search iterator data struct
-* @param status for errors if it occurs
-* @return The index of the first match, or <tt>USEARCH_DONE</tt> if there 
-*         are no matches.
-* @see #usearch_getOffset
-* @see #USEARCH_DONE
-* @stable ICU 2.4
-*/
-U_STABLE int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch, 
-                                          UErrorCode    *status);
-
-/**
-* Returns the first index less than <tt>position</tt> at which the string text 
-* matches the search pattern. The iterator is adjusted so that its current 
-* index (as returned by <tt>usearch_getOffset</tt>) is the match position if 
-* one was found.
-* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
-* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
-* <p>
-* Search positions that may render incorrect results are highlighted in the
-* header comments. If position is less than or greater than the text range 
-* for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned
-* @param strsrch search iterator data struct
-* @param position index position the search is to begin at
-* @param status for errors if it occurs
-* @return The character index of the first match preceding <tt>pos</tt>,
-*         or <tt>USEARCH_DONE</tt> if there are no matches.
-* @see #usearch_getOffset
-* @see #USEARCH_DONE
-* @stable ICU 2.4
-*/
-U_STABLE int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch, 
-                                               int32_t    position, 
-                                               UErrorCode    *status);
-    
-/**
-* Returns the index of the next point at which the string text matches the
-* search pattern, starting from the current position.
-* The iterator is adjusted so that its current 
-* index (as returned by <tt>usearch_getOffset</tt>) is the match position if 
-* one was found.
-* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
-* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
-* @param strsrch search iterator data struct
-* @param status for errors if it occurs
-* @return The index of the next match after the current position, or 
-*         <tt>USEARCH_DONE</tt> if there are no more matches.
-* @see #usearch_first
-* @see #usearch_getOffset
-* @see #USEARCH_DONE
-* @stable ICU 2.4
-*/
-U_STABLE int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch, 
-                                          UErrorCode    *status);
-
-/**
-* Returns the index of the previous point at which the string text matches
-* the search pattern, starting at the current position.
-* The iterator is adjusted so that its current 
-* index (as returned by <tt>usearch_getOffset</tt>) is the match position if 
-* one was found.
-* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
-* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
-* @param strsrch search iterator data struct
-* @param status for errors if it occurs
-* @return The index of the previous match before the current position,
-*         or <tt>USEARCH_DONE</tt> if there are no more matches.
-* @see #usearch_last
-* @see #usearch_getOffset
-* @see #USEARCH_DONE
-* @stable ICU 2.4
-*/
-U_STABLE int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch, 
-                                              UErrorCode    *status);
-    
-/** 
-* Reset the iteration.
-* Search will begin at the start of the text string if a forward iteration 
-* is initiated before a backwards iteration. Otherwise if a backwards 
-* iteration is initiated before a forwards iteration, the search will begin
-* at the end of the text string.
-* @param strsrch search iterator data struct
-* @see #usearch_first
-* @stable ICU 2.4
-*/
-U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
-
-/**
-  *  Simple forward search for the pattern, starting at a specified index,
-  *     and using using a default set search options.
-  *
-  *  This is an experimental function, and is not an official part of the
-  *      ICU API.
-  *
-  *  The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZTION, are honored.
-  *
-  *  The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and
-  *  any Break Iterator are ignored.
-  *
-  *  Matches obey the following constraints:
-  *
-  *      Characters at the start or end positions of a match that are ignorable
-  *      for collation are not included as part of the match, unless they
-  *      are part of a combining sequence, as described below.
-  *
-  *      A match will not include a partial combining sequence.  Combining
-  *      character sequences  are considered to be  inseperable units,
-  *      and either match the pattern completely, or are considered to not match
-  *      at all.  Thus, for example, an A followed a combining accent mark will 
-  *      not be found when searching for a plain (unaccented) A.   (unless
-  *      the collation strength has been set to ignore all accents).
-  *
-  *      When beginning a search, the initial starting position, startIdx,
-  *      is assumed to be an acceptable match boundary with respect to
-  *      combining characters.  A combining sequence that spans across the
-  *      starting point will not supress a match beginning at startIdx.
-  *
-  *      Characters that expand to multiple collation elements
-  *      (German sharp-S becoming 'ss', or the composed forms of accented
-  *      characters, for example) also must match completely.
-  *      Searching for a single 's' in a string containing only a sharp-s will 
-  *      find no match.
-  *
-  *
-  *  @param strsrch    the UStringSearch struct, which references both
-  *                    the text to be searched  and the pattern being sought.
-  *  @param startIdx   The index into the text to begin the search.
-  *  @param matchStart An out parameter, the starting index of the matched text.
-  *                    This parameter may be NULL.
-  *                    A value of -1 will be returned if no match was found.
-  *  @param matchLimit Out parameter, the index of the first position following the matched text.
-  *                    The matchLimit will be at a suitable position for beginning a subsequent search
-  *                    in the input text.
-  *                    This parameter may be NULL.
-  *                    A value of -1 will be returned if no match was found.
-  *          
-  *  @param status     Report any errors.  Note that no match found is not an error.
-  *  @return           TRUE if a match was found, FALSE otherwise.
-  *
-  *  @internal
-  */
-U_INTERNAL UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
-                                          int32_t        startIdx,
-                                          int32_t        *matchStart,
-                                          int32_t        *matchLimit,
-                                          UErrorCode     *status);
-
-/**
-  *  Simple backwards search for the pattern, starting at a specified index,
-  *     and using using a default set search options.
-  *
-  *  This is an experimental function, and is not an official part of the
-  *      ICU API.
-  *
-  *  The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZTION, are honored.
-  *
-  *  The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and
-  *  any Break Iterator are ignored.
-  *
-  *  Matches obey the following constraints:
-  *
-  *      Characters at the start or end positions of a match that are ignorable
-  *      for collation are not included as part of the match, unless they
-  *      are part of a combining sequence, as described below.
-  *
-  *      A match will not include a partial combining sequence.  Combining
-  *      character sequences  are considered to be  inseperable units,
-  *      and either match the pattern completely, or are considered to not match
-  *      at all.  Thus, for example, an A followed a combining accent mark will 
-  *      not be found when searching for a plain (unaccented) A.   (unless
-  *      the collation strength has been set to ignore all accents).
-  *
-  *      When beginning a search, the initial starting position, startIdx,
-  *      is assumed to be an acceptable match boundary with respect to
-  *      combining characters.  A combining sequence that spans across the
-  *      starting point will not supress a match beginning at startIdx.
-  *
-  *      Characters that expand to multiple collation elements
-  *      (German sharp-S becoming 'ss', or the composed forms of accented
-  *      characters, for example) also must match completely.
-  *      Searching for a single 's' in a string containing only a sharp-s will 
-  *      find no match.
-  *
-  *
-  *  @param strsrch    the UStringSearch struct, which references both
-  *                    the text to be searched  and the pattern being sought.
-  *  @param startIdx   The index into the text to begin the search.
-  *  @param matchStart An out parameter, the starting index of the matched text.
-  *                    This parameter may be NULL.
-  *                    A value of -1 will be returned if no match was found.
-  *  @param matchLimit Out parameter, the index of the first position following the matched text.
-  *                    The matchLimit will be at a suitable position for beginning a subsequent search
-  *                    in the input text.
-  *                    This parameter may be NULL.
-  *                    A value of -1 will be returned if no match was found.
-  *          
-  *  @param status     Report any errors.  Note that no match found is not an error.
-  *  @return           TRUE if a match was found, FALSE otherwise.
-  *
-  *  @internal
-  */
-U_INTERNAL UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
-                                                   int32_t        startIdx,
-                                                   int32_t        *matchStart,
-                                                   int32_t        *matchLimit,
-                                                   UErrorCode     *status);
-
-#endif /* #if !UCONFIG_NO_COLLATION  && !UCONFIG_NO_BREAK_ITERATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/usearch.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/usearch.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/usearch.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/usearch.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,766 @@
+/*
+**********************************************************************
+*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  06/28/2001   synwee      Creation.
+**********************************************************************
+*/
+#ifndef USEARCH_H
+#define USEARCH_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/ucol.h"
+#include "unicode/ucoleitr.h"
+#include "unicode/ubrk.h"
+
+/**
+ * \file
+ * \brief C API: StringSearch
+ *
+ * C Apis for an engine that provides language-sensitive text searching based 
+ * on the comparison rules defined in a <tt>UCollator</tt> data struct,
+ * see <tt>ucol.h</tt>. This ensures that language eccentricity can be 
+ * handled, e.g. for the German collator, characters &szlig; and SS will be matched 
+ * if case is chosen to be ignored. 
+ * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
+ * "ICU Collation Design Document"</a> for more information.
+ * <p> 
+ * The algorithm implemented is a modified form of the Boyer Moore's search.
+ * For more information  see 
+ * <a href="http://icu-project.org/docs/papers/efficient_text_searching_in_java.html">
+ * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i> 
+ * in February, 1999, for further information on the algorithm.
+ * <p>
+ * There are 2 match options for selection:<br>
+ * Let S' be the sub-string of a text string S between the offsets start and 
+ * end <start, end>.
+ * <br>
+ * A pattern string P matches a text string S at the offsets <start, end> 
+ * if
+ * <pre> 
+ * option 1. Some canonical equivalent of P matches some canonical equivalent 
+ *           of S'
+ * option 2. P matches S' and if P starts or ends with a combining mark, 
+ *           there exists no non-ignorable combining mark before or after S' 
+ *           in S respectively. 
+ * </pre>
+ * Option 2. will be the default.
+ * <p>
+ * This search has APIs similar to that of other text iteration mechanisms 
+ * such as the break iterators in <tt>ubrk.h</tt>. Using these 
+ * APIs, it is easy to scan through text looking for all occurances of 
+ * a given pattern. This search iterator allows changing of direction by 
+ * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>. 
+ * Though a direction change can occur without calling <tt>reset</tt> first,  
+ * this operation comes with some speed penalty.
+ * Generally, match results in the forward direction will match the result 
+ * matches in the backwards direction in the reverse order
+ * <p>
+ * <tt>usearch.h</tt> provides APIs to specify the starting position 
+ * within the text string to be searched, e.g. <tt>usearch_setOffset</tt>,
+ * <tt>usearch_preceding</tt> and <tt>usearch_following</tt>. Since the 
+ * starting position will be set as it is specified, please take note that 
+ * there are some dangerous positions which the search may render incorrect 
+ * results:
+ * <ul>
+ * <li> The midst of a substring that requires normalization.
+ * <li> If the following match is to be found, the position should not be the
+ *      second character which requires to be swapped with the preceding 
+ *      character. Vice versa, if the preceding match is to be found, 
+ *      position to search from should not be the first character which 
+ *      requires to be swapped with the next character. E.g certain Thai and
+ *      Lao characters require swapping.
+ * <li> If a following pattern match is to be found, any position within a 
+ *      contracting sequence except the first will fail. Vice versa if a 
+ *      preceding pattern match is to be found, a invalid starting point 
+ *      would be any character within a contracting sequence except the last.
+ * </ul>
+ * <p>
+ * A breakiterator can be used if only matches at logical breaks are desired.
+ * Using a breakiterator will only give you results that exactly matches the
+ * boundaries given by the breakiterator. For instance the pattern "e" will
+ * not be found in the string "\u00e9" if a character break iterator is used.
+ * <p>
+ * Options are provided to handle overlapping matches. 
+ * E.g. In English, overlapping matches produces the result 0 and 2 
+ * for the pattern "abab" in the text "ababab", where else mutually 
+ * exclusive matches only produce the result of 0.
+ * <p>
+ * Though collator attributes will be taken into consideration while 
+ * performing matches, there are no APIs here for setting and getting the 
+ * attributes. These attributes can be set by getting the collator
+ * from <tt>usearch_getCollator</tt> and using the APIs in <tt>ucol.h</tt>.
+ * Lastly to update String Search to the new collator attributes, 
+ * usearch_reset() has to be called.
+ * <p> 
+ * Restriction: <br>
+ * Currently there are no composite characters that consists of a
+ * character with combining class > 0 before a character with combining 
+ * class == 0. However, if such a character exists in the future, the 
+ * search mechanism does not guarantee the results for option 1.
+ * 
+ * <p>
+ * Example of use:<br>
+ * <pre><code>
+ * char *tgtstr = "The quick brown fox jumped over the lazy fox";
+ * char *patstr = "fox";
+ * UChar target[64];
+ * UChar pattern[16];
+ * UErrorCode status = U_ZERO_ERROR;
+ * u_uastrcpy(target, tgtstr);
+ * u_uastrcpy(pattern, patstr);
+ *
+ * UStringSearch *search = usearch_open(pattern, -1, target, -1, "en_US", 
+ *                                  NULL, &status);
+ * if (U_SUCCESS(status)) {
+ *     for (int pos = usearch_first(search, &status); 
+ *          pos != USEARCH_DONE; 
+ *          pos = usearch_next(search, &status))
+ *     {
+ *         printf("Found match at %d pos, length is %d\n", pos, 
+ *                                        usearch_getMatchLength(search));
+ *     }
+ * }
+ *
+ * usearch_close(search);
+ * </code></pre>
+ * @stable ICU 2.4
+ */
+
+/**
+* DONE is returned by previous() and next() after all valid matches have 
+* been returned, and by first() and last() if there are no matches at all.
+* @stable ICU 2.4
+*/
+#define USEARCH_DONE -1
+
+/**
+* Data structure for searching
+* @stable ICU 2.4
+*/
+struct UStringSearch;
+/**
+* Data structure for searching
+* @stable ICU 2.4
+*/
+typedef struct UStringSearch UStringSearch;
+
+/**
+* @stable ICU 2.4
+*/
+typedef enum {
+    /** Option for overlapping matches */
+    USEARCH_OVERLAP,
+    /** 
+    Option for canonical matches. option 1 in header documentation.
+    The default value will be USEARCH_OFF
+    */
+    USEARCH_CANONICAL_MATCH,
+    USEARCH_ATTRIBUTE_COUNT
+} USearchAttribute;
+
+/**
+* @stable ICU 2.4
+*/
+typedef enum {
+    /** default value for any USearchAttribute */
+    USEARCH_DEFAULT = -1,
+    /** value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH */
+    USEARCH_OFF, 
+    /** value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH */
+    USEARCH_ON,
+    USEARCH_ATTRIBUTE_VALUE_COUNT
+} USearchAttributeValue;
+
+/* open and close ------------------------------------------------------ */
+
+/**
+* Creating a search iterator data struct using the argument locale language
+* rule set. A collator will be created in the process, which will be owned by
+* this search and will be deleted in <tt>usearch_close</tt>.
+* @param pattern for matching
+* @param patternlength length of the pattern, -1 for null-termination
+* @param text text string
+* @param textlength length of the text string, -1 for null-termination
+* @param locale name of locale for the rules to be used
+* @param breakiter A BreakIterator that will be used to restrict the points
+*                  at which matches are detected. If a match is found, but 
+*                  the match's start or end index is not a boundary as 
+*                  determined by the <tt>BreakIterator</tt>, the match will 
+*                  be rejected and another will be searched for. 
+*                  If this parameter is <tt>NULL</tt>, no break detection is 
+*                  attempted.
+* @param status for errors if it occurs. If pattern or text is NULL, or if
+*               patternlength or textlength is 0 then an 
+*               U_ILLEGAL_ARGUMENT_ERROR is returned.
+* @return search iterator data structure, or NULL if there is an error.
+* @stable ICU 2.4
+*/
+U_STABLE UStringSearch * U_EXPORT2 usearch_open(const UChar          *pattern, 
+                                              int32_t         patternlength, 
+                                        const UChar          *text, 
+                                              int32_t         textlength,
+                                        const char           *locale,
+                                              UBreakIterator *breakiter,
+                                              UErrorCode     *status);
+
+/**
+* Creating a search iterator data struct using the argument collator language
+* rule set. Note, user retains the ownership of this collator, thus the 
+* responsibility of deletion lies with the user.
+* NOTE: string search cannot be instantiated from a collator that has 
+* collate digits as numbers (CODAN) turned on.
+* @param pattern for matching
+* @param patternlength length of the pattern, -1 for null-termination
+* @param text text string
+* @param textlength length of the text string, -1 for null-termination
+* @param collator used for the language rules
+* @param breakiter A BreakIterator that will be used to restrict the points
+*                  at which matches are detected. If a match is found, but 
+*                  the match's start or end index is not a boundary as 
+*                  determined by the <tt>BreakIterator</tt>, the match will 
+*                  be rejected and another will be searched for. 
+*                  If this parameter is <tt>NULL</tt>, no break detection is 
+*                  attempted.
+* @param status for errors if it occurs. If collator, pattern or text is NULL, 
+*               or if patternlength or textlength is 0 then an 
+*               U_ILLEGAL_ARGUMENT_ERROR is returned.
+* @return search iterator data structure, or NULL if there is an error.
+* @stable ICU 2.4
+*/
+U_STABLE UStringSearch * U_EXPORT2 usearch_openFromCollator(
+                                         const UChar *pattern, 
+                                               int32_t         patternlength,
+                                         const UChar          *text, 
+                                               int32_t         textlength,
+                                         const UCollator      *collator,
+                                               UBreakIterator *breakiter,
+                                               UErrorCode     *status);
+
+/**
+* Destroying and cleaning up the search iterator data struct.
+* If a collator is created in <tt>usearch_open</tt>, it will be destroyed here.
+* @param searchiter data struct to clean up
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_close(UStringSearch *searchiter);
+
+/* get and set methods -------------------------------------------------- */
+
+/**
+* Sets the current position in the text string which the next search will 
+* start from. Clears previous states. 
+* This method takes the argument index and sets the position in the text 
+* string accordingly without checking if the index is pointing to a 
+* valid starting point to begin searching. 
+* Search positions that may render incorrect results are highlighted in the
+* header comments
+* @param strsrch search iterator data struct
+* @param position position to start next search from. If position is less
+*          than or greater than the text range for searching, 
+*          an U_INDEX_OUTOFBOUNDS_ERROR will be returned
+* @param status error status if any.
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, 
+                                        int32_t    position,
+                                        UErrorCode    *status);
+
+/**
+* Return the current index in the string text being searched.
+* If the iteration has gone past the end of the text (or past the beginning 
+* for a backwards search), <tt>USEARCH_DONE</tt> is returned.
+* @param strsrch search iterator data struct
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch);
+    
+/**
+* Sets the text searching attributes located in the enum USearchAttribute
+* with values from the enum USearchAttributeValue.
+* <tt>USEARCH_DEFAULT</tt> can be used for all attributes for resetting.
+* @param strsrch search iterator data struct
+* @param attribute text attribute to be set
+* @param value text attribute value
+* @param status for errors if it occurs
+* @see #usearch_getAttribute
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setAttribute(UStringSearch         *strsrch, 
+                                           USearchAttribute       attribute,
+                                           USearchAttributeValue  value,
+                                           UErrorCode            *status);
+
+/**    
+* Gets the text searching attributes.
+* @param strsrch search iterator data struct
+* @param attribute text attribute to be retrieve
+* @return text attribute value
+* @see #usearch_setAttribute
+* @stable ICU 2.4
+*/
+U_STABLE USearchAttributeValue U_EXPORT2 usearch_getAttribute(
+                                         const UStringSearch    *strsrch,
+                                               USearchAttribute  attribute);
+
+/**
+* Returns the index to the match in the text string that was searched.
+* This call returns a valid result only after a successful call to 
+* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>, 
+* or <tt>usearch_last</tt>.
+* Just after construction, or after a searching method returns 
+* <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
+* <p>
+* Use <tt>usearch_getMatchedLength</tt> to get the matched string length.
+* @param strsrch search iterator data struct
+* @return index to a substring within the text string that is being 
+*         searched.
+* @see #usearch_first
+* @see #usearch_next
+* @see #usearch_previous
+* @see #usearch_last
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_getMatchedStart(
+                                               const UStringSearch *strsrch);
+    
+/**
+* Returns the length of text in the string which matches the search pattern. 
+* This call returns a valid result only after a successful call to 
+* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>, 
+* or <tt>usearch_last</tt>.
+* Just after construction, or after a searching method returns 
+* <tt>USEARCH_DONE</tt>, this method will return 0.
+* @param strsrch search iterator data struct
+* @return The length of the match in the string text, or 0 if there is no 
+*         match currently.
+* @see #usearch_first
+* @see #usearch_next
+* @see #usearch_previous
+* @see #usearch_last
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_getMatchedLength(
+                                               const UStringSearch *strsrch);
+
+/**
+* Returns the text that was matched by the most recent call to 
+* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>, 
+* or <tt>usearch_last</tt>.
+* If the iterator is not pointing at a valid match (e.g. just after 
+* construction or after <tt>USEARCH_DONE</tt> has been returned, returns
+* an empty string. If result is not large enough to store the matched text,
+* result will be filled with the partial text and an U_BUFFER_OVERFLOW_ERROR 
+* will be returned in status. result will be null-terminated whenever 
+* possible. If the buffer fits the matched text exactly, a null-termination 
+* is not possible, then a U_STRING_NOT_TERMINATED_ERROR set in status.
+* Pre-flighting can be either done with length = 0 or the API 
+* <tt>usearch_getMatchLength</tt>.
+* @param strsrch search iterator data struct
+* @param result UChar buffer to store the matched string
+* @param resultCapacity length of the result buffer
+* @param status error returned if result is not large enough
+* @return exact length of the matched text, not counting the null-termination
+* @see #usearch_first
+* @see #usearch_next
+* @see #usearch_previous
+* @see #usearch_last
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch, 
+                                            UChar         *result, 
+                                            int32_t        resultCapacity, 
+                                            UErrorCode    *status);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+* Set the BreakIterator that will be used to restrict the points at which 
+* matches are detected.
+* @param strsrch search iterator data struct
+* @param breakiter A BreakIterator that will be used to restrict the points
+*                  at which matches are detected. If a match is found, but 
+*                  the match's start or end index is not a boundary as 
+*                  determined by the <tt>BreakIterator</tt>, the match will 
+*                  be rejected and another will be searched for. 
+*                  If this parameter is <tt>NULL</tt>, no break detection is 
+*                  attempted.
+* @param status for errors if it occurs
+* @see #usearch_getBreakIterator
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setBreakIterator(UStringSearch  *strsrch, 
+                                               UBreakIterator *breakiter,
+                                               UErrorCode     *status);
+
+/**
+* Returns the BreakIterator that is used to restrict the points at which 
+* matches are detected. This will be the same object that was passed to the 
+* constructor or to <tt>usearch_setBreakIterator</tt>. Note that 
+* <tt>NULL</tt> 
+* is a legal value; it means that break detection should not be attempted.
+* @param strsrch search iterator data struct
+* @return break iterator used
+* @see #usearch_setBreakIterator
+* @stable ICU 2.4
+*/
+U_STABLE const UBreakIterator * U_EXPORT2 usearch_getBreakIterator(
+                                              const UStringSearch *strsrch);
+    
+#endif
+    
+/**
+* Set the string text to be searched. Text iteration will hence begin at the 
+* start of the text string. This method is useful if you want to re-use an 
+* iterator to search for the same pattern within a different body of text.
+* @param strsrch search iterator data struct
+* @param text new string to look for match
+* @param textlength length of the new string, -1 for null-termination
+* @param status for errors if it occurs. If text is NULL, or textlength is 0 
+*               then an U_ILLEGAL_ARGUMENT_ERROR is returned with no change
+*               done to strsrch.
+* @see #usearch_getText
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setText(      UStringSearch *strsrch, 
+                                      const UChar         *text,
+                                            int32_t        textlength,
+                                            UErrorCode    *status);
+
+/**
+* Return the string text to be searched.
+* @param strsrch search iterator data struct
+* @param length returned string text length
+* @return string text 
+* @see #usearch_setText
+* @stable ICU 2.4
+*/
+U_STABLE const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch, 
+                                               int32_t       *length);
+
+/**
+* Gets the collator used for the language rules. 
+* <p>
+* Deleting the returned <tt>UCollator</tt> before calling 
+* <tt>usearch_close</tt> would cause the string search to fail.
+* <tt>usearch_close</tt> will delete the collator if this search owns it.
+* @param strsrch search iterator data struct
+* @return collator
+* @stable ICU 2.4
+*/
+U_STABLE UCollator * U_EXPORT2 usearch_getCollator(
+                                               const UStringSearch *strsrch);
+
+/**
+* Sets the collator used for the language rules. User retains the ownership 
+* of this collator, thus the responsibility of deletion lies with the user.
+* This method causes internal data such as Boyer-Moore shift tables to  
+* be recalculated, but the iterator's position is unchanged.
+* @param strsrch search iterator data struct
+* @param collator to be used
+* @param status for errors if it occurs
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setCollator(      UStringSearch *strsrch, 
+                                          const UCollator     *collator,
+                                                UErrorCode    *status);
+
+/**
+* Sets the pattern used for matching.
+* Internal data like the Boyer Moore table will be recalculated, but the 
+* iterator's position is unchanged.
+* @param strsrch search iterator data struct
+* @param pattern string
+* @param patternlength pattern length, -1 for null-terminated string
+* @param status for errors if it occurs. If text is NULL, or textlength is 0 
+*               then an U_ILLEGAL_ARGUMENT_ERROR is returned with no change
+*               done to strsrch.
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setPattern(      UStringSearch *strsrch, 
+                                         const UChar         *pattern,
+                                               int32_t        patternlength,
+                                               UErrorCode    *status);
+
+/**
+* Gets the search pattern
+* @param strsrch search iterator data struct
+* @param length return length of the pattern, -1 indicates that the pattern 
+*               is null-terminated
+* @return pattern string
+* @stable ICU 2.4
+*/
+U_STABLE const UChar * U_EXPORT2 usearch_getPattern(
+                                               const UStringSearch *strsrch, 
+                                                     int32_t       *length);
+
+/* methods ------------------------------------------------------------- */
+
+/**
+* Returns the first index at which the string text matches the search 
+* pattern.  
+* The iterator is adjusted so that its current index (as returned by 
+* <tt>usearch_getOffset</tt>) is the match position if one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The character index of the first match, or 
+* <tt>USEARCH_DONE</tt> if there are no matches.
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch, 
+                                           UErrorCode    *status);
+
+/**
+* Returns the first index greater than <tt>position</tt> at which the string 
+* text 
+* matches the search pattern. The iterator is adjusted so that its current 
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if 
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
+* <p>
+* Search positions that may render incorrect results are highlighted in the
+* header comments. If position is less than or greater than the text range 
+* for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned
+* @param strsrch search iterator data struct
+* @param position to start the search at
+* @param status for errors if it occurs
+* @return The character index of the first match following <tt>pos</tt>,
+*         or <tt>USEARCH_DONE</tt> if there are no matches.
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch, 
+                                               int32_t    position, 
+                                               UErrorCode    *status);
+    
+/**
+* Returns the last index in the target text at which it matches the search 
+* pattern. The iterator is adjusted so that its current 
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if 
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The index of the first match, or <tt>USEARCH_DONE</tt> if there 
+*         are no matches.
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch, 
+                                          UErrorCode    *status);
+
+/**
+* Returns the first index less than <tt>position</tt> at which the string text 
+* matches the search pattern. The iterator is adjusted so that its current 
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if 
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
+* <p>
+* Search positions that may render incorrect results are highlighted in the
+* header comments. If position is less than or greater than the text range 
+* for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned
+* @param strsrch search iterator data struct
+* @param position index position the search is to begin at
+* @param status for errors if it occurs
+* @return The character index of the first match preceding <tt>pos</tt>,
+*         or <tt>USEARCH_DONE</tt> if there are no matches.
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch, 
+                                               int32_t    position, 
+                                               UErrorCode    *status);
+    
+/**
+* Returns the index of the next point at which the string text matches the
+* search pattern, starting from the current position.
+* The iterator is adjusted so that its current 
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if 
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The index of the next match after the current position, or 
+*         <tt>USEARCH_DONE</tt> if there are no more matches.
+* @see #usearch_first
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch, 
+                                          UErrorCode    *status);
+
+/**
+* Returns the index of the previous point at which the string text matches
+* the search pattern, starting at the current position.
+* The iterator is adjusted so that its current 
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if 
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The index of the previous match before the current position,
+*         or <tt>USEARCH_DONE</tt> if there are no more matches.
+* @see #usearch_last
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch, 
+                                              UErrorCode    *status);
+    
+/** 
+* Reset the iteration.
+* Search will begin at the start of the text string if a forward iteration 
+* is initiated before a backwards iteration. Otherwise if a backwards 
+* iteration is initiated before a forwards iteration, the search will begin
+* at the end of the text string.
+* @param strsrch search iterator data struct
+* @see #usearch_first
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
+
+/**
+  *  Simple forward search for the pattern, starting at a specified index,
+  *     and using using a default set search options.
+  *
+  *  This is an experimental function, and is not an official part of the
+  *      ICU API.
+  *
+  *  The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZTION, are honored.
+  *
+  *  The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and
+  *  any Break Iterator are ignored.
+  *
+  *  Matches obey the following constraints:
+  *
+  *      Characters at the start or end positions of a match that are ignorable
+  *      for collation are not included as part of the match, unless they
+  *      are part of a combining sequence, as described below.
+  *
+  *      A match will not include a partial combining sequence.  Combining
+  *      character sequences  are considered to be  inseperable units,
+  *      and either match the pattern completely, or are considered to not match
+  *      at all.  Thus, for example, an A followed a combining accent mark will 
+  *      not be found when searching for a plain (unaccented) A.   (unless
+  *      the collation strength has been set to ignore all accents).
+  *
+  *      When beginning a search, the initial starting position, startIdx,
+  *      is assumed to be an acceptable match boundary with respect to
+  *      combining characters.  A combining sequence that spans across the
+  *      starting point will not supress a match beginning at startIdx.
+  *
+  *      Characters that expand to multiple collation elements
+  *      (German sharp-S becoming 'ss', or the composed forms of accented
+  *      characters, for example) also must match completely.
+  *      Searching for a single 's' in a string containing only a sharp-s will 
+  *      find no match.
+  *
+  *
+  *  @param strsrch    the UStringSearch struct, which references both
+  *                    the text to be searched  and the pattern being sought.
+  *  @param startIdx   The index into the text to begin the search.
+  *  @param matchStart An out parameter, the starting index of the matched text.
+  *                    This parameter may be NULL.
+  *                    A value of -1 will be returned if no match was found.
+  *  @param matchLimit Out parameter, the index of the first position following the matched text.
+  *                    The matchLimit will be at a suitable position for beginning a subsequent search
+  *                    in the input text.
+  *                    This parameter may be NULL.
+  *                    A value of -1 will be returned if no match was found.
+  *          
+  *  @param status     Report any errors.  Note that no match found is not an error.
+  *  @return           TRUE if a match was found, FALSE otherwise.
+  *
+  *  @internal
+  */
+U_INTERNAL UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
+                                          int32_t        startIdx,
+                                          int32_t        *matchStart,
+                                          int32_t        *matchLimit,
+                                          UErrorCode     *status);
+
+/**
+  *  Simple backwards search for the pattern, starting at a specified index,
+  *     and using using a default set search options.
+  *
+  *  This is an experimental function, and is not an official part of the
+  *      ICU API.
+  *
+  *  The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZTION, are honored.
+  *
+  *  The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and
+  *  any Break Iterator are ignored.
+  *
+  *  Matches obey the following constraints:
+  *
+  *      Characters at the start or end positions of a match that are ignorable
+  *      for collation are not included as part of the match, unless they
+  *      are part of a combining sequence, as described below.
+  *
+  *      A match will not include a partial combining sequence.  Combining
+  *      character sequences  are considered to be  inseperable units,
+  *      and either match the pattern completely, or are considered to not match
+  *      at all.  Thus, for example, an A followed a combining accent mark will 
+  *      not be found when searching for a plain (unaccented) A.   (unless
+  *      the collation strength has been set to ignore all accents).
+  *
+  *      When beginning a search, the initial starting position, startIdx,
+  *      is assumed to be an acceptable match boundary with respect to
+  *      combining characters.  A combining sequence that spans across the
+  *      starting point will not supress a match beginning at startIdx.
+  *
+  *      Characters that expand to multiple collation elements
+  *      (German sharp-S becoming 'ss', or the composed forms of accented
+  *      characters, for example) also must match completely.
+  *      Searching for a single 's' in a string containing only a sharp-s will 
+  *      find no match.
+  *
+  *
+  *  @param strsrch    the UStringSearch struct, which references both
+  *                    the text to be searched  and the pattern being sought.
+  *  @param startIdx   The index into the text to begin the search.
+  *  @param matchStart An out parameter, the starting index of the matched text.
+  *                    This parameter may be NULL.
+  *                    A value of -1 will be returned if no match was found.
+  *  @param matchLimit Out parameter, the index of the first position following the matched text.
+  *                    The matchLimit will be at a suitable position for beginning a subsequent search
+  *                    in the input text.
+  *                    This parameter may be NULL.
+  *                    A value of -1 will be returned if no match was found.
+  *          
+  *  @param status     Report any errors.  Note that no match found is not an error.
+  *  @return           TRUE if a match was found, FALSE otherwise.
+  *
+  *  @internal
+  */
+U_INTERNAL UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
+                                                   int32_t        startIdx,
+                                                   int32_t        *matchStart,
+                                                   int32_t        *matchLimit,
+                                                   UErrorCode     *status);
+
+#endif /* #if !UCONFIG_NO_COLLATION  && !UCONFIG_NO_BREAK_ITERATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/uset.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uset.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uset.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1052 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 2002-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  uset.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2002mar07
-*   created by: Markus W. Scherer
-*
-*   C version of UnicodeSet.
-*/
-
-
-/**
- * \file
- * \brief C API: Unicode Set
- *
- * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
- */
-
-#ifndef __USET_H__
-#define __USET_H__
-
-#include "unicode/utypes.h"
-#include "unicode/uchar.h"
-
-#ifndef UCNV_H
-struct USet;
-/**
- * A UnicodeSet.  Use the uset_* API to manipulate.  Create with
- * uset_open*, and destroy with uset_close.
- * @stable ICU 2.4
- */
-typedef struct USet USet;
-#endif
-
-/**
- * Bitmask values to be passed to uset_openPatternOptions() or
- * uset_applyPattern() taking an option parameter.
- * @stable ICU 2.4
- */
-enum {
-    /**
-     * Ignore white space within patterns unless quoted or escaped.
-     * @stable ICU 2.4
-     */
-    USET_IGNORE_SPACE = 1,  
-
-    /**
-     * Enable case insensitive matching.  E.g., "[ab]" with this flag
-     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
-     * match all except 'a', 'A', 'b', and 'B'. This performs a full
-     * closure over case mappings, e.g. U+017F for s.
-     *
-     * The resulting set is a superset of the input for the code points but
-     * not for the strings.
-     * It performs a case mapping closure of the code points and adds
-     * full case folding strings for the code points, and reduces strings of
-     * the original set to their full case folding equivalents.
-     *
-     * This is designed for case-insensitive matches, for example
-     * in regular expressions. The full code point case closure allows checking of
-     * an input character directly against the closure set.
-     * Strings are matched by comparing the case-folded form from the closure
-     * set with an incremental case folding of the string in question.
-     *
-     * The closure set will also contain single code points if the original
-     * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
-     * This is not necessary (that is, redundant) for the above matching method
-     * but results in the same closure sets regardless of whether the original
-     * set contained the code point or a string.
-     *
-     * @stable ICU 2.4
-     */
-    USET_CASE_INSENSITIVE = 2,  
-
-    /**
-     * Enable case insensitive matching.  E.g., "[ab]" with this flag
-     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
-     * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
-     * title-, and uppercase mappings as well as the case folding
-     * of each existing element in the set.
-     * @stable ICU 3.2
-     */
-    USET_ADD_CASE_MAPPINGS = 4,
-    
-    /**
-     * Enough for any single-code point set
-     * @internal
-     */
-    USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
-};
-
-/**
- * Argument values for whether span() and similar functions continue while
- * the current character is contained vs. not contained in the set.
- *
- * The functionality is straightforward for sets with only single code points,
- * without strings (which is the common case):
- * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE
- *   work the same.
- * - span() and spanBack() partition any string the same way when
- *   alternating between span(USET_SPAN_NOT_CONTAINED) and
- *   span(either "contained" condition).
- * - Using a complemented (inverted) set and the opposite span conditions
- *   yields the same results.
- *
- * When a set contains multi-code point strings, then these statements may not
- * be true, depending on the strings in the set (for example, whether they
- * overlap with each other) and the string that is processed.
- * For a set with strings:
- * - The complement of the set contains the opposite set of code points,
- *   but the same set of strings.
- *   Therefore, complementing both the set and the span conditions
- *   may yield different results.
- * - When starting spans at different positions in a string
- *   (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
- *   because a set string may start before the later position.
- * - span(USET_SPAN_SIMPLE) may be shorter than
- *   span(USET_SPAN_CONTAINED) because it will not recursively try
- *   all possible paths.
- *   For example, with a set which contains the three strings "xy", "xya" and "ax",
- *   span("xyax", USET_SPAN_CONTAINED) will return 4 but
- *   span("xyax", USET_SPAN_SIMPLE) will return 3.
- *   span(USET_SPAN_SIMPLE) will never be longer than
- *   span(USET_SPAN_CONTAINED).
- * - With either "contained" condition, span() and spanBack() may partition
- *   a string in different ways.
- *   For example, with a set which contains the two strings "ab" and "ba",
- *   and when processing the string "aba",
- *   span() will yield contained/not-contained boundaries of { 0, 2, 3 }
- *   while spanBack() will yield boundaries of { 0, 1, 3 }.
- *
- * Note: If it is important to get the same boundaries whether iterating forward
- * or backward through a string, then either only span() should be used and
- * the boundaries cached for backward operation, or an ICU BreakIterator
- * could be used.
- *
- * Note: Unpaired surrogates are treated like surrogate code points.
- * Similarly, set strings match only on code point boundaries,
- * never in the middle of a surrogate pair.
- * Illegal UTF-8 sequences are treated like U+FFFD.
- * When processing UTF-8 strings, malformed set strings
- * (strings with unpaired surrogates which cannot be converted to UTF-8)
- * are ignored.
- *
- * @stable ICU 4.0
- */
-typedef enum USetSpanCondition {
-    /**
-     * Continue a span() while there is no set element at the current position.
-     * Stops before the first set element (character or string).
-     * (For code points only, this is like while contains(current)==FALSE).
-     *
-     * When span() returns, the substring between where it started and the position
-     * it returned consists only of characters that are not in the set,
-     * and none of its strings overlap with the span.
-     *
-     * @stable ICU 4.0
-     */
-    USET_SPAN_NOT_CONTAINED = 0,
-    /**
-     * Continue a span() while there is a set element at the current position.
-     * (For characters only, this is like while contains(current)==TRUE).
-     *
-     * When span() returns, the substring between where it started and the position
-     * it returned consists only of set elements (characters or strings) that are in the set.
-     *
-     * If a set contains strings, then the span will be the longest substring
-     * matching any of the possible concatenations of set elements (characters or strings).
-     * (There must be a single, non-overlapping concatenation of characters or strings.)
-     * This is equivalent to a POSIX regular expression for (OR of each set element)*.
-     *
-     * @stable ICU 4.0
-     */
-    USET_SPAN_CONTAINED = 1,
-    /**
-     * Continue a span() while there is a set element at the current position.
-     * (For characters only, this is like while contains(current)==TRUE).
-     *
-     * When span() returns, the substring between where it started and the position
-     * it returned consists only of set elements (characters or strings) that are in the set.
-     *
-     * If a set only contains single characters, then this is the same
-     * as USET_SPAN_CONTAINED.
-     *
-     * If a set contains strings, then the span will be the longest substring
-     * with a match at each position with the longest single set element (character or string).
-     *
-     * Use this span condition together with other longest-match algorithms,
-     * such as ICU converters (ucnv_getUnicodeSet()).
-     *
-     * @stable ICU 4.0
-     */
-    USET_SPAN_SIMPLE = 2,
-    /**
-     * One more than the last span condition.
-     * @stable ICU 4.0
-     */
-    USET_SPAN_CONDITION_COUNT
-} USetSpanCondition;
-
-/**
- * A serialized form of a Unicode set.  Limited manipulations are
- * possible directly on a serialized set.  See below.
- * @stable ICU 2.4
- */
-typedef struct USerializedSet {
-    /**
-     * The serialized Unicode Set.
-     * @stable ICU 2.4
-     */
-    const uint16_t *array;
-    /**
-     * The length of the array that contains BMP characters.
-     * @stable ICU 2.4
-     */
-    int32_t bmpLength;
-    /**
-     * The total length of the array.
-     * @stable ICU 2.4
-     */
-    int32_t length;
-    /**
-     * A small buffer for the array to reduce memory allocations.
-     * @stable ICU 2.4
-     */
-    uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
-} USerializedSet;
-
-/*********************************************************************
- * USet API
- *********************************************************************/
-
-/**
- * Creates a USet object that contains the range of characters
- * start..end, inclusive.  If <code>start > end</code> 
- * then an empty set is created.
- * @param start first character of the range, inclusive
- * @param end last character of the range, inclusive
- * @return a newly created USet.  The caller must call uset_close() on
- * it when done.
- * @stable ICU 2.4
- */
-U_STABLE USet* U_EXPORT2
-uset_open(UChar32 start, UChar32 end);
-
-/**
- * Creates a set from the given pattern.  See the UnicodeSet class
- * description for the syntax of the pattern language.
- * @param pattern a string specifying what characters are in the set
- * @param patternLength the length of the pattern, or -1 if null
- * terminated
- * @param ec the error code
- * @stable ICU 2.4
- */
-U_STABLE USet* U_EXPORT2
-uset_openPattern(const UChar* pattern, int32_t patternLength,
-                 UErrorCode* ec);
-
-/**
- * Creates a set from the given pattern.  See the UnicodeSet class
- * description for the syntax of the pattern language.
- * @param pattern a string specifying what characters are in the set
- * @param patternLength the length of the pattern, or -1 if null
- * terminated
- * @param options bitmask for options to apply to the pattern.
- * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
- * @param ec the error code
- * @stable ICU 2.4
- */
-U_STABLE USet* U_EXPORT2
-uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
-                 uint32_t options,
-                 UErrorCode* ec);
-
-/**
- * Disposes of the storage used by a USet object.  This function should
- * be called exactly once for objects returned by uset_open().
- * @param set the object to dispose of
- * @stable ICU 2.4
- */
-U_STABLE void U_EXPORT2
-uset_close(USet* set);
-
-/**
- * Returns a copy of this object.
- * If this set is frozen, then the clone will be frozen as well.
- * Use uset_cloneAsThawed() for a mutable clone of a frozen set.
- * @param set the original set
- * @return the newly allocated copy of the set
- * @see uset_cloneAsThawed
- * @stable ICU 4.0
- */
-U_DRAFT USet * U_EXPORT2
-uset_clone(const USet *set);
-
-/**
- * Determines whether the set has been frozen (made immutable) or not.
- * See the ICU4J Freezable interface for details.
- * @param set the set
- * @return TRUE/FALSE for whether the set has been frozen
- * @see uset_freeze
- * @see uset_cloneAsThawed
- * @stable ICU 4.0
- */
-U_DRAFT UBool U_EXPORT2
-uset_isFrozen(const USet *set);
-
-/**
- * Freeze the set (make it immutable).
- * Once frozen, it cannot be unfrozen and is therefore thread-safe
- * until it is deleted.
- * See the ICU4J Freezable interface for details.
- * Freezing the set may also make some operations faster, for example
- * uset_contains() and uset_span().
- * A frozen set will not be modified. (It remains frozen.)
- * @param set the set
- * @return the same set, now frozen
- * @see uset_isFrozen
- * @see uset_cloneAsThawed
- * @stable ICU 4.0
- */
-U_DRAFT void U_EXPORT2
-uset_freeze(USet *set);
-
-/**
- * Clone the set and make the clone mutable.
- * See the ICU4J Freezable interface for details.
- * @param set the set
- * @return the mutable clone
- * @see uset_freeze
- * @see uset_isFrozen
- * @see uset_clone
- * @stable ICU 4.0
- */
-U_DRAFT USet * U_EXPORT2
-uset_cloneAsThawed(const USet *set);
-
-/**
- * Causes the USet object to represent the range <code>start - end</code>.
- * If <code>start > end</code> then this USet is set to an empty range.
- * A frozen set will not be modified.
- * @param set the object to set to the given range
- * @param start first character in the set, inclusive
- * @param end last character in the set, inclusive
- * @stable ICU 3.2
- */
-U_STABLE void U_EXPORT2
-uset_set(USet* set,
-         UChar32 start, UChar32 end);
-
-/**
- * Modifies the set to represent the set specified by the given
- * pattern. See the UnicodeSet class description for the syntax of 
- * the pattern language. See also the User Guide chapter about UnicodeSet.
- * <em>Empties the set passed before applying the pattern.</em>
- * A frozen set will not be modified.
- * @param set               The set to which the pattern is to be applied. 
- * @param pattern           A pointer to UChar string specifying what characters are in the set.
- *                          The character at pattern[0] must be a '['.
- * @param patternLength     The length of the UChar string. -1 if NUL terminated.
- * @param options           A bitmask for options to apply to the pattern.
- *                          Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
- * @param status            Returns an error if the pattern cannot be parsed.
- * @return                  Upon successful parse, the value is either
- *                          the index of the character after the closing ']' 
- *                          of the parsed pattern.
- *                          If the status code indicates failure, then the return value 
- *                          is the index of the error in the source.
- *
- * @stable ICU 2.8
- */
-U_STABLE int32_t U_EXPORT2 
-uset_applyPattern(USet *set,
-                  const UChar *pattern, int32_t patternLength,
-                  uint32_t options,
-                  UErrorCode *status);
-
-/**
- * Modifies the set to contain those code points which have the given value
- * for the given binary or enumerated property, as returned by
- * u_getIntPropertyValue.  Prior contents of this set are lost.
- * A frozen set will not be modified.
- *
- * @param set the object to contain the code points defined by the property
- *
- * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
- * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
- * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
- *
- * @param value a value in the range u_getIntPropertyMinValue(prop)..
- * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
- * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
- * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
- * categories such as [:L:] to be represented.
- *
- * @param ec error code input/output parameter
- *
- * @stable ICU 3.2
- */
-U_STABLE void U_EXPORT2
-uset_applyIntPropertyValue(USet* set,
-                           UProperty prop, int32_t value, UErrorCode* ec);
-
-/**
- * Modifies the set to contain those code points which have the
- * given value for the given property.  Prior contents of this
- * set are lost.
- * A frozen set will not be modified.
- *
- * @param set the object to contain the code points defined by the given
- * property and value alias
- *
- * @param prop a string specifying a property alias, either short or long.
- * The name is matched loosely.  See PropertyAliases.txt for names and a
- * description of loose matching.  If the value string is empty, then this
- * string is interpreted as either a General_Category value alias, a Script
- * value alias, a binary property alias, or a special ID.  Special IDs are
- * matched loosely and correspond to the following sets:
- *
- * "ANY" = [\\u0000-\\U0010FFFF],
- * "ASCII" = [\\u0000-\\u007F],
- * "Assigned" = [:^Cn:].
- *
- * @param propLength the length of the prop, or -1 if NULL
- *
- * @param value a string specifying a value alias, either short or long.
- * The name is matched loosely.  See PropertyValueAliases.txt for names
- * and a description of loose matching.  In addition to aliases listed,
- * numeric values and canonical combining classes may be expressed
- * numerically, e.g., ("nv", "0.5") or ("ccc", "220").  The value string
- * may also be empty.
- *
- * @param valueLength the length of the value, or -1 if NULL
- *
- * @param ec error code input/output parameter
- *
- * @stable ICU 3.2
- */
-U_STABLE void U_EXPORT2
-uset_applyPropertyAlias(USet* set,
-                        const UChar *prop, int32_t propLength,
-                        const UChar *value, int32_t valueLength,
-                        UErrorCode* ec);
-
-/**
- * Return true if the given position, in the given pattern, appears
- * to be the start of a UnicodeSet pattern.
- *
- * @param pattern a string specifying the pattern
- * @param patternLength the length of the pattern, or -1 if NULL
- * @param pos the given position
- * @stable ICU 3.2
- */
-U_STABLE UBool U_EXPORT2
-uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
-                      int32_t pos);
-
-/**
- * Returns a string representation of this set.  If the result of
- * calling this function is passed to a uset_openPattern(), it
- * will produce another set that is equal to this one.
- * @param set the set
- * @param result the string to receive the rules, may be NULL
- * @param resultCapacity the capacity of result, may be 0 if result is NULL
- * @param escapeUnprintable if TRUE then convert unprintable
- * character to their hex escape representations, \\uxxxx or
- * \\Uxxxxxxxx.  Unprintable characters are those other than
- * U+000A, U+0020..U+007E.
- * @param ec error code.
- * @return length of string, possibly larger than resultCapacity
- * @stable ICU 2.4
- */
-U_STABLE int32_t U_EXPORT2
-uset_toPattern(const USet* set,
-               UChar* result, int32_t resultCapacity,
-               UBool escapeUnprintable,
-               UErrorCode* ec);
-
-/**
- * Adds the given character to the given USet.  After this call,
- * uset_contains(set, c) will return TRUE.
- * A frozen set will not be modified.
- * @param set the object to which to add the character
- * @param c the character to add
- * @stable ICU 2.4
- */
-U_STABLE void U_EXPORT2
-uset_add(USet* set, UChar32 c);
-
-/**
- * Adds all of the elements in the specified set to this set if
- * they're not already present.  This operation effectively
- * modifies this set so that its value is the <i>union</i> of the two
- * sets.  The behavior of this operation is unspecified if the specified
- * collection is modified while the operation is in progress.
- * A frozen set will not be modified.
- *
- * @param set the object to which to add the set
- * @param additionalSet the source set whose elements are to be added to this set.
- * @stable ICU 2.6
- */
-U_STABLE void U_EXPORT2
-uset_addAll(USet* set, const USet *additionalSet);
-
-/**
- * Adds the given range of characters to the given USet.  After this call,
- * uset_contains(set, start, end) will return TRUE.
- * A frozen set will not be modified.
- * @param set the object to which to add the character
- * @param start the first character of the range to add, inclusive
- * @param end the last character of the range to add, inclusive
- * @stable ICU 2.2
- */
-U_STABLE void U_EXPORT2
-uset_addRange(USet* set, UChar32 start, UChar32 end);
-
-/**
- * Adds the given string to the given USet.  After this call,
- * uset_containsString(set, str, strLen) will return TRUE.
- * A frozen set will not be modified.
- * @param set the object to which to add the character
- * @param str the string to add
- * @param strLen the length of the string or -1 if null terminated.
- * @stable ICU 2.4
- */
-U_STABLE void U_EXPORT2
-uset_addString(USet* set, const UChar* str, int32_t strLen);
-
-/**
- * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
- * A frozen set will not be modified.
- * @param set the object to which to add the character
- * @param str the source string
- * @param strLen the length of the string or -1 if null terminated.
- * @stable ICU 3.4
- */
-U_STABLE void U_EXPORT2
-uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
-
-/**
- * Removes the given character from the given USet.  After this call,
- * uset_contains(set, c) will return FALSE.
- * A frozen set will not be modified.
- * @param set the object from which to remove the character
- * @param c the character to remove
- * @stable ICU 2.4
- */
-U_STABLE void U_EXPORT2
-uset_remove(USet* set, UChar32 c);
-
-/**
- * Removes the given range of characters from the given USet.  After this call,
- * uset_contains(set, start, end) will return FALSE.
- * A frozen set will not be modified.
- * @param set the object to which to add the character
- * @param start the first character of the range to remove, inclusive
- * @param end the last character of the range to remove, inclusive
- * @stable ICU 2.2
- */
-U_STABLE void U_EXPORT2
-uset_removeRange(USet* set, UChar32 start, UChar32 end);
-
-/**
- * Removes the given string to the given USet.  After this call,
- * uset_containsString(set, str, strLen) will return FALSE.
- * A frozen set will not be modified.
- * @param set the object to which to add the character
- * @param str the string to remove
- * @param strLen the length of the string or -1 if null terminated.
- * @stable ICU 2.4
- */
-U_STABLE void U_EXPORT2
-uset_removeString(USet* set, const UChar* str, int32_t strLen);
-
-/**
- * Removes from this set all of its elements that are contained in the
- * specified set.  This operation effectively modifies this
- * set so that its value is the <i>asymmetric set difference</i> of
- * the two sets.
- * A frozen set will not be modified.
- * @param set the object from which the elements are to be removed
- * @param removeSet the object that defines which elements will be
- * removed from this set
- * @stable ICU 3.2
- */
-U_STABLE void U_EXPORT2
-uset_removeAll(USet* set, const USet* removeSet);
-
-/**
- * Retain only the elements in this set that are contained in the
- * specified range.  If <code>start > end</code> then an empty range is
- * retained, leaving the set empty.  This is equivalent to
- * a boolean logic AND, or a set INTERSECTION.
- * A frozen set will not be modified.
- *
- * @param set the object for which to retain only the specified range
- * @param start first character, inclusive, of range to be retained
- * to this set.
- * @param end last character, inclusive, of range to be retained
- * to this set.
- * @stable ICU 3.2
- */
-U_STABLE void U_EXPORT2
-uset_retain(USet* set, UChar32 start, UChar32 end);
-
-/**
- * Retains only the elements in this set that are contained in the
- * specified set.  In other words, removes from this set all of
- * its elements that are not contained in the specified set.  This
- * operation effectively modifies this set so that its value is
- * the <i>intersection</i> of the two sets.
- * A frozen set will not be modified.
- *
- * @param set the object on which to perform the retain
- * @param retain set that defines which elements this set will retain
- * @stable ICU 3.2
- */
-U_STABLE void U_EXPORT2
-uset_retainAll(USet* set, const USet* retain);
-
-/**
- * Reallocate this objects internal structures to take up the least
- * possible space, without changing this object's value.
- * A frozen set will not be modified.
- *
- * @param set the object on which to perfrom the compact
- * @stable ICU 3.2
- */
-U_STABLE void U_EXPORT2
-uset_compact(USet* set);
-
-/**
- * Inverts this set.  This operation modifies this set so that
- * its value is its complement.  This operation does not affect
- * the multicharacter strings, if any.
- * A frozen set will not be modified.
- * @param set the set
- * @stable ICU 2.4
- */
-U_STABLE void U_EXPORT2
-uset_complement(USet* set);
-
-/**
- * Complements in this set all elements contained in the specified
- * set.  Any character in the other set will be removed if it is
- * in this set, or will be added if it is not in this set.
- * A frozen set will not be modified.
- *
- * @param set the set with which to complement
- * @param complement set that defines which elements will be xor'ed
- * from this set.
- * @stable ICU 3.2
- */
-U_STABLE void U_EXPORT2
-uset_complementAll(USet* set, const USet* complement);
-
-/**
- * Removes all of the elements from this set.  This set will be
- * empty after this call returns.
- * A frozen set will not be modified.
- * @param set the set
- * @stable ICU 2.4
- */
-U_STABLE void U_EXPORT2
-uset_clear(USet* set);
-
-/**
- * Returns TRUE if the given USet contains no characters and no
- * strings.
- * @param set the set
- * @return true if set is empty
- * @stable ICU 2.4
- */
-U_STABLE UBool U_EXPORT2
-uset_isEmpty(const USet* set);
-
-/**
- * Returns TRUE if the given USet contains the given character.
- * This function works faster with a frozen set.
- * @param set the set
- * @param c The codepoint to check for within the set
- * @return true if set contains c
- * @stable ICU 2.4
- */
-U_STABLE UBool U_EXPORT2
-uset_contains(const USet* set, UChar32 c);
-
-/**
- * Returns TRUE if the given USet contains all characters c
- * where start <= c && c <= end.
- * @param set the set
- * @param start the first character of the range to test, inclusive
- * @param end the last character of the range to test, inclusive
- * @return TRUE if set contains the range
- * @stable ICU 2.2
- */
-U_STABLE UBool U_EXPORT2
-uset_containsRange(const USet* set, UChar32 start, UChar32 end);
-
-/**
- * Returns TRUE if the given USet contains the given string.
- * @param set the set
- * @param str the string
- * @param strLen the length of the string or -1 if null terminated.
- * @return true if set contains str
- * @stable ICU 2.4
- */
-U_STABLE UBool U_EXPORT2
-uset_containsString(const USet* set, const UChar* str, int32_t strLen);
-
-/**
- * Returns the index of the given character within this set, where
- * the set is ordered by ascending code point.  If the character
- * is not in this set, return -1.  The inverse of this method is
- * <code>charAt()</code>.
- * @param set the set
- * @param c the character to obtain the index for
- * @return an index from 0..size()-1, or -1
- * @stable ICU 3.2
- */
-U_STABLE int32_t U_EXPORT2
-uset_indexOf(const USet* set, UChar32 c);
-
-/**
- * Returns the character at the given index within this set, where
- * the set is ordered by ascending code point.  If the index is
- * out of range, return (UChar32)-1.  The inverse of this method is
- * <code>indexOf()</code>.
- * @param set the set
- * @param index an index from 0..size()-1 to obtain the char for
- * @return the character at the given index, or (UChar32)-1.
- * @stable ICU 3.2
- */
-U_STABLE UChar32 U_EXPORT2
-uset_charAt(const USet* set, int32_t index);
-
-/**
- * Returns the number of characters and strings contained in the given
- * USet.
- * @param set the set
- * @return a non-negative integer counting the characters and strings
- * contained in set
- * @stable ICU 2.4
- */
-U_STABLE int32_t U_EXPORT2
-uset_size(const USet* set);
-
-/**
- * Returns the number of items in this set.  An item is either a range
- * of characters or a single multicharacter string.
- * @param set the set
- * @return a non-negative integer counting the character ranges
- * and/or strings contained in set
- * @stable ICU 2.4
- */
-U_STABLE int32_t U_EXPORT2
-uset_getItemCount(const USet* set);
-
-/**
- * Returns an item of this set.  An item is either a range of
- * characters or a single multicharacter string.
- * @param set the set
- * @param itemIndex a non-negative integer in the range 0..
- * uset_getItemCount(set)-1
- * @param start pointer to variable to receive first character
- * in range, inclusive
- * @param end pointer to variable to receive last character in range,
- * inclusive
- * @param str buffer to receive the string, may be NULL
- * @param strCapacity capacity of str, or 0 if str is NULL
- * @param ec error code
- * @return the length of the string (>= 2), or 0 if the item is a
- * range, in which case it is the range *start..*end, or -1 if
- * itemIndex is out of range
- * @stable ICU 2.4
- */
-U_STABLE int32_t U_EXPORT2
-uset_getItem(const USet* set, int32_t itemIndex,
-             UChar32* start, UChar32* end,
-             UChar* str, int32_t strCapacity,
-             UErrorCode* ec);
-
-/**
- * Returns true if set1 contains all the characters and strings
- * of set2. It answers the question, 'Is set1 a superset of set2?'
- * @param set1 set to be checked for containment
- * @param set2 set to be checked for containment
- * @return true if the test condition is met
- * @stable ICU 3.2
- */
-U_STABLE UBool U_EXPORT2
-uset_containsAll(const USet* set1, const USet* set2);
-
-/**
- * Returns true if this set contains all the characters
- * of the given string. This is does not check containment of grapheme
- * clusters, like uset_containsString.
- * @param set set of characters to be checked for containment
- * @param str string containing codepoints to be checked for containment
- * @param strLen the length of the string or -1 if null terminated.
- * @return true if the test condition is met
- * @stable ICU 3.4
- */
-U_STABLE UBool U_EXPORT2
-uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
-
-/**
- * Returns true if set1 contains none of the characters and strings
- * of set2. It answers the question, 'Is set1 a disjoint set of set2?'
- * @param set1 set to be checked for containment
- * @param set2 set to be checked for containment
- * @return true if the test condition is met
- * @stable ICU 3.2
- */
-U_STABLE UBool U_EXPORT2
-uset_containsNone(const USet* set1, const USet* set2);
-
-/**
- * Returns true if set1 contains some of the characters and strings
- * of set2. It answers the question, 'Does set1 and set2 have an intersection?'
- * @param set1 set to be checked for containment
- * @param set2 set to be checked for containment
- * @return true if the test condition is met
- * @stable ICU 3.2
- */
-U_STABLE UBool U_EXPORT2
-uset_containsSome(const USet* set1, const USet* set2);
-
-/**
- * Returns the length of the initial substring of the input string which
- * consists only of characters and strings that are contained in this set
- * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
- * or only of characters and strings that are not contained
- * in this set (USET_SPAN_NOT_CONTAINED).
- * See USetSpanCondition for details.
- * Similar to the strspn() C library function.
- * Unpaired surrogates are treated according to contains() of their surrogate code points.
- * This function works faster with a frozen set and with a non-negative string length argument.
- * @param set the set
- * @param s start of the string
- * @param length of the string; can be -1 for NUL-terminated
- * @param spanCondition specifies the containment condition
- * @return the length of the initial substring according to the spanCondition;
- *         0 if the start of the string does not fit the spanCondition
- * @stable ICU 4.0
- * @see USetSpanCondition
- */
-U_DRAFT int32_t U_EXPORT2
-uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
-
-/**
- * Returns the start of the trailing substring of the input string which
- * consists only of characters and strings that are contained in this set
- * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
- * or only of characters and strings that are not contained
- * in this set (USET_SPAN_NOT_CONTAINED).
- * See USetSpanCondition for details.
- * Unpaired surrogates are treated according to contains() of their surrogate code points.
- * This function works faster with a frozen set and with a non-negative string length argument.
- * @param set the set
- * @param s start of the string
- * @param length of the string; can be -1 for NUL-terminated
- * @param spanCondition specifies the containment condition
- * @return the start of the trailing substring according to the spanCondition;
- *         the string length if the end of the string does not fit the spanCondition
- * @stable ICU 4.0
- * @see USetSpanCondition
- */
-U_DRAFT int32_t U_EXPORT2
-uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
-
-/**
- * Returns the length of the initial substring of the input string which
- * consists only of characters and strings that are contained in this set
- * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
- * or only of characters and strings that are not contained
- * in this set (USET_SPAN_NOT_CONTAINED).
- * See USetSpanCondition for details.
- * Similar to the strspn() C library function.
- * Malformed byte sequences are treated according to contains(0xfffd).
- * This function works faster with a frozen set and with a non-negative string length argument.
- * @param set the set
- * @param s start of the string (UTF-8)
- * @param length of the string; can be -1 for NUL-terminated
- * @param spanCondition specifies the containment condition
- * @return the length of the initial substring according to the spanCondition;
- *         0 if the start of the string does not fit the spanCondition
- * @stable ICU 4.0
- * @see USetSpanCondition
- */
-U_DRAFT int32_t U_EXPORT2
-uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
-
-/**
- * Returns the start of the trailing substring of the input string which
- * consists only of characters and strings that are contained in this set
- * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
- * or only of characters and strings that are not contained
- * in this set (USET_SPAN_NOT_CONTAINED).
- * See USetSpanCondition for details.
- * Malformed byte sequences are treated according to contains(0xfffd).
- * This function works faster with a frozen set and with a non-negative string length argument.
- * @param set the set
- * @param s start of the string (UTF-8)
- * @param length of the string; can be -1 for NUL-terminated
- * @param spanCondition specifies the containment condition
- * @return the start of the trailing substring according to the spanCondition;
- *         the string length if the end of the string does not fit the spanCondition
- * @stable ICU 4.0
- * @see USetSpanCondition
- */
-U_DRAFT int32_t U_EXPORT2
-uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
-
-/**
- * Returns true if set1 contains all of the characters and strings
- * of set2, and vis versa. It answers the question, 'Is set1 equal to set2?'
- * @param set1 set to be checked for containment
- * @param set2 set to be checked for containment
- * @return true if the test condition is met
- * @stable ICU 3.2
- */
-U_STABLE UBool U_EXPORT2
-uset_equals(const USet* set1, const USet* set2);
-
-/*********************************************************************
- * Serialized set API
- *********************************************************************/
-
-/**
- * Serializes this set into an array of 16-bit integers.  Serialization
- * (currently) only records the characters in the set; multicharacter
- * strings are ignored.
- *
- * The array
- * has following format (each line is one 16-bit integer):
- *
- *  length     = (n+2*m) | (m!=0?0x8000:0)
- *  bmpLength  = n; present if m!=0
- *  bmp[0]
- *  bmp[1]
- *  ...
- *  bmp[n-1]
- *  supp-high[0]
- *  supp-low[0]
- *  supp-high[1]
- *  supp-low[1]
- *  ...
- *  supp-high[m-1]
- *  supp-low[m-1]
- *
- * The array starts with a header.  After the header are n bmp
- * code points, then m supplementary code points.  Either n or m
- * or both may be zero.  n+2*m is always <= 0x7FFF.
- *
- * If there are no supplementary characters (if m==0) then the
- * header is one 16-bit integer, 'length', with value n.
- *
- * If there are supplementary characters (if m!=0) then the header
- * is two 16-bit integers.  The first, 'length', has value
- * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
- *
- * After the header the code points are stored in ascending order.
- * Supplementary code points are stored as most significant 16
- * bits followed by least significant 16 bits.
- *
- * @param set the set
- * @param dest pointer to buffer of destCapacity 16-bit integers.
- * May be NULL only if destCapacity is zero.
- * @param destCapacity size of dest, or zero.  Must not be negative.
- * @param pErrorCode pointer to the error code.  Will be set to
- * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF.  Will be set to
- * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
- * @return the total length of the serialized format, including
- * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
- * than U_BUFFER_OVERFLOW_ERROR.
- * @stable ICU 2.4
- */
-U_STABLE int32_t U_EXPORT2
-uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
-
-/**
- * Given a serialized array, fill in the given serialized set object.
- * @param fillSet pointer to result
- * @param src pointer to start of array
- * @param srcLength length of array
- * @return true if the given array is valid, otherwise false
- * @stable ICU 2.4
- */
-U_STABLE UBool U_EXPORT2
-uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
-
-/**
- * Set the USerializedSet to contain the given character (and nothing
- * else).
- * @param fillSet pointer to result
- * @param c The codepoint to set
- * @stable ICU 2.4
- */
-U_STABLE void U_EXPORT2
-uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
-
-/**
- * Returns TRUE if the given USerializedSet contains the given
- * character.
- * @param set the serialized set
- * @param c The codepoint to check for within the set
- * @return true if set contains c
- * @stable ICU 2.4
- */
-U_STABLE UBool U_EXPORT2
-uset_serializedContains(const USerializedSet* set, UChar32 c);
-
-/**
- * Returns the number of disjoint ranges of characters contained in
- * the given serialized set.  Ignores any strings contained in the
- * set.
- * @param set the serialized set
- * @return a non-negative integer counting the character ranges
- * contained in set
- * @stable ICU 2.4
- */
-U_STABLE int32_t U_EXPORT2
-uset_getSerializedRangeCount(const USerializedSet* set);
-
-/**
- * Returns a range of characters contained in the given serialized
- * set.
- * @param set the serialized set
- * @param rangeIndex a non-negative integer in the range 0..
- * uset_getSerializedRangeCount(set)-1
- * @param pStart pointer to variable to receive first character
- * in range, inclusive
- * @param pEnd pointer to variable to receive last character in range,
- * inclusive
- * @return true if rangeIndex is valid, otherwise false
- * @stable ICU 2.4
- */
-U_STABLE UBool U_EXPORT2
-uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
-                        UChar32* pStart, UChar32* pEnd);
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/uset.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uset.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uset.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uset.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1052 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2002-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uset.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002mar07
+*   created by: Markus W. Scherer
+*
+*   C version of UnicodeSet.
+*/
+
+
+/**
+ * \file
+ * \brief C API: Unicode Set
+ *
+ * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
+ */
+
+#ifndef __USET_H__
+#define __USET_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+
+#ifndef UCNV_H
+struct USet;
+/**
+ * A UnicodeSet.  Use the uset_* API to manipulate.  Create with
+ * uset_open*, and destroy with uset_close.
+ * @stable ICU 2.4
+ */
+typedef struct USet USet;
+#endif
+
+/**
+ * Bitmask values to be passed to uset_openPatternOptions() or
+ * uset_applyPattern() taking an option parameter.
+ * @stable ICU 2.4
+ */
+enum {
+    /**
+     * Ignore white space within patterns unless quoted or escaped.
+     * @stable ICU 2.4
+     */
+    USET_IGNORE_SPACE = 1,  
+
+    /**
+     * Enable case insensitive matching.  E.g., "[ab]" with this flag
+     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
+     * match all except 'a', 'A', 'b', and 'B'. This performs a full
+     * closure over case mappings, e.g. U+017F for s.
+     *
+     * The resulting set is a superset of the input for the code points but
+     * not for the strings.
+     * It performs a case mapping closure of the code points and adds
+     * full case folding strings for the code points, and reduces strings of
+     * the original set to their full case folding equivalents.
+     *
+     * This is designed for case-insensitive matches, for example
+     * in regular expressions. The full code point case closure allows checking of
+     * an input character directly against the closure set.
+     * Strings are matched by comparing the case-folded form from the closure
+     * set with an incremental case folding of the string in question.
+     *
+     * The closure set will also contain single code points if the original
+     * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
+     * This is not necessary (that is, redundant) for the above matching method
+     * but results in the same closure sets regardless of whether the original
+     * set contained the code point or a string.
+     *
+     * @stable ICU 2.4
+     */
+    USET_CASE_INSENSITIVE = 2,  
+
+    /**
+     * Enable case insensitive matching.  E.g., "[ab]" with this flag
+     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
+     * match all except 'a', 'A', 'b', and 'B'. This adds the lower-,
+     * title-, and uppercase mappings as well as the case folding
+     * of each existing element in the set.
+     * @stable ICU 3.2
+     */
+    USET_ADD_CASE_MAPPINGS = 4,
+    
+    /**
+     * Enough for any single-code point set
+     * @internal
+     */
+    USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
+};
+
+/**
+ * Argument values for whether span() and similar functions continue while
+ * the current character is contained vs. not contained in the set.
+ *
+ * The functionality is straightforward for sets with only single code points,
+ * without strings (which is the common case):
+ * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE
+ *   work the same.
+ * - span() and spanBack() partition any string the same way when
+ *   alternating between span(USET_SPAN_NOT_CONTAINED) and
+ *   span(either "contained" condition).
+ * - Using a complemented (inverted) set and the opposite span conditions
+ *   yields the same results.
+ *
+ * When a set contains multi-code point strings, then these statements may not
+ * be true, depending on the strings in the set (for example, whether they
+ * overlap with each other) and the string that is processed.
+ * For a set with strings:
+ * - The complement of the set contains the opposite set of code points,
+ *   but the same set of strings.
+ *   Therefore, complementing both the set and the span conditions
+ *   may yield different results.
+ * - When starting spans at different positions in a string
+ *   (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
+ *   because a set string may start before the later position.
+ * - span(USET_SPAN_SIMPLE) may be shorter than
+ *   span(USET_SPAN_CONTAINED) because it will not recursively try
+ *   all possible paths.
+ *   For example, with a set which contains the three strings "xy", "xya" and "ax",
+ *   span("xyax", USET_SPAN_CONTAINED) will return 4 but
+ *   span("xyax", USET_SPAN_SIMPLE) will return 3.
+ *   span(USET_SPAN_SIMPLE) will never be longer than
+ *   span(USET_SPAN_CONTAINED).
+ * - With either "contained" condition, span() and spanBack() may partition
+ *   a string in different ways.
+ *   For example, with a set which contains the two strings "ab" and "ba",
+ *   and when processing the string "aba",
+ *   span() will yield contained/not-contained boundaries of { 0, 2, 3 }
+ *   while spanBack() will yield boundaries of { 0, 1, 3 }.
+ *
+ * Note: If it is important to get the same boundaries whether iterating forward
+ * or backward through a string, then either only span() should be used and
+ * the boundaries cached for backward operation, or an ICU BreakIterator
+ * could be used.
+ *
+ * Note: Unpaired surrogates are treated like surrogate code points.
+ * Similarly, set strings match only on code point boundaries,
+ * never in the middle of a surrogate pair.
+ * Illegal UTF-8 sequences are treated like U+FFFD.
+ * When processing UTF-8 strings, malformed set strings
+ * (strings with unpaired surrogates which cannot be converted to UTF-8)
+ * are ignored.
+ *
+ * @stable ICU 4.0
+ */
+typedef enum USetSpanCondition {
+    /**
+     * Continue a span() while there is no set element at the current position.
+     * Stops before the first set element (character or string).
+     * (For code points only, this is like while contains(current)==FALSE).
+     *
+     * When span() returns, the substring between where it started and the position
+     * it returned consists only of characters that are not in the set,
+     * and none of its strings overlap with the span.
+     *
+     * @stable ICU 4.0
+     */
+    USET_SPAN_NOT_CONTAINED = 0,
+    /**
+     * Continue a span() while there is a set element at the current position.
+     * (For characters only, this is like while contains(current)==TRUE).
+     *
+     * When span() returns, the substring between where it started and the position
+     * it returned consists only of set elements (characters or strings) that are in the set.
+     *
+     * If a set contains strings, then the span will be the longest substring
+     * matching any of the possible concatenations of set elements (characters or strings).
+     * (There must be a single, non-overlapping concatenation of characters or strings.)
+     * This is equivalent to a POSIX regular expression for (OR of each set element)*.
+     *
+     * @stable ICU 4.0
+     */
+    USET_SPAN_CONTAINED = 1,
+    /**
+     * Continue a span() while there is a set element at the current position.
+     * (For characters only, this is like while contains(current)==TRUE).
+     *
+     * When span() returns, the substring between where it started and the position
+     * it returned consists only of set elements (characters or strings) that are in the set.
+     *
+     * If a set only contains single characters, then this is the same
+     * as USET_SPAN_CONTAINED.
+     *
+     * If a set contains strings, then the span will be the longest substring
+     * with a match at each position with the longest single set element (character or string).
+     *
+     * Use this span condition together with other longest-match algorithms,
+     * such as ICU converters (ucnv_getUnicodeSet()).
+     *
+     * @stable ICU 4.0
+     */
+    USET_SPAN_SIMPLE = 2,
+    /**
+     * One more than the last span condition.
+     * @stable ICU 4.0
+     */
+    USET_SPAN_CONDITION_COUNT
+} USetSpanCondition;
+
+/**
+ * A serialized form of a Unicode set.  Limited manipulations are
+ * possible directly on a serialized set.  See below.
+ * @stable ICU 2.4
+ */
+typedef struct USerializedSet {
+    /**
+     * The serialized Unicode Set.
+     * @stable ICU 2.4
+     */
+    const uint16_t *array;
+    /**
+     * The length of the array that contains BMP characters.
+     * @stable ICU 2.4
+     */
+    int32_t bmpLength;
+    /**
+     * The total length of the array.
+     * @stable ICU 2.4
+     */
+    int32_t length;
+    /**
+     * A small buffer for the array to reduce memory allocations.
+     * @stable ICU 2.4
+     */
+    uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
+} USerializedSet;
+
+/*********************************************************************
+ * USet API
+ *********************************************************************/
+
+/**
+ * Creates a USet object that contains the range of characters
+ * start..end, inclusive.  If <code>start > end</code> 
+ * then an empty set is created.
+ * @param start first character of the range, inclusive
+ * @param end last character of the range, inclusive
+ * @return a newly created USet.  The caller must call uset_close() on
+ * it when done.
+ * @stable ICU 2.4
+ */
+U_STABLE USet* U_EXPORT2
+uset_open(UChar32 start, UChar32 end);
+
+/**
+ * Creates a set from the given pattern.  See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_STABLE USet* U_EXPORT2
+uset_openPattern(const UChar* pattern, int32_t patternLength,
+                 UErrorCode* ec);
+
+/**
+ * Creates a set from the given pattern.  See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_STABLE USet* U_EXPORT2
+uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
+                 uint32_t options,
+                 UErrorCode* ec);
+
+/**
+ * Disposes of the storage used by a USet object.  This function should
+ * be called exactly once for objects returned by uset_open().
+ * @param set the object to dispose of
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_close(USet* set);
+
+/**
+ * Returns a copy of this object.
+ * If this set is frozen, then the clone will be frozen as well.
+ * Use uset_cloneAsThawed() for a mutable clone of a frozen set.
+ * @param set the original set
+ * @return the newly allocated copy of the set
+ * @see uset_cloneAsThawed
+ * @stable ICU 4.0
+ */
+U_DRAFT USet * U_EXPORT2
+uset_clone(const USet *set);
+
+/**
+ * Determines whether the set has been frozen (made immutable) or not.
+ * See the ICU4J Freezable interface for details.
+ * @param set the set
+ * @return TRUE/FALSE for whether the set has been frozen
+ * @see uset_freeze
+ * @see uset_cloneAsThawed
+ * @stable ICU 4.0
+ */
+U_DRAFT UBool U_EXPORT2
+uset_isFrozen(const USet *set);
+
+/**
+ * Freeze the set (make it immutable).
+ * Once frozen, it cannot be unfrozen and is therefore thread-safe
+ * until it is deleted.
+ * See the ICU4J Freezable interface for details.
+ * Freezing the set may also make some operations faster, for example
+ * uset_contains() and uset_span().
+ * A frozen set will not be modified. (It remains frozen.)
+ * @param set the set
+ * @return the same set, now frozen
+ * @see uset_isFrozen
+ * @see uset_cloneAsThawed
+ * @stable ICU 4.0
+ */
+U_DRAFT void U_EXPORT2
+uset_freeze(USet *set);
+
+/**
+ * Clone the set and make the clone mutable.
+ * See the ICU4J Freezable interface for details.
+ * @param set the set
+ * @return the mutable clone
+ * @see uset_freeze
+ * @see uset_isFrozen
+ * @see uset_clone
+ * @stable ICU 4.0
+ */
+U_DRAFT USet * U_EXPORT2
+uset_cloneAsThawed(const USet *set);
+
+/**
+ * Causes the USet object to represent the range <code>start - end</code>.
+ * If <code>start > end</code> then this USet is set to an empty range.
+ * A frozen set will not be modified.
+ * @param set the object to set to the given range
+ * @param start first character in the set, inclusive
+ * @param end last character in the set, inclusive
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_set(USet* set,
+         UChar32 start, UChar32 end);
+
+/**
+ * Modifies the set to represent the set specified by the given
+ * pattern. See the UnicodeSet class description for the syntax of 
+ * the pattern language. See also the User Guide chapter about UnicodeSet.
+ * <em>Empties the set passed before applying the pattern.</em>
+ * A frozen set will not be modified.
+ * @param set               The set to which the pattern is to be applied. 
+ * @param pattern           A pointer to UChar string specifying what characters are in the set.
+ *                          The character at pattern[0] must be a '['.
+ * @param patternLength     The length of the UChar string. -1 if NUL terminated.
+ * @param options           A bitmask for options to apply to the pattern.
+ *                          Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param status            Returns an error if the pattern cannot be parsed.
+ * @return                  Upon successful parse, the value is either
+ *                          the index of the character after the closing ']' 
+ *                          of the parsed pattern.
+ *                          If the status code indicates failure, then the return value 
+ *                          is the index of the error in the source.
+ *
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2 
+uset_applyPattern(USet *set,
+                  const UChar *pattern, int32_t patternLength,
+                  uint32_t options,
+                  UErrorCode *status);
+
+/**
+ * Modifies the set to contain those code points which have the given value
+ * for the given binary or enumerated property, as returned by
+ * u_getIntPropertyValue.  Prior contents of this set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to contain the code points defined by the property
+ *
+ * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
+ * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
+ *
+ * @param value a value in the range u_getIntPropertyMinValue(prop)..
+ * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
+ * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
+ * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_applyIntPropertyValue(USet* set,
+                           UProperty prop, int32_t value, UErrorCode* ec);
+
+/**
+ * Modifies the set to contain those code points which have the
+ * given value for the given property.  Prior contents of this
+ * set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to contain the code points defined by the given
+ * property and value alias
+ *
+ * @param prop a string specifying a property alias, either short or long.
+ * The name is matched loosely.  See PropertyAliases.txt for names and a
+ * description of loose matching.  If the value string is empty, then this
+ * string is interpreted as either a General_Category value alias, a Script
+ * value alias, a binary property alias, or a special ID.  Special IDs are
+ * matched loosely and correspond to the following sets:
+ *
+ * "ANY" = [\\u0000-\\U0010FFFF],
+ * "ASCII" = [\\u0000-\\u007F],
+ * "Assigned" = [:^Cn:].
+ *
+ * @param propLength the length of the prop, or -1 if NULL
+ *
+ * @param value a string specifying a value alias, either short or long.
+ * The name is matched loosely.  See PropertyValueAliases.txt for names
+ * and a description of loose matching.  In addition to aliases listed,
+ * numeric values and canonical combining classes may be expressed
+ * numerically, e.g., ("nv", "0.5") or ("ccc", "220").  The value string
+ * may also be empty.
+ *
+ * @param valueLength the length of the value, or -1 if NULL
+ *
+ * @param ec error code input/output parameter
+ *
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_applyPropertyAlias(USet* set,
+                        const UChar *prop, int32_t propLength,
+                        const UChar *value, int32_t valueLength,
+                        UErrorCode* ec);
+
+/**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a UnicodeSet pattern.
+ *
+ * @param pattern a string specifying the pattern
+ * @param patternLength the length of the pattern, or -1 if NULL
+ * @param pos the given position
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
+                      int32_t pos);
+
+/**
+ * Returns a string representation of this set.  If the result of
+ * calling this function is passed to a uset_openPattern(), it
+ * will produce another set that is equal to this one.
+ * @param set the set
+ * @param result the string to receive the rules, may be NULL
+ * @param resultCapacity the capacity of result, may be 0 if result is NULL
+ * @param escapeUnprintable if TRUE then convert unprintable
+ * character to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx.  Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @param ec error code.
+ * @return length of string, possibly larger than resultCapacity
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_toPattern(const USet* set,
+               UChar* result, int32_t resultCapacity,
+               UBool escapeUnprintable,
+               UErrorCode* ec);
+
+/**
+ * Adds the given character to the given USet.  After this call,
+ * uset_contains(set, c) will return TRUE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param c the character to add
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_add(USet* set, UChar32 c);
+
+/**
+ * Adds all of the elements in the specified set to this set if
+ * they're not already present.  This operation effectively
+ * modifies this set so that its value is the <i>union</i> of the two
+ * sets.  The behavior of this operation is unspecified if the specified
+ * collection is modified while the operation is in progress.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to which to add the set
+ * @param additionalSet the source set whose elements are to be added to this set.
+ * @stable ICU 2.6
+ */
+U_STABLE void U_EXPORT2
+uset_addAll(USet* set, const USet *additionalSet);
+
+/**
+ * Adds the given range of characters to the given USet.  After this call,
+ * uset_contains(set, start, end) will return TRUE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param start the first character of the range to add, inclusive
+ * @param end the last character of the range to add, inclusive
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uset_addRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Adds the given string to the given USet.  After this call,
+ * uset_containsString(set, str, strLen) will return TRUE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the string to add
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_addString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
+ * If this set already any particular character, it has no effect on that character.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the source string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
+
+/**
+ * Removes the given character from the given USet.  After this call,
+ * uset_contains(set, c) will return FALSE.
+ * A frozen set will not be modified.
+ * @param set the object from which to remove the character
+ * @param c the character to remove
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_remove(USet* set, UChar32 c);
+
+/**
+ * Removes the given range of characters from the given USet.  After this call,
+ * uset_contains(set, start, end) will return FALSE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param start the first character of the range to remove, inclusive
+ * @param end the last character of the range to remove, inclusive
+ * @stable ICU 2.2
+ */
+U_STABLE void U_EXPORT2
+uset_removeRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Removes the given string to the given USet.  After this call,
+ * uset_containsString(set, str, strLen) will return FALSE.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the string to remove
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_removeString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Removes from this set all of its elements that are contained in the
+ * specified set.  This operation effectively modifies this
+ * set so that its value is the <i>asymmetric set difference</i> of
+ * the two sets.
+ * A frozen set will not be modified.
+ * @param set the object from which the elements are to be removed
+ * @param removeSet the object that defines which elements will be
+ * removed from this set
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_removeAll(USet* set, const USet* removeSet);
+
+/**
+ * Retain only the elements in this set that are contained in the
+ * specified range.  If <code>start > end</code> then an empty range is
+ * retained, leaving the set empty.  This is equivalent to
+ * a boolean logic AND, or a set INTERSECTION.
+ * A frozen set will not be modified.
+ *
+ * @param set the object for which to retain only the specified range
+ * @param start first character, inclusive, of range to be retained
+ * to this set.
+ * @param end last character, inclusive, of range to be retained
+ * to this set.
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_retain(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Retains only the elements in this set that are contained in the
+ * specified set.  In other words, removes from this set all of
+ * its elements that are not contained in the specified set.  This
+ * operation effectively modifies this set so that its value is
+ * the <i>intersection</i> of the two sets.
+ * A frozen set will not be modified.
+ *
+ * @param set the object on which to perform the retain
+ * @param retain set that defines which elements this set will retain
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_retainAll(USet* set, const USet* retain);
+
+/**
+ * Reallocate this objects internal structures to take up the least
+ * possible space, without changing this object's value.
+ * A frozen set will not be modified.
+ *
+ * @param set the object on which to perfrom the compact
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_compact(USet* set);
+
+/**
+ * Inverts this set.  This operation modifies this set so that
+ * its value is its complement.  This operation does not affect
+ * the multicharacter strings, if any.
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_complement(USet* set);
+
+/**
+ * Complements in this set all elements contained in the specified
+ * set.  Any character in the other set will be removed if it is
+ * in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param set the set with which to complement
+ * @param complement set that defines which elements will be xor'ed
+ * from this set.
+ * @stable ICU 3.2
+ */
+U_STABLE void U_EXPORT2
+uset_complementAll(USet* set, const USet* complement);
+
+/**
+ * Removes all of the elements from this set.  This set will be
+ * empty after this call returns.
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_clear(USet* set);
+
+/**
+ * Returns TRUE if the given USet contains no characters and no
+ * strings.
+ * @param set the set
+ * @return true if set is empty
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_isEmpty(const USet* set);
+
+/**
+ * Returns TRUE if the given USet contains the given character.
+ * This function works faster with a frozen set.
+ * @param set the set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_contains(const USet* set, UChar32 c);
+
+/**
+ * Returns TRUE if the given USet contains all characters c
+ * where start <= c && c <= end.
+ * @param set the set
+ * @param start the first character of the range to test, inclusive
+ * @param end the last character of the range to test, inclusive
+ * @return TRUE if set contains the range
+ * @stable ICU 2.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsRange(const USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Returns TRUE if the given USet contains the given string.
+ * @param set the set
+ * @param str the string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if set contains str
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsString(const USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Returns the index of the given character within this set, where
+ * the set is ordered by ascending code point.  If the character
+ * is not in this set, return -1.  The inverse of this method is
+ * <code>charAt()</code>.
+ * @param set the set
+ * @param c the character to obtain the index for
+ * @return an index from 0..size()-1, or -1
+ * @stable ICU 3.2
+ */
+U_STABLE int32_t U_EXPORT2
+uset_indexOf(const USet* set, UChar32 c);
+
+/**
+ * Returns the character at the given index within this set, where
+ * the set is ordered by ascending code point.  If the index is
+ * out of range, return (UChar32)-1.  The inverse of this method is
+ * <code>indexOf()</code>.
+ * @param set the set
+ * @param index an index from 0..size()-1 to obtain the char for
+ * @return the character at the given index, or (UChar32)-1.
+ * @stable ICU 3.2
+ */
+U_STABLE UChar32 U_EXPORT2
+uset_charAt(const USet* set, int32_t index);
+
+/**
+ * Returns the number of characters and strings contained in the given
+ * USet.
+ * @param set the set
+ * @return a non-negative integer counting the characters and strings
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_size(const USet* set);
+
+/**
+ * Returns the number of items in this set.  An item is either a range
+ * of characters or a single multicharacter string.
+ * @param set the set
+ * @return a non-negative integer counting the character ranges
+ * and/or strings contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getItemCount(const USet* set);
+
+/**
+ * Returns an item of this set.  An item is either a range of
+ * characters or a single multicharacter string.
+ * @param set the set
+ * @param itemIndex a non-negative integer in the range 0..
+ * uset_getItemCount(set)-1
+ * @param start pointer to variable to receive first character
+ * in range, inclusive
+ * @param end pointer to variable to receive last character in range,
+ * inclusive
+ * @param str buffer to receive the string, may be NULL
+ * @param strCapacity capacity of str, or 0 if str is NULL
+ * @param ec error code
+ * @return the length of the string (>= 2), or 0 if the item is a
+ * range, in which case it is the range *start..*end, or -1 if
+ * itemIndex is out of range
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getItem(const USet* set, int32_t itemIndex,
+             UChar32* start, UChar32* end,
+             UChar* str, int32_t strCapacity,
+             UErrorCode* ec);
+
+/**
+ * Returns true if set1 contains all the characters and strings
+ * of set2. It answers the question, 'Is set1 a superset of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsAll(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if this set contains all the characters
+ * of the given string. This is does not check containment of grapheme
+ * clusters, like uset_containsString.
+ * @param set set of characters to be checked for containment
+ * @param str string containing codepoints to be checked for containment
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if the test condition is met
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
+
+/**
+ * Returns true if set1 contains none of the characters and strings
+ * of set2. It answers the question, 'Is set1 a disjoint set of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsNone(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if set1 contains some of the characters and strings
+ * of set2. It answers the question, 'Does set1 and set2 have an intersection?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_containsSome(const USet* set1, const USet* set2);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ *         0 if the start of the string does not fit the spanCondition
+ * @stable ICU 4.0
+ * @see USetSpanCondition
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ *         the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 4.0
+ * @see USetSpanCondition
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ *         0 if the start of the string does not fit the spanCondition
+ * @stable ICU 4.0
+ * @see USetSpanCondition
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ *         the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 4.0
+ * @see USetSpanCondition
+ */
+U_DRAFT int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns true if set1 contains all of the characters and strings
+ * of set2, and vis versa. It answers the question, 'Is set1 equal to set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_STABLE UBool U_EXPORT2
+uset_equals(const USet* set1, const USet* set2);
+
+/*********************************************************************
+ * Serialized set API
+ *********************************************************************/
+
+/**
+ * Serializes this set into an array of 16-bit integers.  Serialization
+ * (currently) only records the characters in the set; multicharacter
+ * strings are ignored.
+ *
+ * The array
+ * has following format (each line is one 16-bit integer):
+ *
+ *  length     = (n+2*m) | (m!=0?0x8000:0)
+ *  bmpLength  = n; present if m!=0
+ *  bmp[0]
+ *  bmp[1]
+ *  ...
+ *  bmp[n-1]
+ *  supp-high[0]
+ *  supp-low[0]
+ *  supp-high[1]
+ *  supp-low[1]
+ *  ...
+ *  supp-high[m-1]
+ *  supp-low[m-1]
+ *
+ * The array starts with a header.  After the header are n bmp
+ * code points, then m supplementary code points.  Either n or m
+ * or both may be zero.  n+2*m is always <= 0x7FFF.
+ *
+ * If there are no supplementary characters (if m==0) then the
+ * header is one 16-bit integer, 'length', with value n.
+ *
+ * If there are supplementary characters (if m!=0) then the header
+ * is two 16-bit integers.  The first, 'length', has value
+ * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
+ *
+ * After the header the code points are stored in ascending order.
+ * Supplementary code points are stored as most significant 16
+ * bits followed by least significant 16 bits.
+ *
+ * @param set the set
+ * @param dest pointer to buffer of destCapacity 16-bit integers.
+ * May be NULL only if destCapacity is zero.
+ * @param destCapacity size of dest, or zero.  Must not be negative.
+ * @param pErrorCode pointer to the error code.  Will be set to
+ * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF.  Will be set to
+ * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
+ * @return the total length of the serialized format, including
+ * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
+ * than U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
+
+/**
+ * Given a serialized array, fill in the given serialized set object.
+ * @param fillSet pointer to result
+ * @param src pointer to start of array
+ * @param srcLength length of array
+ * @return true if the given array is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
+
+/**
+ * Set the USerializedSet to contain the given character (and nothing
+ * else).
+ * @param fillSet pointer to result
+ * @param c The codepoint to set
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
+
+/**
+ * Returns TRUE if the given USerializedSet contains the given
+ * character.
+ * @param set the serialized set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_serializedContains(const USerializedSet* set, UChar32 c);
+
+/**
+ * Returns the number of disjoint ranges of characters contained in
+ * the given serialized set.  Ignores any strings contained in the
+ * set.
+ * @param set the serialized set
+ * @return a non-negative integer counting the character ranges
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_STABLE int32_t U_EXPORT2
+uset_getSerializedRangeCount(const USerializedSet* set);
+
+/**
+ * Returns a range of characters contained in the given serialized
+ * set.
+ * @param set the serialized set
+ * @param rangeIndex a non-negative integer in the range 0..
+ * uset_getSerializedRangeCount(set)-1
+ * @param pStart pointer to variable to receive first character
+ * in range, inclusive
+ * @param pEnd pointer to variable to receive last character in range,
+ * inclusive
+ * @return true if rangeIndex is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
+                        UChar32* pStart, UChar32* pEnd);
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/usetiter.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/usetiter.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/usetiter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,318 +0,0 @@
-/*
-**********************************************************************
-* Copyright (c) 2002-2006, International Business Machines
-* Corporation and others.  All Rights Reserved.
-**********************************************************************
-*/
-#ifndef USETITER_H
-#define USETITER_H
-
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-
-/**
- * \file 
- * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
- */
-
-U_NAMESPACE_BEGIN
-
-class UnicodeSet;
-class UnicodeString;
-
-/**
- *
- * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
- * iterates over either code points or code point ranges.  After all
- * code points or ranges have been returned, it returns the
- * multicharacter strings of the UnicodSet, if any.
- *
- * This class is not intended to be subclassed.  Consider any fields
- *  or methods declared as "protected" to be private.  The use of
- *  protected in this class is an artifact of history.
- *
- * <p>To iterate over code points and strings, use a loop like this:
- * <pre>
- * UnicodeSetIterator it(set);
- * while (set.next()) {
- *     processItem(set.getString());
- * }
- * </pre>
- * <p>Each item in the set is accessed as a string.  Set elements
- *    consisting of single code points are returned as strings containing
- *    just the one code point.
- *
- * <p>To iterate over code point ranges, instead of individual code points,
- *    use a loop like this:
- * <pre>
- * UnicodeSetIterator it(set);
- * while (it.nextRange()) {
- *   if (it.isString()) {
- *     processString(it.getString());
- *   } else {
- *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
- *   }
- * }
- * </pre>
- * @author M. Davis
- * @stable ICU 2.4
- */
-class U_COMMON_API UnicodeSetIterator : public UObject {
-
- protected:
-
-    /**
-     * Value of <tt>codepoint</tt> if the iterator points to a string.
-     * If <tt>codepoint == IS_STRING</tt>, then examine
-     * <tt>string</tt> for the current iteration result.
-     * @stable ICU 2.4
-     */
-    enum { IS_STRING = -1 };
-
-    /**
-     * Current code point, or the special value <tt>IS_STRING</tt>, if
-     * the iterator points to a string.
-     * @stable ICU 2.4
-     */
-    UChar32 codepoint;
-
-    /**
-     * When iterating over ranges using <tt>nextRange()</tt>,
-     * <tt>codepointEnd</tt> contains the inclusive end of the
-     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
-     * iterating over code points using <tt>next()</tt>, or if
-     * <tt>codepoint == IS_STRING</tt>, then the value of
-     * <tt>codepointEnd</tt> is undefined.
-     * @stable ICU 2.4
-     */
-    UChar32 codepointEnd;
-
-    /**
-     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
-     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
-     * value of <tt>string</tt> is undefined.
-     * @stable ICU 2.4
-     */
-    const UnicodeString* string;
-
- public:
-
-    /**
-     * Create an iterator over the given set.  The iterator is valid
-     * only so long as <tt>set</tt> is valid.
-     * @param set set to iterate over
-     * @stable ICU 2.4
-     */
-    UnicodeSetIterator(const UnicodeSet& set);
-
-    /**
-     * Create an iterator over nothing.  <tt>next()</tt> and
-     * <tt>nextRange()</tt> return false. This is a convenience
-     * constructor allowing the target to be set later.
-     * @stable ICU 2.4
-     */
-    UnicodeSetIterator();
-
-    /**
-     * Destructor.
-     * @stable ICU 2.4
-     */
-    virtual ~UnicodeSetIterator();
-
-    /**
-     * Returns true if the current element is a string.  If so, the
-     * caller can retrieve it with <tt>getString()</tt>.  If this
-     * method returns false, the current element is a code point or
-     * code point range, depending on whether <tt>next()</tt> or
-     * <tt>nextRange()</tt> was called.
-     * Elements of types string and codepoint can both be retrieved
-     * with the function <tt>getString()</tt>.
-     * Elements of type codepoint can also be retrieved with
-     * <tt>getCodepoint()</tt>.
-     * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
-     * of the range, and <tt>getCodepointEnd()</tt> returns the end
-     * of the range.
-     * @stable ICU 2.4
-     */
-    inline UBool isString() const;
-
-    /**
-     * Returns the current code point, if <tt>isString()</tt> returned
-     * false.  Otherwise returns an undefined result.
-     * @stable ICU 2.4
-     */
-    inline UChar32 getCodepoint() const;
-
-    /**
-     * Returns the end of the current code point range, if
-     * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
-     * called.  Otherwise returns an undefined result.
-     * @stable ICU 2.4
-     */
-    inline UChar32 getCodepointEnd() const;
-
-    /**
-     * Returns the current string, if <tt>isString()</tt> returned
-     * true.  If the current iteration item is a code point, a UnicodeString
-     * containing that single code point is returned.
-     *
-     * Ownership of the returned string remains with the iterator.
-     * The string is guaranteed to remain valid only until the iterator is
-     *   advanced to the next item, or until the iterator is deleted.
-     * 
-     * @stable ICU 2.4
-     */
-    const UnicodeString& getString();
-
-    /**
-     * Advances the iteration position to the next element in the set, 
-     * which can be either a single code point or a string.  
-     * If there are no more elements in the set, return false.
-     *
-     * <p>
-     * If <tt>isString() == TRUE</tt>, the value is a
-     * string, otherwise the value is a
-     * single code point.  Elements of either type can be retrieved
-     * with the function <tt>getString()</tt>, while elements of
-     * consisting of a single code point can be retrieved with
-     * <tt>getCodepoint()</tt>
-     *
-     * <p>The order of iteration is all code points in sorted order,
-     * followed by all strings sorted order.    Do not mix
-     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
-     * calling <tt>reset()</tt> between them.  The results of doing so
-     * are undefined.
-     *
-     * @return true if there was another element in the set.
-     * @stable ICU 2.4
-     */
-    UBool next();
-
-    /**
-     * Returns the next element in the set, either a code point range
-     * or a string.  If there are no more elements in the set, return
-     * false.  If <tt>isString() == TRUE</tt>, the value is a
-     * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
-     * range of one or more code points from <tt>getCodepoint()</tt> to
-     * <tt>getCodepointeEnd()</tt> inclusive.
-     *
-     * <p>The order of iteration is all code points ranges in sorted
-     * order, followed by all strings sorted order.  Ranges are
-     * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
-     * is undefined unless <tt>isString() == TRUE</tt>.  Do not mix calls to
-     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
-     * <tt>reset()</tt> between them.  The results of doing so are
-     * undefined.
-     *
-     * @return true if there was another element in the set.
-     * @stable ICU 2.4
-     */
-    UBool nextRange();
-
-    /**
-     * Sets this iterator to visit the elements of the given set and
-     * resets it to the start of that set.  The iterator is valid only
-     * so long as <tt>set</tt> is valid.
-     * @param set the set to iterate over.
-     * @stable ICU 2.4
-     */
-    void reset(const UnicodeSet& set);
-
-    /**
-     * Resets this iterator to the start of the set.
-     * @stable ICU 2.4
-     */
-    void reset();
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
-     *
-     * @stable ICU 2.4
-     */
-    static UClassID U_EXPORT2 getStaticClassID();
-
-    /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
-     *
-     * @stable ICU 2.4
-     */
-    virtual UClassID getDynamicClassID() const;
-
-    // ======================= PRIVATES ===========================
-
- protected:
-
-    // endElement and nextElements are really UChar32's, but we keep
-    // them as signed int32_t's so we can do comparisons with
-    // endElement set to -1.  Leave them as int32_t's.
-    /** The set
-     * @stable ICU 2.4
-     */
-    const UnicodeSet* set;
-    /** End range
-     * @stable ICU 2.4
-     */
-    int32_t endRange;
-    /** Range
-     * @stable ICU 2.4
-     */
-    int32_t range;
-    /** End element
-     * @stable ICU 2.4
-     */
-    int32_t endElement;
-    /** Next element
-     * @stable ICU 2.4
-     */
-    int32_t nextElement;
-    //UBool abbreviated;
-    /** Next string
-     * @stable ICU 2.4
-     */
-    int32_t nextString;
-    /** String count
-     * @stable ICU 2.4
-     */
-    int32_t stringCount;
-
-    /**
-     *  Points to the string to use when the caller asks for a
-     *  string and the current iteration item is a code point, not a string.
-     *  @internal
-     */
-    UnicodeString *cpString;
-
-    /** Copy constructor. Disallowed.
-     * @stable ICU 2.4
-     */
-    UnicodeSetIterator(const UnicodeSetIterator&); // disallow
-
-    /** Assignment operator. Disallowed.
-     * @stable ICU 2.4
-     */
-    UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
-
-    /** Load range
-     * @stable ICU 2.4
-     */
-    virtual void loadRange(int32_t range);
-
-};
-
-inline UBool UnicodeSetIterator::isString() const {
-    return codepoint == (UChar32)IS_STRING;
-}
-
-inline UChar32 UnicodeSetIterator::getCodepoint() const {
-    return codepoint;
-}
-
-inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
-    return codepointEnd;
-}
-
-
-U_NAMESPACE_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/usetiter.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/usetiter.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/usetiter.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/usetiter.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,318 @@
+/*
+**********************************************************************
+* Copyright (c) 2002-2006, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+*/
+#ifndef USETITER_H
+#define USETITER_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file 
+ * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeSet;
+class UnicodeString;
+
+/**
+ *
+ * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
+ * iterates over either code points or code point ranges.  After all
+ * code points or ranges have been returned, it returns the
+ * multicharacter strings of the UnicodSet, if any.
+ *
+ * This class is not intended to be subclassed.  Consider any fields
+ *  or methods declared as "protected" to be private.  The use of
+ *  protected in this class is an artifact of history.
+ *
+ * <p>To iterate over code points and strings, use a loop like this:
+ * <pre>
+ * UnicodeSetIterator it(set);
+ * while (set.next()) {
+ *     processItem(set.getString());
+ * }
+ * </pre>
+ * <p>Each item in the set is accessed as a string.  Set elements
+ *    consisting of single code points are returned as strings containing
+ *    just the one code point.
+ *
+ * <p>To iterate over code point ranges, instead of individual code points,
+ *    use a loop like this:
+ * <pre>
+ * UnicodeSetIterator it(set);
+ * while (it.nextRange()) {
+ *   if (it.isString()) {
+ *     processString(it.getString());
+ *   } else {
+ *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
+ *   }
+ * }
+ * </pre>
+ * @author M. Davis
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UnicodeSetIterator : public UObject {
+
+ protected:
+
+    /**
+     * Value of <tt>codepoint</tt> if the iterator points to a string.
+     * If <tt>codepoint == IS_STRING</tt>, then examine
+     * <tt>string</tt> for the current iteration result.
+     * @stable ICU 2.4
+     */
+    enum { IS_STRING = -1 };
+
+    /**
+     * Current code point, or the special value <tt>IS_STRING</tt>, if
+     * the iterator points to a string.
+     * @stable ICU 2.4
+     */
+    UChar32 codepoint;
+
+    /**
+     * When iterating over ranges using <tt>nextRange()</tt>,
+     * <tt>codepointEnd</tt> contains the inclusive end of the
+     * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
+     * iterating over code points using <tt>next()</tt>, or if
+     * <tt>codepoint == IS_STRING</tt>, then the value of
+     * <tt>codepointEnd</tt> is undefined.
+     * @stable ICU 2.4
+     */
+    UChar32 codepointEnd;
+
+    /**
+     * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
+     * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
+     * value of <tt>string</tt> is undefined.
+     * @stable ICU 2.4
+     */
+    const UnicodeString* string;
+
+ public:
+
+    /**
+     * Create an iterator over the given set.  The iterator is valid
+     * only so long as <tt>set</tt> is valid.
+     * @param set set to iterate over
+     * @stable ICU 2.4
+     */
+    UnicodeSetIterator(const UnicodeSet& set);
+
+    /**
+     * Create an iterator over nothing.  <tt>next()</tt> and
+     * <tt>nextRange()</tt> return false. This is a convenience
+     * constructor allowing the target to be set later.
+     * @stable ICU 2.4
+     */
+    UnicodeSetIterator();
+
+    /**
+     * Destructor.
+     * @stable ICU 2.4
+     */
+    virtual ~UnicodeSetIterator();
+
+    /**
+     * Returns true if the current element is a string.  If so, the
+     * caller can retrieve it with <tt>getString()</tt>.  If this
+     * method returns false, the current element is a code point or
+     * code point range, depending on whether <tt>next()</tt> or
+     * <tt>nextRange()</tt> was called.
+     * Elements of types string and codepoint can both be retrieved
+     * with the function <tt>getString()</tt>.
+     * Elements of type codepoint can also be retrieved with
+     * <tt>getCodepoint()</tt>.
+     * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
+     * of the range, and <tt>getCodepointEnd()</tt> returns the end
+     * of the range.
+     * @stable ICU 2.4
+     */
+    inline UBool isString() const;
+
+    /**
+     * Returns the current code point, if <tt>isString()</tt> returned
+     * false.  Otherwise returns an undefined result.
+     * @stable ICU 2.4
+     */
+    inline UChar32 getCodepoint() const;
+
+    /**
+     * Returns the end of the current code point range, if
+     * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
+     * called.  Otherwise returns an undefined result.
+     * @stable ICU 2.4
+     */
+    inline UChar32 getCodepointEnd() const;
+
+    /**
+     * Returns the current string, if <tt>isString()</tt> returned
+     * true.  If the current iteration item is a code point, a UnicodeString
+     * containing that single code point is returned.
+     *
+     * Ownership of the returned string remains with the iterator.
+     * The string is guaranteed to remain valid only until the iterator is
+     *   advanced to the next item, or until the iterator is deleted.
+     * 
+     * @stable ICU 2.4
+     */
+    const UnicodeString& getString();
+
+    /**
+     * Advances the iteration position to the next element in the set, 
+     * which can be either a single code point or a string.  
+     * If there are no more elements in the set, return false.
+     *
+     * <p>
+     * If <tt>isString() == TRUE</tt>, the value is a
+     * string, otherwise the value is a
+     * single code point.  Elements of either type can be retrieved
+     * with the function <tt>getString()</tt>, while elements of
+     * consisting of a single code point can be retrieved with
+     * <tt>getCodepoint()</tt>
+     *
+     * <p>The order of iteration is all code points in sorted order,
+     * followed by all strings sorted order.    Do not mix
+     * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
+     * calling <tt>reset()</tt> between them.  The results of doing so
+     * are undefined.
+     *
+     * @return true if there was another element in the set.
+     * @stable ICU 2.4
+     */
+    UBool next();
+
+    /**
+     * Returns the next element in the set, either a code point range
+     * or a string.  If there are no more elements in the set, return
+     * false.  If <tt>isString() == TRUE</tt>, the value is a
+     * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
+     * range of one or more code points from <tt>getCodepoint()</tt> to
+     * <tt>getCodepointeEnd()</tt> inclusive.
+     *
+     * <p>The order of iteration is all code points ranges in sorted
+     * order, followed by all strings sorted order.  Ranges are
+     * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
+     * is undefined unless <tt>isString() == TRUE</tt>.  Do not mix calls to
+     * <tt>next()</tt> and <tt>nextRange()</tt> without calling
+     * <tt>reset()</tt> between them.  The results of doing so are
+     * undefined.
+     *
+     * @return true if there was another element in the set.
+     * @stable ICU 2.4
+     */
+    UBool nextRange();
+
+    /**
+     * Sets this iterator to visit the elements of the given set and
+     * resets it to the start of that set.  The iterator is valid only
+     * so long as <tt>set</tt> is valid.
+     * @param set the set to iterate over.
+     * @stable ICU 2.4
+     */
+    void reset(const UnicodeSet& set);
+
+    /**
+     * Resets this iterator to the start of the set.
+     * @stable ICU 2.4
+     */
+    void reset();
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.4
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.4
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    // ======================= PRIVATES ===========================
+
+ protected:
+
+    // endElement and nextElements are really UChar32's, but we keep
+    // them as signed int32_t's so we can do comparisons with
+    // endElement set to -1.  Leave them as int32_t's.
+    /** The set
+     * @stable ICU 2.4
+     */
+    const UnicodeSet* set;
+    /** End range
+     * @stable ICU 2.4
+     */
+    int32_t endRange;
+    /** Range
+     * @stable ICU 2.4
+     */
+    int32_t range;
+    /** End element
+     * @stable ICU 2.4
+     */
+    int32_t endElement;
+    /** Next element
+     * @stable ICU 2.4
+     */
+    int32_t nextElement;
+    //UBool abbreviated;
+    /** Next string
+     * @stable ICU 2.4
+     */
+    int32_t nextString;
+    /** String count
+     * @stable ICU 2.4
+     */
+    int32_t stringCount;
+
+    /**
+     *  Points to the string to use when the caller asks for a
+     *  string and the current iteration item is a code point, not a string.
+     *  @internal
+     */
+    UnicodeString *cpString;
+
+    /** Copy constructor. Disallowed.
+     * @stable ICU 2.4
+     */
+    UnicodeSetIterator(const UnicodeSetIterator&); // disallow
+
+    /** Assignment operator. Disallowed.
+     * @stable ICU 2.4
+     */
+    UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
+
+    /** Load range
+     * @stable ICU 2.4
+     */
+    virtual void loadRange(int32_t range);
+
+};
+
+inline UBool UnicodeSetIterator::isString() const {
+    return codepoint == (UChar32)IS_STRING;
+}
+
+inline UChar32 UnicodeSetIterator::getCodepoint() const {
+    return codepoint;
+}
+
+inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
+    return codepointEnd;
+}
+
+
+U_NAMESPACE_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/ushape.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ushape.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ushape.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,263 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 2000-2007, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-******************************************************************************
-*   file name:  ushape.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2000jun29
-*   created by: Markus W. Scherer
-*/
-
-#ifndef __USHAPE_H__
-#define __USHAPE_H__
-
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief C API:  Arabic shaping
- * 
- */
-
-/**
- * Shape Arabic text on a character basis.
- *
- * <p>This function performs basic operations for "shaping" Arabic text. It is most
- * useful for use with legacy data formats and legacy display technology
- * (simple terminals). All operations are performed on Unicode characters.</p>
- *
- * <p>Text-based shaping means that some character code points in the text are
- * replaced by others depending on the context. It transforms one kind of text
- * into another. In comparison, modern displays for Arabic text select
- * appropriate, context-dependent font glyphs for each text element, which means
- * that they transform text into a glyph vector.</p>
- *
- * <p>Text transformations are necessary when modern display technology is not
- * available or when text needs to be transformed to or from legacy formats that
- * use "shaped" characters. Since the Arabic script is cursive, connecting
- * adjacent letters to each other, computers select images for each letter based
- * on the surrounding letters. This usually results in four images per Arabic
- * letter: initial, middle, final, and isolated forms. In Unicode, on the other
- * hand, letters are normally stored abstract, and a display system is expected
- * to select the necessary glyphs. (This makes searching and other text
- * processing easier because the same letter has only one code.) It is possible
- * to mimic this with text transformations because there are characters in
- * Unicode that are rendered as letters with a specific shape
- * (or cursive connectivity). They were included for interoperability with
- * legacy systems and codepages, and for unsophisticated display systems.</p>
- *
- * <p>A second kind of text transformations is supported for Arabic digits:
- * For compatibility with legacy codepages that only include European digits,
- * it is possible to replace one set of digits by another, changing the
- * character code points. These operations can be performed for either
- * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
- * digits (U+06f0...U+06f9).</p>
- *
- * <p>Some replacements may result in more or fewer characters (code points).
- * By default, this means that the destination buffer may receive text with a
- * length different from the source length. Some legacy systems rely on the
- * length of the text to be constant. They expect extra spaces to be added
- * or consumed either next to the affected character or at the end of the
- * text.</p>
- *
- * <p>For details about the available operations, see the description of the
- * <code>U_SHAPE_...</code> options.</p>
- *
- * @param source The input text.
- *
- * @param sourceLength The number of UChars in <code>source</code>.
- *
- * @param dest The destination buffer that will receive the results of the
- *             requested operations. It may be <code>NULL</code> only if
- *             <code>destSize</code> is 0. The source and destination must not
- *             overlap.
- *
- * @param destSize The size (capacity) of the destination buffer in UChars.
- *                 If <code>destSize</code> is 0, then no output is produced,
- *                 but the necessary buffer size is returned ("preflighting").
- *
- * @param options This is a 32-bit set of flags that specify the operations
- *                that are performed on the input text. If no error occurs,
- *                then the result will always be written to the destination
- *                buffer.
- *
- * @param pErrorCode must be a valid pointer to an error code value,
- *        which must not indicate a failure before the function call.
- *
- * @return The number of UChars written to the destination buffer.
- *         If an error occured, then no output was written, or it may be
- *         incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then
- *         the return value indicates the necessary destination buffer size.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_shapeArabic(const UChar *source, int32_t sourceLength,
-              UChar *dest, int32_t destSize,
-              uint32_t options,
-              UErrorCode *pErrorCode);
-
-/**
- * Memory option: allow the result to have a different length than the source.
- * @stable ICU 2.0
- */
-#define U_SHAPE_LENGTH_GROW_SHRINK              0
-
-/**
- * Memory option: the result must have the same length as the source.
- * If more room is necessary, then try to consume spaces next to modified characters.
- * @stable ICU 2.0
- */
-#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR        1
-
-/**
- * Memory option: the result must have the same length as the source.
- * If more room is necessary, then try to consume spaces at the end of the text.
- * @stable ICU 2.0
- */
-#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END      2
-
-/**
- * Memory option: the result must have the same length as the source.
- * If more room is necessary, then try to consume spaces at the beginning of the text.
- * @stable ICU 2.0
- */
-#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3
-
-/** Bit mask for memory options. @stable ICU 2.0 */
-#define U_SHAPE_LENGTH_MASK                     3
-
-
-/** Direction indicator: the source is in logical (keyboard) order. @stable ICU 2.0 */
-#define U_SHAPE_TEXT_DIRECTION_LOGICAL          0
-
-/**
- * Direction indicator:
- * the source is in visual LTR order,
- * the leftmost displayed character stored first.
- * @stable ICU 2.0
- */
-#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR       4
-
-/** Bit mask for direction indicators. @stable ICU 2.0 */
-#define U_SHAPE_TEXT_DIRECTION_MASK             4
-
-
-/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */
-#define U_SHAPE_LETTERS_NOOP                    0
-
-/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */
-#define U_SHAPE_LETTERS_SHAPE                   8
-
-/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */
-#define U_SHAPE_LETTERS_UNSHAPE                 0x10
-
-/**
- * Letter shaping option: replace abstract letter characters by "shaped" ones.
- * The only difference with U_SHAPE_LETTERS_SHAPE is that Tashkeel letters
- * are always "shaped" into the isolated form instead of the medial form
- * (selecting code points from the Arabic Presentation Forms-B block).
- * @stable ICU 2.0
- */
-#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18
-
-/** Bit mask for letter shaping options. @stable ICU 2.0 */
-#define U_SHAPE_LETTERS_MASK                    0x18
-
-
-/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */
-#define U_SHAPE_DIGITS_NOOP                     0
-
-/**
- * Digit shaping option:
- * Replace European digits (U+0030...) by Arabic-Indic digits.
- * @stable ICU 2.0
- */
-#define U_SHAPE_DIGITS_EN2AN                    0x20
-
-/**
- * Digit shaping option:
- * Replace Arabic-Indic digits by European digits (U+0030...).
- * @stable ICU 2.0
- */
-#define U_SHAPE_DIGITS_AN2EN                    0x40
-
-/**
- * Digit shaping option:
- * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
- * strongly directional character is an Arabic letter
- * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
- * The direction of "preceding" depends on the direction indicator option.
- * For the first characters, the preceding strongly directional character
- * (initial state) is assumed to be not an Arabic letter
- * (it is <code>U_LEFT_TO_RIGHT</code> [L] or <code>U_RIGHT_TO_LEFT</code> [R]).
- * @stable ICU 2.0
- */
-#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR          0x60
-
-/**
- * Digit shaping option:
- * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
- * strongly directional character is an Arabic letter
- * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
- * The direction of "preceding" depends on the direction indicator option.
- * For the first characters, the preceding strongly directional character
- * (initial state) is assumed to be an Arabic letter.
- * @stable ICU 2.0
- */
-#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL          0x80
-
-/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
-#define U_SHAPE_DIGITS_RESERVED                 0xa0
-
-/** Bit mask for digit shaping options. @stable ICU 2.0 */
-#define U_SHAPE_DIGITS_MASK                     0xe0
-
-
-/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */
-#define U_SHAPE_DIGIT_TYPE_AN                   0
-
-/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */
-#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED          0x100
-
-/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
-#define U_SHAPE_DIGIT_TYPE_RESERVED             0x200
-
-/** Bit mask for digit type options. @stable ICU 2.0 */
-#define U_SHAPE_DIGIT_TYPE_MASK                 0x3f00
-
-/** 
- * Tashkeel aggregation option:
- * Replaces any combination of U+0651 with one of
- * U+064C, U+064D, U+064E, U+064F, U+0650 with
- * U+FC5E, U+FC5F, U+FC60, U+FC61, U+FC62 consecutively.
- * @stable ICU 3.6
- */
-#define U_SHAPE_AGGREGATE_TASHKEEL              0x4000
-/** Tashkeel aggregation option: do not aggregate tashkeels. @stable ICU 3.6 */
-#define U_SHAPE_AGGREGATE_TASHKEEL_NOOP         0
-/** Bit mask for tashkeel aggregation. @stable ICU 3.6 */
-#define U_SHAPE_AGGREGATE_TASHKEEL_MASK         0x4000
-
-/** 
- * Presentation form option:
- * Don't replace Arabic Presentation Forms-A and Arabic Presentation Forms-B
- * characters with 0+06xx characters, before shaping.
- * @stable ICU 3.6
- */
-#define U_SHAPE_PRESERVE_PRESENTATION           0x8000
-/** Presentation form option: 
- * Replace Arabic Presentation Forms-A and Arabic Presentationo Forms-B with 
- * their unshaped correspondants in range 0+06xx, before shaping.
- * @stable ICU 3.6 
- */
-#define U_SHAPE_PRESERVE_PRESENTATION_NOOP      0
-/** Bit mask for preserve presentation form. @stable ICU 3.6 */
-#define U_SHAPE_PRESERVE_PRESENTATION_MASK      0x8000
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/ushape.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ushape.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ushape.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ushape.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,263 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2000-2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  ushape.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2000jun29
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __USHAPE_H__
+#define __USHAPE_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API:  Arabic shaping
+ * 
+ */
+
+/**
+ * Shape Arabic text on a character basis.
+ *
+ * <p>This function performs basic operations for "shaping" Arabic text. It is most
+ * useful for use with legacy data formats and legacy display technology
+ * (simple terminals). All operations are performed on Unicode characters.</p>
+ *
+ * <p>Text-based shaping means that some character code points in the text are
+ * replaced by others depending on the context. It transforms one kind of text
+ * into another. In comparison, modern displays for Arabic text select
+ * appropriate, context-dependent font glyphs for each text element, which means
+ * that they transform text into a glyph vector.</p>
+ *
+ * <p>Text transformations are necessary when modern display technology is not
+ * available or when text needs to be transformed to or from legacy formats that
+ * use "shaped" characters. Since the Arabic script is cursive, connecting
+ * adjacent letters to each other, computers select images for each letter based
+ * on the surrounding letters. This usually results in four images per Arabic
+ * letter: initial, middle, final, and isolated forms. In Unicode, on the other
+ * hand, letters are normally stored abstract, and a display system is expected
+ * to select the necessary glyphs. (This makes searching and other text
+ * processing easier because the same letter has only one code.) It is possible
+ * to mimic this with text transformations because there are characters in
+ * Unicode that are rendered as letters with a specific shape
+ * (or cursive connectivity). They were included for interoperability with
+ * legacy systems and codepages, and for unsophisticated display systems.</p>
+ *
+ * <p>A second kind of text transformations is supported for Arabic digits:
+ * For compatibility with legacy codepages that only include European digits,
+ * it is possible to replace one set of digits by another, changing the
+ * character code points. These operations can be performed for either
+ * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
+ * digits (U+06f0...U+06f9).</p>
+ *
+ * <p>Some replacements may result in more or fewer characters (code points).
+ * By default, this means that the destination buffer may receive text with a
+ * length different from the source length. Some legacy systems rely on the
+ * length of the text to be constant. They expect extra spaces to be added
+ * or consumed either next to the affected character or at the end of the
+ * text.</p>
+ *
+ * <p>For details about the available operations, see the description of the
+ * <code>U_SHAPE_...</code> options.</p>
+ *
+ * @param source The input text.
+ *
+ * @param sourceLength The number of UChars in <code>source</code>.
+ *
+ * @param dest The destination buffer that will receive the results of the
+ *             requested operations. It may be <code>NULL</code> only if
+ *             <code>destSize</code> is 0. The source and destination must not
+ *             overlap.
+ *
+ * @param destSize The size (capacity) of the destination buffer in UChars.
+ *                 If <code>destSize</code> is 0, then no output is produced,
+ *                 but the necessary buffer size is returned ("preflighting").
+ *
+ * @param options This is a 32-bit set of flags that specify the operations
+ *                that are performed on the input text. If no error occurs,
+ *                then the result will always be written to the destination
+ *                buffer.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value,
+ *        which must not indicate a failure before the function call.
+ *
+ * @return The number of UChars written to the destination buffer.
+ *         If an error occured, then no output was written, or it may be
+ *         incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then
+ *         the return value indicates the necessary destination buffer size.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_shapeArabic(const UChar *source, int32_t sourceLength,
+              UChar *dest, int32_t destSize,
+              uint32_t options,
+              UErrorCode *pErrorCode);
+
+/**
+ * Memory option: allow the result to have a different length than the source.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_GROW_SHRINK              0
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces next to modified characters.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR        1
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the end of the text.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END      2
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the beginning of the text.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3
+
+/** Bit mask for memory options. @stable ICU 2.0 */
+#define U_SHAPE_LENGTH_MASK                     3
+
+
+/** Direction indicator: the source is in logical (keyboard) order. @stable ICU 2.0 */
+#define U_SHAPE_TEXT_DIRECTION_LOGICAL          0
+
+/**
+ * Direction indicator:
+ * the source is in visual LTR order,
+ * the leftmost displayed character stored first.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR       4
+
+/** Bit mask for direction indicators. @stable ICU 2.0 */
+#define U_SHAPE_TEXT_DIRECTION_MASK             4
+
+
+/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_NOOP                    0
+
+/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_SHAPE                   8
+
+/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_UNSHAPE                 0x10
+
+/**
+ * Letter shaping option: replace abstract letter characters by "shaped" ones.
+ * The only difference with U_SHAPE_LETTERS_SHAPE is that Tashkeel letters
+ * are always "shaped" into the isolated form instead of the medial form
+ * (selecting code points from the Arabic Presentation Forms-B block).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18
+
+/** Bit mask for letter shaping options. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_MASK                    0x18
+
+
+/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_NOOP                     0
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_EN2AN                    0x20
+
+/**
+ * Digit shaping option:
+ * Replace Arabic-Indic digits by European digits (U+0030...).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_AN2EN                    0x40
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
+ * strongly directional character is an Arabic letter
+ * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
+ * The direction of "preceding" depends on the direction indicator option.
+ * For the first characters, the preceding strongly directional character
+ * (initial state) is assumed to be not an Arabic letter
+ * (it is <code>U_LEFT_TO_RIGHT</code> [L] or <code>U_RIGHT_TO_LEFT</code> [R]).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR          0x60
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
+ * strongly directional character is an Arabic letter
+ * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
+ * The direction of "preceding" depends on the direction indicator option.
+ * For the first characters, the preceding strongly directional character
+ * (initial state) is assumed to be an Arabic letter.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL          0x80
+
+/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_RESERVED                 0xa0
+
+/** Bit mask for digit shaping options. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_MASK                     0xe0
+
+
+/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_AN                   0
+
+/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED          0x100
+
+/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_RESERVED             0x200
+
+/** Bit mask for digit type options. @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_MASK                 0x3f00
+
+/** 
+ * Tashkeel aggregation option:
+ * Replaces any combination of U+0651 with one of
+ * U+064C, U+064D, U+064E, U+064F, U+0650 with
+ * U+FC5E, U+FC5F, U+FC60, U+FC61, U+FC62 consecutively.
+ * @stable ICU 3.6
+ */
+#define U_SHAPE_AGGREGATE_TASHKEEL              0x4000
+/** Tashkeel aggregation option: do not aggregate tashkeels. @stable ICU 3.6 */
+#define U_SHAPE_AGGREGATE_TASHKEEL_NOOP         0
+/** Bit mask for tashkeel aggregation. @stable ICU 3.6 */
+#define U_SHAPE_AGGREGATE_TASHKEEL_MASK         0x4000
+
+/** 
+ * Presentation form option:
+ * Don't replace Arabic Presentation Forms-A and Arabic Presentation Forms-B
+ * characters with 0+06xx characters, before shaping.
+ * @stable ICU 3.6
+ */
+#define U_SHAPE_PRESERVE_PRESENTATION           0x8000
+/** Presentation form option: 
+ * Replace Arabic Presentation Forms-A and Arabic Presentationo Forms-B with 
+ * their unshaped correspondants in range 0+06xx, before shaping.
+ * @stable ICU 3.6 
+ */
+#define U_SHAPE_PRESERVE_PRESENTATION_NOOP      0
+/** Bit mask for preserve presentation form. @stable ICU 3.6 */
+#define U_SHAPE_PRESERVE_PRESENTATION_MASK      0x8000
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/usprep.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/usprep.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/usprep.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,156 +0,0 @@
-/*
- *******************************************************************************
- *
- *   Copyright (C) 2003-2006, International Business Machines
- *   Corporation and others.  All Rights Reserved.
- *
- *******************************************************************************
- *   file name:  usprep.h
- *   encoding:   US-ASCII
- *   tab size:   8 (not used)
- *   indentation:4
- *
- *   created on: 2003jul2
- *   created by: Ram Viswanadha
- */
-
-#ifndef __USPREP_H__
-#define __USPREP_H__
-
-/**
- * \file 
- * \brief C API: Implements the StringPrep algorithm.
- */
-
-#include "unicode/utypes.h"
-/**
- *
- * StringPrep API implements the StingPrep framework as described by RFC 3454.
- * StringPrep prepares Unicode strings for use in network protocols.
- * Profiles of StingPrep are set of rules and data according to with the
- * Unicode Strings are prepared. Each profiles contains tables which describe
- * how a code point should be treated. The tables are broadly classied into
- * <ul>
- *     <li> Unassinged Table: Contains code points that are unassigned 
- *          in the Unicode Version supported by StringPrep. Currently 
- *          RFC 3454 supports Unicode 3.2. </li>
- *     <li> Prohibited Table: Contains code points that are prohibted from
- *          the output of the StringPrep processing function. </li>
- *     <li> Mapping Table: Contains code ponts that are deleted from the output or case mapped. </li>
- * </ul>
- * 
- * The procedure for preparing Unicode strings:
- * <ol>
- *      <li> Map: For each character in the input, check if it has a mapping
- *           and, if so, replace it with its mapping. </li>
- *      <li> Normalize: Possibly normalize the result of step 1 using Unicode
- *           normalization. </li>
- *      <li> Prohibit: Check for any characters that are not allowed in the
- *        output.  If any are found, return an error.</li>
- *      <li> Check bidi: Possibly check for right-to-left characters, and if
- *           any are found, make sure that the whole string satisfies the
- *           requirements for bidirectional strings.  If the string does not
- *           satisfy the requirements for bidirectional strings, return an
- *           error.  </li>
- * </ol>
- * @author Ram Viswanadha
- */
-#if !UCONFIG_NO_IDNA
-
-#include "unicode/parseerr.h"
-
-/**
- * The StringPrep profile
- * @stable ICU 2.8
- */
-typedef struct UStringPrepProfile UStringPrepProfile;
-
-
-/** 
- * Option to prohibit processing of unassigned code points in the input
- * 
- * @see  usprep_prepare
- * @stable ICU 2.8
- */
-#define USPREP_DEFAULT 0x0000
-
-/** 
- * Option to allow processing of unassigned code points in the input
- * 
- * @see  usprep_prepare
- * @stable ICU 2.8
- */
-#define USPREP_ALLOW_UNASSIGNED 0x0001
-
-
-/**
- * Creates a StringPrep profile from the data file.
- *
- * @param path      string containing the full path pointing to the directory
- *                  where the profile reside followed by the package name
- *                  e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system.
- *                  if NULL, ICU default data files will be used.
- * @param fileName  name of the profile file to be opened
- * @param status    ICU error code in/out parameter. Must not be NULL.
- *                  Must fulfill U_SUCCESS before the function call.
- * @return Pointer to UStringPrepProfile that is opened. Should be closed by
- * calling usprep_close()
- * @see usprep_close()
- * @stable ICU 2.8
- */
-U_STABLE UStringPrepProfile* U_EXPORT2
-usprep_open(const char* path, 
-            const char* fileName,
-            UErrorCode* status);
-
-
-/**
- * Closes the profile
- * @param profile The profile to close
- * @stable ICU 2.8
- */
-U_STABLE void U_EXPORT2
-usprep_close(UStringPrepProfile* profile);
-
-
-/**
- * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC),
- * checks for prohited and BiDi characters in the order defined by RFC 3454
- * depending on the options specified in the profile.
- *
- * @param prep          The profile to use 
- * @param src           Pointer to UChar buffer containing the string to prepare
- * @param srcLength     Number of characters in the source string
- * @param dest          Pointer to the destination buffer to receive the output
- * @param destCapacity  The capacity of destination array
- * @param options       A bit set of options:
- *
- *  - USPREP_NONE               Prohibit processing of unassigned code points in the input
- *
- *  - USPREP_ALLOW_UNASSIGNED   Treat the unassigned code points are in the input 
- *                              as normal Unicode code points.
- *
- * @param parseError        Pointer to UParseError struct to receive information on position 
- *                          of error if an error is encountered. Can be NULL.
- * @param status            ICU in/out error code parameter.
- *                          U_INVALID_CHAR_FOUND if src contains
- *                          unmatched single surrogates.
- *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
- *                          too many code points.
- *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
- * @return The number of UChars in the destination buffer
- * @stable ICU 2.8
- */
-
-U_STABLE int32_t U_EXPORT2
-usprep_prepare(   const UStringPrepProfile* prep,
-                  const UChar* src, int32_t srcLength, 
-                  UChar* dest, int32_t destCapacity,
-                  int32_t options,
-                  UParseError* parseError,
-                  UErrorCode* status );
-
-
-#endif /* #if !UCONFIG_NO_IDNA */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/usprep.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/usprep.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/usprep.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/usprep.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,156 @@
+/*
+ *******************************************************************************
+ *
+ *   Copyright (C) 2003-2006, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *
+ *******************************************************************************
+ *   file name:  usprep.h
+ *   encoding:   US-ASCII
+ *   tab size:   8 (not used)
+ *   indentation:4
+ *
+ *   created on: 2003jul2
+ *   created by: Ram Viswanadha
+ */
+
+#ifndef __USPREP_H__
+#define __USPREP_H__
+
+/**
+ * \file 
+ * \brief C API: Implements the StringPrep algorithm.
+ */
+
+#include "unicode/utypes.h"
+/**
+ *
+ * StringPrep API implements the StingPrep framework as described by RFC 3454.
+ * StringPrep prepares Unicode strings for use in network protocols.
+ * Profiles of StingPrep are set of rules and data according to with the
+ * Unicode Strings are prepared. Each profiles contains tables which describe
+ * how a code point should be treated. The tables are broadly classied into
+ * <ul>
+ *     <li> Unassinged Table: Contains code points that are unassigned 
+ *          in the Unicode Version supported by StringPrep. Currently 
+ *          RFC 3454 supports Unicode 3.2. </li>
+ *     <li> Prohibited Table: Contains code points that are prohibted from
+ *          the output of the StringPrep processing function. </li>
+ *     <li> Mapping Table: Contains code ponts that are deleted from the output or case mapped. </li>
+ * </ul>
+ * 
+ * The procedure for preparing Unicode strings:
+ * <ol>
+ *      <li> Map: For each character in the input, check if it has a mapping
+ *           and, if so, replace it with its mapping. </li>
+ *      <li> Normalize: Possibly normalize the result of step 1 using Unicode
+ *           normalization. </li>
+ *      <li> Prohibit: Check for any characters that are not allowed in the
+ *        output.  If any are found, return an error.</li>
+ *      <li> Check bidi: Possibly check for right-to-left characters, and if
+ *           any are found, make sure that the whole string satisfies the
+ *           requirements for bidirectional strings.  If the string does not
+ *           satisfy the requirements for bidirectional strings, return an
+ *           error.  </li>
+ * </ol>
+ * @author Ram Viswanadha
+ */
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/parseerr.h"
+
+/**
+ * The StringPrep profile
+ * @stable ICU 2.8
+ */
+typedef struct UStringPrepProfile UStringPrepProfile;
+
+
+/** 
+ * Option to prohibit processing of unassigned code points in the input
+ * 
+ * @see  usprep_prepare
+ * @stable ICU 2.8
+ */
+#define USPREP_DEFAULT 0x0000
+
+/** 
+ * Option to allow processing of unassigned code points in the input
+ * 
+ * @see  usprep_prepare
+ * @stable ICU 2.8
+ */
+#define USPREP_ALLOW_UNASSIGNED 0x0001
+
+
+/**
+ * Creates a StringPrep profile from the data file.
+ *
+ * @param path      string containing the full path pointing to the directory
+ *                  where the profile reside followed by the package name
+ *                  e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system.
+ *                  if NULL, ICU default data files will be used.
+ * @param fileName  name of the profile file to be opened
+ * @param status    ICU error code in/out parameter. Must not be NULL.
+ *                  Must fulfill U_SUCCESS before the function call.
+ * @return Pointer to UStringPrepProfile that is opened. Should be closed by
+ * calling usprep_close()
+ * @see usprep_close()
+ * @stable ICU 2.8
+ */
+U_STABLE UStringPrepProfile* U_EXPORT2
+usprep_open(const char* path, 
+            const char* fileName,
+            UErrorCode* status);
+
+
+/**
+ * Closes the profile
+ * @param profile The profile to close
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+usprep_close(UStringPrepProfile* profile);
+
+
+/**
+ * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC),
+ * checks for prohited and BiDi characters in the order defined by RFC 3454
+ * depending on the options specified in the profile.
+ *
+ * @param prep          The profile to use 
+ * @param src           Pointer to UChar buffer containing the string to prepare
+ * @param srcLength     Number of characters in the source string
+ * @param dest          Pointer to the destination buffer to receive the output
+ * @param destCapacity  The capacity of destination array
+ * @param options       A bit set of options:
+ *
+ *  - USPREP_NONE               Prohibit processing of unassigned code points in the input
+ *
+ *  - USPREP_ALLOW_UNASSIGNED   Treat the unassigned code points are in the input 
+ *                              as normal Unicode code points.
+ *
+ * @param parseError        Pointer to UParseError struct to receive information on position 
+ *                          of error if an error is encountered. Can be NULL.
+ * @param status            ICU in/out error code parameter.
+ *                          U_INVALID_CHAR_FOUND if src contains
+ *                          unmatched single surrogates.
+ *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ *                          too many code points.
+ *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The number of UChars in the destination buffer
+ * @stable ICU 2.8
+ */
+
+U_STABLE int32_t U_EXPORT2
+usprep_prepare(   const UStringPrepProfile* prep,
+                  const UChar* src, int32_t srcLength, 
+                  UChar* dest, int32_t destCapacity,
+                  int32_t options,
+                  UParseError* parseError,
+                  UErrorCode* status );
+
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/ustdio.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ustdio.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ustdio.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,904 +0,0 @@
-/*
-******************************************************************************
-*
-*   Copyright (C) 1998-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-******************************************************************************
-*
-* File ustdio.h
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   10/16/98    stephen     Creation.
-*   11/06/98    stephen     Modified per code review.
-*   03/12/99    stephen     Modified for new C API.
-*   07/19/99    stephen     Minor doc update.
-*   02/01/01    george      Added sprintf & sscanf with all of its variants
-******************************************************************************
-*/
-
-#ifndef USTDIO_H
-#define USTDIO_H
-
-#include <stdio.h>
-#include <stdarg.h>
-
-#include "unicode/utypes.h"
-#include "unicode/ucnv.h"
-#include "unicode/utrans.h"
-
-/*
-    TODO
- The following is a small list as to what is currently wrong/suggestions for
- ustdio.
-
- * Make sure that * in the scanf format specification works for all formats.
- * Each UFILE takes up at least 2KB.
-    Look into adding setvbuf() for configurable buffers.
- * This library does buffering. The OS should do this for us already. Check on
-    this, and remove it from this library, if this is the case. Double buffering
-    wastes a lot of time and space.
- * Test stdin and stdout with the u_f* functions
- * Testing should be done for reading and writing multi-byte encodings,
-    and make sure that a character that is contained across buffer boundries
-    works even for incomplete characters.
- * Make sure that the last character is flushed when the file/string is closed.
- * snprintf should follow the C99 standard for the return value, which is
-    return the number of characters (excluding the trailing '\0')
-    which would have been written to the destination string regardless
-    of available space. This is like pre-flighting.
- * Everything that uses %s should do what operator>> does for UnicodeString.
-    It should convert one byte at a time, and once a character is
-    converted then check to see if it's whitespace or in the scanset.
-    If it's whitespace or in the scanset, put all the bytes back (do nothing
-    for sprintf/sscanf).
- * If bad string data is encountered, make sure that the function fails
-    without memory leaks and the unconvertable characters are valid
-    substitution or are escaped characters.
- * u_fungetc() can't unget a character when it's at the beginning of the
-    internal conversion buffer. For example, read the buffer size # of
-    characters, and then ungetc to get the previous character that was
-    at the end of the last buffer.
- * u_fflush() and u_fclose should return an int32_t like C99 functions.
-    0 is returned if the operation was successful and EOF otherwise.
- * u_fsettransliterator does not support U_READ side of transliteration.
- * The format specifier should limit the size of a format or honor it in
-    order to prevent buffer overruns.  (e.g. %256.256d).
- * u_fread and u_fwrite don't exist. They're needed for reading and writing
-    data structures without any conversion.
- * u_file_read and u_file_write are used for writing strings. u_fgets and
-    u_fputs or u_fread and u_fwrite should be used to do this.
- * The width parameter for all scanf formats, including scanset, needs
-    better testing. This prevents buffer overflows.
- * Figure out what is suppose to happen when a codepage is changed midstream.
-    Maybe a flush or a rewind are good enough.
- * Make sure that a UFile opened with "rw" can be used after using
-    u_fflush with a u_frewind.
- * scanf(%i) should detect what type of number to use.
- * Add more testing of the alternate format, %#
- * Look at newline handling of fputs/puts
- * Think more about codeunit/codepoint error handling/support in %S,%s,%C,%c,%[]
- * Complete the file documentation with proper doxygen formatting.
-    See http://oss.software.ibm.com/pipermail/icu/2003-July/005647.html
-*/
-
-/**
- * \file
- * \brief C API: Unicode stdio-like API
- *
- * <h2>Unicode stdio-like C API</h2>
- *
- * <p>This API provides an stdio-like API wrapper around ICU's other
- * formatting and parsing APIs. It is meant to ease the transition of adding
- * Unicode support to a preexisting applications using stdio. The following
- * is a small list of noticable differences between stdio and ICU I/O's
- * ustdio implementation.</p>
- *
- * <ul>
- * <li>Locale specific formatting and parsing is only done with file IO.</li>
- * <li>u_fstropen can be used to simulate file IO with strings.
- * This is similar to the iostream API, and it allows locale specific
- * formatting and parsing to be used.</li>
- * <li>This API provides uniform formatting and parsing behavior between
- * platforms (unlike the standard stdio implementations found on various
- * platforms).</li>
- * <li>This API is better suited for text data handling than binary data
- * handling when compared to the typical stdio implementation.</li>
- * <li>You can specify a Transliterator while using the file IO.</li>
- * <li>You can specify a file's codepage separately from the default
- * system codepage.</li>
- * </ul>
- *
- * <h2>Formatting and Parsing Specification</h2>
- *
- * General printf format:<br>
- * %[format modifier][width][.precision][type modifier][format]
- * 
- * General scanf format:<br>
- * %[*][format modifier][width][type modifier][format]
- * 
-<table cellspacing="3">
-<tr><td>format</td><td>default<br>printf<br>type</td><td>default<br>scanf<br>type</td><td>description</td></tr>
-<tr><td>%E</td><td>double</td><td>float</td><td>Scientific with an uppercase exponent</td></tr>
-<tr><td>%e</td><td>double</td><td>float</td><td>Scientific with a lowercase exponent</td></tr>
-<tr><td>%G</td><td>double</td><td>float</td><td>Use %E or %f for best format</td></tr>
-<tr><td>%g</td><td>double</td><td>float</td><td>Use %e or %f for best format</td></tr>
-<tr><td>%f</td><td>double</td><td>float</td><td>Simple floating point without the exponent</td></tr>
-<tr><td>%X</td><td>int32_t</td><td>int32_t</td><td>ustdio special uppercase hex radix formatting</td></tr>
-<tr><td>%x</td><td>int32_t</td><td>int32_t</td><td>ustdio special lowercase hex radix formatting</td></tr>
-<tr><td>%d</td><td>int32_t</td><td>int32_t</td><td>Decimal format</td></tr>
-<tr><td>%i</td><td>int32_t</td><td>int32_t</td><td>Same as %d</td></tr>
-<tr><td>%n</td><td>int32_t</td><td>int32_t</td><td>count (write the number of UTF-16 codeunits read/written)</td></tr>
-<tr><td>%o</td><td>int32_t</td><td>int32_t</td><td>ustdio special octal radix formatting</td></tr>
-<tr><td>%u</td><td>uint32_t</td><td>uint32_t</td><td>Decimal format</td></tr>
-<tr><td>%p</td><td>void *</td><td>void *</td><td>Prints the pointer value</td></tr>
-<tr><td>%s</td><td>char *</td><td>char *</td><td>Use default converter or specified converter from fopen</td></tr>
-<tr><td>%c</td><td>char</td><td>char</td><td>Use default converter or specified converter from fopen<br>
-When width is specified for scanf, this acts like a non-NULL-terminated char * string.<br>
-By default, only one char is written.</td></tr>
-<tr><td>%S</td><td>UChar *</td><td>UChar *</td><td>Null terminated UTF-16 string</td></tr>
-<tr><td>%C</td><td>UChar</td><td>UChar</td><td>16-bit Unicode code unit<br>
-When width is specified for scanf, this acts like a non-NULL-terminated UChar * string<br>
-By default, only one codepoint is written.</td></tr>
-<tr><td>%[]</td><td>&nbsp;</td><td>UChar *</td><td>Null terminated UTF-16 string which contains the filtered set of characters specified by the UnicodeSet</td></tr>
-<tr><td>%%</td><td>&nbsp;</td><td>&nbsp;</td><td>Show a percent sign</td></tr>
-</table>
-
-Format modifiers
-<table>
-<tr><td>modifier</td><td>formats</td><td>type</td><td>comments</td></tr>
-<tr><td>%h</td><td>%d, %i, %o, %x</td><td>int16_t</td><td>short format</td></tr>
-<tr><td>%h</td><td>%u</td><td>uint16_t</td><td>short format</td></tr>
-<tr><td>%h</td><td>c</td><td>char</td><td><b>(Unimplemented)</b> Use invariant converter</td></tr>
-<tr><td>%h</td><td>s</td><td>char *</td><td><b>(Unimplemented)</b> Use invariant converter</td></tr>
-<tr><td>%h</td><td>C</td><td>char</td><td><b>(Unimplemented)</b> 8-bit Unicode code unit</td></tr>
-<tr><td>%h</td><td>S</td><td>char *</td><td><b>(Unimplemented)</b> Null terminated UTF-8 string</td></tr>
-<tr><td>%l</td><td>%d, %i, %o, %x</td><td>int32_t</td><td>long format (no effect)</td></tr>
-<tr><td>%l</td><td>%u</td><td>uint32_t</td><td>long format (no effect)</td></tr>
-<tr><td>%l</td><td>c</td><td>N/A</td><td><b>(Unimplemented)</b> Reserved for future implementation</td></tr>
-<tr><td>%l</td><td>s</td><td>N/A</td><td><b>(Unimplemented)</b> Reserved for future implementation</td></tr>
-<tr><td>%l</td><td>C</td><td>UChar32</td><td><b>(Unimplemented)</b> 32-bit Unicode code unit</td></tr>
-<tr><td>%l</td><td>S</td><td>UChar32 *</td><td><b>(Unimplemented)</b> Null terminated UTF-32 string</td></tr>
-<tr><td>%ll</td><td>%d, %i, %o, %x</td><td>int64_t</td><td>long long format</td></tr>
-<tr><td>%ll</td><td>%u</td><td>uint64_t</td><td><b>(Unimplemented)</b> long long format</td></tr>
-<tr><td>%-</td><td><i>all</i></td><td>N/A</td><td>Left justify</td></tr>
-<tr><td>%+</td><td>%d, %i, %o, %x, %e, %f, %g, %E, %G</td><td>N/A</td><td>Always show the plus or minus sign. Needs data for plus sign.</td></tr>
-<tr><td>% </td><td>%d, %i, %o, %x, %e, %f, %g, %E, %G</td><td>N/A</td><td>Instead of a "+" output a blank character for positive numbers.</td></tr>
-<tr><td>%#</td><td>%d, %i, %o, %x, %e, %f, %g, %E, %G</td><td>N/A</td><td>Precede octal value with 0, hex with 0x and show the 
-                decimal point for floats.</td></tr>
-<tr><td>%<i>n</i></td><td><i>all</i></td><td>N/A</td><td>Width of input/output. num is an actual number from 0 to 
-                some large number.</td></tr>
-<tr><td>%.<i>n</i></td><td>%e, %f, %g, %E, %F, %G</td><td>N/A</td><td>Significant digits precision. num is an actual number from
-                0 to some large number.<br>If * is used in printf, then the precision is passed in as an argument before the number to be formatted.</td></tr>
-</table>
-
-printf modifier
-%*  int32_t     Next argument after this one specifies the width
-
-scanf modifier
-%*  N/A         This field is scanned, but not stored
-
-<p>If you are using this C API instead of the ustream.h API for C++,
-you can use one of the following u_fprintf examples to display a UnicodeString.</p>
-
-<pre><code>
-    UFILE *out = u_finit(stdout, NULL, NULL);
-    UnicodeString string1("string 1");
-    UnicodeString string2("string 2");
-    u_fprintf(out, "%S\n", string1.getTerminatedBuffer());
-    u_fprintf(out, "%.*S\n", string2.length(), string2.getBuffer());
-    u_fclose(out);
-</code></pre>
-
- */
-
-
-/**
- * When an end of file is encountered, this value can be returned.
- * @see u_fgetc
- * @stable 3.0
- */
-#define U_EOF 0xFFFF
-
-/** Forward declaration of a Unicode-aware file @stable 3.0 */
-typedef struct UFILE UFILE;
-
-/**
- * Enum for which direction of stream a transliterator applies to.
- * @see u_fsettransliterator
- * @stable ICU 3.0
- */
-typedef enum { 
-   U_READ = 1,
-   U_WRITE = 2, 
-   U_READWRITE =3  /* == (U_READ | U_WRITE) */ 
-} UFileDirection;
-
-/**
- * Open a UFILE.
- * A UFILE is a wrapper around a FILE* that is locale and codepage aware.
- * That is, data written to a UFILE will be formatted using the conventions
- * specified by that UFILE's Locale; this data will be in the character set
- * specified by that UFILE's codepage.
- * @param filename The name of the file to open.
- * @param perm The read/write permission for the UFILE; one of "r", "w", "rw"
- * @param locale The locale whose conventions will be used to format 
- * and parse output. If this parameter is NULL, the default locale will 
- * be used.
- * @param codepage The codepage in which data will be written to and
- * read from the file. If this paramter is NULL the system default codepage
- * will be used.
- * @return A new UFILE, or NULL if an error occurred.
- * @draft 3.0
- */
-U_DRAFT UFILE* U_EXPORT2
-u_fopen(const char    *filename,
-    const char    *perm,
-    const char    *locale,
-    const char    *codepage);
-
-/**
- * Open a UFILE on top of an existing FILE* stream.
- * @param f The FILE* to which this UFILE will attach.
- * @param locale The locale whose conventions will be used to format 
- * and parse output. If this parameter is NULL, the default locale will 
- * be used.
- * @param codepage The codepage in which data will be written to and
- * read from the file. If this paramter is NULL, data will be written and
- * read using the default codepage for <TT>locale</TT>, unless <TT>locale</TT>
- * is NULL, in which case the system default codepage will be used.
- * @return A new UFILE, or NULL if an error occurred.
- * @draft 3.0
- */
-U_DRAFT UFILE* U_EXPORT2
-u_finit(FILE        *f,
-    const char    *locale,
-    const char    *codepage);
-
-/**
- * Create a UFILE that can be used for localized formatting or parsing.
- * The u_sprintf and u_sscanf functions do not read or write numbers for a
- * specific locale. The ustdio.h file functions can be used on this UFILE.
- * The string is usable once u_fclose or u_fflush has been called on the
- * returned UFILE.
- * @param stringBuf The string used for reading or writing.
- * @param capacity The number of code units available for use in stringBuf
- * @param locale The locale whose conventions will be used to format 
- * and parse output. If this parameter is NULL, the default locale will 
- * be used.
- * @return A new UFILE, or NULL if an error occurred.
- * @draft 3.0
- */
-U_DRAFT UFILE* U_EXPORT2
-u_fstropen(UChar      *stringBuf,
-           int32_t     capacity,
-           const char *locale);
-
-/**
- * Close a UFILE.
- * @param file The UFILE to close.
- * @draft 3.0
- */
-U_DRAFT void U_EXPORT2
-u_fclose(UFILE *file);
-
-/**
- * Tests if the UFILE is at the end of the file stream.
- * @param f The UFILE from which to read.
- * @return Returns TRUE after the first read operation that attempts to
- * read past the end of the file. It returns FALSE if the current position is
- * not end of file.
- * @draft 3.0
-*/
-U_DRAFT UBool U_EXPORT2
-u_feof(UFILE  *f);
-
-/**
- * Flush output of a UFILE. Implies a flush of
- * converter/transliterator state. (That is, a logical break is
- * made in the output stream - for example if a different type of
- * output is desired.)  The underlying OS level file is also flushed.
- * @param file The UFILE to flush.
- * @draft 3.0
- */
-U_DRAFT void U_EXPORT2
-u_fflush(UFILE *file);
-
-/**
- * Rewind the file pointer to the beginning of the file.
- * @param file The UFILE to rewind.
- * @draft 3.0
- */
-U_DRAFT void
-u_frewind(UFILE *file);
-
-/**
- * Get the FILE* associated with a UFILE.
- * @param f The UFILE
- * @return A FILE*, owned by the UFILE.  The FILE <EM>must not</EM> be closed.
- * @draft 3.0
- */
-U_DRAFT FILE* U_EXPORT2
-u_fgetfile(UFILE *f);
-
-#if !UCONFIG_NO_FORMATTING
-
-/**
- * Get the locale whose conventions are used to format and parse output.
- * This is the same locale passed in the preceding call to<TT>u_fsetlocale</TT>
- * or <TT>u_fopen</TT>.
- * @param file The UFILE to set.
- * @return The locale whose conventions are used to format and parse output.
- * @draft 3.0
- */
-U_DRAFT const char* U_EXPORT2
-u_fgetlocale(UFILE *file);
-
-/**
- * Set the locale whose conventions will be used to format and parse output.
- * @param locale The locale whose conventions will be used to format 
- * and parse output.
- * @param file The UFILE to query.
- * @return NULL if successful, otherwise a negative number.
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_fsetlocale(UFILE      *file,
-             const char *locale);
-
-#endif
-
-/**
- * Get the codepage in which data is written to and read from the UFILE.
- * This is the same codepage passed in the preceding call to 
- * <TT>u_fsetcodepage</TT> or <TT>u_fopen</TT>.
- * @param file The UFILE to query.
- * @return The codepage in which data is written to and read from the UFILE,
- * or NULL if an error occurred.
- * @draft 3.0
- */
-U_DRAFT const char* U_EXPORT2
-u_fgetcodepage(UFILE *file);
-
-/**
- * Set the codepage in which data will be written to and read from the UFILE.
- * All Unicode data written to the UFILE will be converted to this codepage
- * before it is written to the underlying FILE*. It it generally a bad idea to
- * mix codepages within a file. This should only be called right
- * after opening the <TT>UFile</TT>, or after calling <TT>u_frewind</TT>.
- * @param codepage The codepage in which data will be written to 
- * and read from the file. For example <TT>"latin-1"</TT> or <TT>"ibm-943</TT>.
- * A value of NULL means the default codepage for the UFILE's current 
- * locale will be used.
- * @param file The UFILE to set.
- * @return 0 if successful, otherwise a negative number.
- * @see u_frewind
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_fsetcodepage(const char   *codepage,
-               UFILE        *file);
-
-
-/**
- * Returns an alias to the converter being used for this file.
- * @param f The UFILE to get the value from
- * @return alias to the converter
- * @draft 3.0
- */
-U_DRAFT UConverter* U_EXPORT2 u_fgetConverter(UFILE *f);
-
-#if !UCONFIG_NO_FORMATTING
-
-/* Output functions */
-
-/**
- * Write formatted data to a UFILE.
- * @param f The UFILE to which to write.
- * @param patternSpecification A pattern specifying how <TT>u_fprintf</TT> will
- * interpret the variable arguments received and format the data.
- * @return The number of Unicode characters written to <TT>f</TT>.
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_fprintf(UFILE         *f,
-          const char    *patternSpecification,
-          ... );
-
-/**
- * Write formatted data to a UFILE.
- * This is identical to <TT>u_fprintf</TT>, except that it will
- * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
- * @param f The UFILE to which to write.
- * @param patternSpecification A pattern specifying how <TT>u_fprintf</TT> will
- * interpret the variable arguments received and format the data.
- * @param ap The argument list to use.
- * @return The number of Unicode characters written to <TT>f</TT>.
- * @see u_fprintf
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_vfprintf(UFILE        *f,
-           const char   *patternSpecification,
-           va_list      ap);
-
-/**
- * Write formatted data to a UFILE.
- * @param f The UFILE to which to write.
- * @param patternSpecification A pattern specifying how <TT>u_fprintf</TT> will
- * interpret the variable arguments received and format the data.
- * @return The number of Unicode characters written to <TT>f</TT>.
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_fprintf_u(UFILE       *f,
-            const UChar *patternSpecification,
-            ... );
-
-/**
- * Write formatted data to a UFILE.
- * This is identical to <TT>u_fprintf_u</TT>, except that it will
- * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
- * @param f The UFILE to which to write.
- * @param patternSpecification A pattern specifying how <TT>u_fprintf</TT> will
- * interpret the variable arguments received and format the data.
- * @param ap The argument list to use.
- * @return The number of Unicode characters written to <TT>f</TT>.
- * @see u_fprintf_u
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_vfprintf_u(UFILE      *f,
-            const UChar *patternSpecification,
-            va_list     ap);
-#endif
-/**
- * Write a Unicode to a UFILE.  The null (U+0000) terminated UChar*
- * <TT>s</TT> will be written to <TT>f</TT>, excluding the NULL terminator.
- * A newline will be added to <TT>f</TT>.
- * @param s The UChar* to write.
- * @param f The UFILE to which to write.
- * @return A non-negative number if successful, EOF otherwise.
- * @see u_file_write
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_fputs(const UChar *s,
-        UFILE       *f);
-
-/**
- * Write a UChar to a UFILE.
- * @param uc The UChar to write.
- * @param f The UFILE to which to write.
- * @return The character written if successful, EOF otherwise.
- * @draft 3.0
- */
-U_DRAFT UChar32 U_EXPORT2
-u_fputc(UChar32  uc,
-        UFILE  *f);
-
-/**
- * Write Unicode to a UFILE.
- * The ustring passed in will be converted to the UFILE's underlying
- * codepage before it is written.
- * @param ustring A pointer to the Unicode data to write.
- * @param count The number of Unicode characters to write
- * @param f The UFILE to which to write.
- * @return The number of Unicode characters written.
- * @see u_fputs
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_file_write(const UChar    *ustring, 
-             int32_t        count, 
-             UFILE          *f);
-
-
-/* Input functions */
-#if !UCONFIG_NO_FORMATTING
-
-/**
- * Read formatted data from a UFILE.
- * @param f The UFILE from which to read.
- * @param patternSpecification A pattern specifying how <TT>u_fscanf</TT> will
- * interpret the variable arguments received and parse the data.
- * @return The number of items successfully converted and assigned, or EOF
- * if an error occurred.
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_fscanf(UFILE      *f,
-         const char *patternSpecification,
-         ... );
-
-/**
- * Read formatted data from a UFILE.
- * This is identical to <TT>u_fscanf</TT>, except that it will
- * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
- * @param f The UFILE from which to read.
- * @param patternSpecification A pattern specifying how <TT>u_fscanf</TT> will
- * interpret the variable arguments received and parse the data.
- * @param ap The argument list to use.
- * @return The number of items successfully converted and assigned, or EOF
- * if an error occurred.
- * @see u_fscanf
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_vfscanf(UFILE         *f,
-          const char    *patternSpecification,
-          va_list        ap);
-
-/**
- * Read formatted data from a UFILE.
- * @param f The UFILE from which to read.
- * @param patternSpecification A pattern specifying how <TT>u_fscanf</TT> will
- * interpret the variable arguments received and parse the data.
- * @return The number of items successfully converted and assigned, or EOF
- * if an error occurred.
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_fscanf_u(UFILE        *f,
-           const UChar  *patternSpecification,
-           ... );
-
-/**
- * Read formatted data from a UFILE.
- * This is identical to <TT>u_fscanf_u</TT>, except that it will
- * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
- * @param f The UFILE from which to read.
- * @param patternSpecification A pattern specifying how <TT>u_fscanf</TT> will
- * interpret the variable arguments received and parse the data.
- * @param ap The argument list to use.
- * @return The number of items successfully converted and assigned, or EOF
- * if an error occurred.
- * @see u_fscanf_u
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_vfscanf_u(UFILE       *f,
-            const UChar *patternSpecification,
-            va_list      ap);
-#endif
-
-/**
- * Read one line of text into a UChar* string from a UFILE. The newline
- * at the end of the line is read into the string. The string is always
- * null terminated
- * @param f The UFILE from which to read.
- * @param n The maximum number of characters - 1 to read.
- * @param s The UChar* to receive the read data.  Characters will be
- * stored successively in <TT>s</TT> until a newline or EOF is
- * reached. A null character (U+0000) will be appended to <TT>s</TT>.
- * @return A pointer to <TT>s</TT>, or NULL if no characters were available.
- * @draft 3.0
- */
-U_DRAFT UChar* U_EXPORT2
-u_fgets(UChar  *s,
-        int32_t n,
-        UFILE  *f);
-
-/**
- * Read a UChar from a UFILE. It is recommended that <TT>u_fgetcx</TT>
- * used instead for proper parsing functions, but sometimes reading
- * code units is needed instead of codepoints.
- *
- * @param f The UFILE from which to read.
- * @return The UChar value read, or U+FFFF if no character was available.
- * @draft 3.0
- */
-U_DRAFT UChar U_EXPORT2
-u_fgetc(UFILE   *f);
-
-/**
- * Read a UChar32 from a UFILE.
- *
- * @param f The UFILE from which to read.
- * @return The UChar32 value read, or U_EOF if no character was
- * available, or U+FFFFFFFF if an ill-formed character was
- * encountered.
- * @see u_unescape()
- * @draft 3.0
- */
-U_DRAFT UChar32 U_EXPORT2
-u_fgetcx(UFILE  *f);
-
-/**
- * Unget a UChar from a UFILE.
- * If this function is not the first to operate on <TT>f</TT> after a call
- * to <TT>u_fgetc</TT>, the results are undefined.
- * If this function is passed a character that was not recieved from the
- * previous <TT>u_fgetc</TT> or <TT>u_fgetcx</TT> call, the results are undefined.
- * @param c The UChar to put back on the stream.
- * @param f The UFILE to receive <TT>c</TT>.
- * @return The UChar32 value put back if successful, U_EOF otherwise.
- * @draft 3.0
- */
-U_DRAFT UChar32 U_EXPORT2
-u_fungetc(UChar32   c,
-      UFILE        *f);
-
-/**
- * Read Unicode from a UFILE.
- * Bytes will be converted from the UFILE's underlying codepage, with
- * subsequent conversion to Unicode. The data will not be NULL terminated.
- * @param chars A pointer to receive the Unicode data.
- * @param count The number of Unicode characters to read.
- * @param f The UFILE from which to read.
- * @return The number of Unicode characters read.
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_file_read(UChar        *chars, 
-        int32_t        count, 
-        UFILE         *f);
-
-#if !UCONFIG_NO_TRANSLITERATION
-
-/**
- * Set a transliterator on the UFILE. The transliterator will be owned by the
- * UFILE. 
- * @param file The UFILE to set transliteration on
- * @param adopt The UTransliterator to set. Can be NULL, which will
- * mean that no transliteration is used.
- * @param direction either U_READ, U_WRITE, or U_READWRITE - sets
- *  which direction the transliterator is to be applied to. If
- * U_READWRITE, the "Read" transliteration will be in the inverse
- * direction.
- * @param status ICU error code.
- * @return The previously set transliterator, owned by the
- * caller. If U_READWRITE is specified, only the WRITE transliterator
- * is returned. In most cases, the caller should call utrans_close()
- * on the result of this function.
- * @draft 3.0
- */
-U_DRAFT UTransliterator* U_EXPORT2
-u_fsettransliterator(UFILE *file, UFileDirection direction,
-                     UTransliterator *adopt, UErrorCode *status);
-
-#endif
-
-
-/* Output string functions */
-#if !UCONFIG_NO_FORMATTING
-
-
-/**
- * Write formatted data to a Unicode string.
- *
- * @param buffer The Unicode String to which to write.
- * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
- * interpret the variable arguments received and format the data.
- * @return The number of Unicode code units written to <TT>buffer</TT>. This
- * does not include the terminating null character.
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_sprintf(UChar       *buffer,
-        const char    *patternSpecification,
-        ... );
-
-/**
- * Write formatted data to a Unicode string. When the number of code units
- * required to store the data exceeds <TT>count</TT>, then <TT>count</TT> code
- * units of data are stored in <TT>buffer</TT> and a negative value is
- * returned. When the number of code units required to store the data equals
- * <TT>count</TT>, the string is not null terminated and <TT>count</TT> is
- * returned.
- *
- * @param buffer The Unicode String to which to write.
- * @param count The number of code units to read.
- * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
- * interpret the variable arguments received and format the data.
- * @return The number of Unicode characters that would have been written to
- * <TT>buffer</TT> had count been sufficiently large. This does not include
- * the terminating null character.
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_snprintf(UChar      *buffer,
-        int32_t       count,
-        const char    *patternSpecification,
-        ... );
-
-/**
- * Write formatted data to a Unicode string.
- * This is identical to <TT>u_sprintf</TT>, except that it will
- * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
- *
- * @param buffer The Unicode string to which to write.
- * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
- * interpret the variable arguments received and format the data.
- * @param ap The argument list to use.
- * @return The number of Unicode characters written to <TT>buffer</TT>.
- * @see u_sprintf
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_vsprintf(UChar      *buffer,
-        const char    *patternSpecification,
-        va_list        ap);
-
-/**
- * Write formatted data to a Unicode string.
- * This is identical to <TT>u_snprintf</TT>, except that it will
- * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.<br><br>
- * When the number of code units required to store the data exceeds
- * <TT>count</TT>, then <TT>count</TT> code units of data are stored in
- * <TT>buffer</TT> and a negative value is returned. When the number of code
- * units required to store the data equals <TT>count</TT>, the string is not
- * null terminated and <TT>count</TT> is returned.
- *
- * @param buffer The Unicode string to which to write.
- * @param count The number of code units to read.
- * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
- * interpret the variable arguments received and format the data.
- * @param ap The argument list to use.
- * @return The number of Unicode characters that would have been written to
- * <TT>buffer</TT> had count been sufficiently large.
- * @see u_sprintf
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_vsnprintf(UChar     *buffer,
-        int32_t       count,
-        const char    *patternSpecification,
-        va_list        ap);
-
-/**
- * Write formatted data to a Unicode string.
- *
- * @param buffer The Unicode string to which to write.
- * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
- * interpret the variable arguments received and format the data.
- * @return The number of Unicode characters written to <TT>buffer</TT>.
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_sprintf_u(UChar      *buffer,
-        const UChar    *patternSpecification,
-        ... );
-
-/**
- * Write formatted data to a Unicode string. When the number of code units
- * required to store the data exceeds <TT>count</TT>, then <TT>count</TT> code
- * units of data are stored in <TT>buffer</TT> and a negative value is
- * returned. When the number of code units required to store the data equals
- * <TT>count</TT>, the string is not null terminated and <TT>count</TT> is
- * returned.
- *
- * @param buffer The Unicode string to which to write.
- * @param count The number of code units to read.
- * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
- * interpret the variable arguments received and format the data.
- * @return The number of Unicode characters that would have been written to
- * <TT>buffer</TT> had count been sufficiently large.
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_snprintf_u(UChar     *buffer,
-        int32_t        count,
-        const UChar    *patternSpecification,
-        ... );
-
-/**
- * Write formatted data to a Unicode string.
- * This is identical to <TT>u_sprintf_u</TT>, except that it will
- * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
- *
- * @param buffer The Unicode string to which to write.
- * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
- * interpret the variable arguments received and format the data.
- * @param ap The argument list to use.
- * @return The number of Unicode characters written to <TT>f</TT>.
- * @see u_sprintf_u
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_vsprintf_u(UChar     *buffer,
-        const UChar    *patternSpecification,
-        va_list        ap);
-
-/**
- * Write formatted data to a Unicode string.
- * This is identical to <TT>u_snprintf_u</TT>, except that it will
- * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
- * When the number of code units required to store the data exceeds
- * <TT>count</TT>, then <TT>count</TT> code units of data are stored in
- * <TT>buffer</TT> and a negative value is returned. When the number of code
- * units required to store the data equals <TT>count</TT>, the string is not
- * null terminated and <TT>count</TT> is returned.
- *
- * @param buffer The Unicode string to which to write.
- * @param count The number of code units to read.
- * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
- * interpret the variable arguments received and format the data.
- * @param ap The argument list to use.
- * @return The number of Unicode characters that would have been written to
- * <TT>f</TT> had count been sufficiently large.
- * @see u_sprintf_u
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_vsnprintf_u(UChar *buffer,
-        int32_t         count,
-        const UChar     *patternSpecification,
-        va_list         ap);
-
-/* Input string functions */
-
-/**
- * Read formatted data from a Unicode string.
- *
- * @param buffer The Unicode string from which to read.
- * @param patternSpecification A pattern specifying how <TT>u_sscanf</TT> will
- * interpret the variable arguments received and parse the data.
- * @return The number of items successfully converted and assigned, or EOF
- * if an error occurred.
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_sscanf(const UChar   *buffer,
-        const char     *patternSpecification,
-        ... );
-
-/**
- * Read formatted data from a Unicode string.
- * This is identical to <TT>u_sscanf</TT>, except that it will
- * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
- *
- * @param buffer The Unicode string from which to read.
- * @param patternSpecification A pattern specifying how <TT>u_sscanf</TT> will
- * interpret the variable arguments received and parse the data.
- * @param ap The argument list to use.
- * @return The number of items successfully converted and assigned, or EOF
- * if an error occurred.
- * @see u_sscanf
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_vsscanf(const UChar  *buffer,
-        const char     *patternSpecification,
-        va_list        ap);
-
-/**
- * Read formatted data from a Unicode string.
- *
- * @param buffer The Unicode string from which to read.
- * @param patternSpecification A pattern specifying how <TT>u_sscanf</TT> will
- * interpret the variable arguments received and parse the data.
- * @return The number of items successfully converted and assigned, or EOF
- * if an error occurred.
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_sscanf_u(const UChar  *buffer,
-        const UChar     *patternSpecification,
-        ... );
-
-/**
- * Read formatted data from a Unicode string.
- * This is identical to <TT>u_sscanf_u</TT>, except that it will
- * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
- *
- * @param buffer The Unicode string from which to read.
- * @param patternSpecification A pattern specifying how <TT>u_sscanf</TT> will
- * interpret the variable arguments received and parse the data.
- * @param ap The argument list to use.
- * @return The number of items successfully converted and assigned, or EOF
- * if an error occurred.
- * @see u_sscanf_u
- * @draft 3.0
- */
-U_DRAFT int32_t U_EXPORT2
-u_vsscanf_u(const UChar *buffer,
-        const UChar     *patternSpecification,
-        va_list         ap);
-
-#endif
-#endif
-
-

Copied: MacRuby/trunk/icu-1060/unicode/ustdio.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ustdio.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ustdio.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ustdio.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,904 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 1998-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File ustdio.h
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   10/16/98    stephen     Creation.
+*   11/06/98    stephen     Modified per code review.
+*   03/12/99    stephen     Modified for new C API.
+*   07/19/99    stephen     Minor doc update.
+*   02/01/01    george      Added sprintf & sscanf with all of its variants
+******************************************************************************
+*/
+
+#ifndef USTDIO_H
+#define USTDIO_H
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "unicode/utypes.h"
+#include "unicode/ucnv.h"
+#include "unicode/utrans.h"
+
+/*
+    TODO
+ The following is a small list as to what is currently wrong/suggestions for
+ ustdio.
+
+ * Make sure that * in the scanf format specification works for all formats.
+ * Each UFILE takes up at least 2KB.
+    Look into adding setvbuf() for configurable buffers.
+ * This library does buffering. The OS should do this for us already. Check on
+    this, and remove it from this library, if this is the case. Double buffering
+    wastes a lot of time and space.
+ * Test stdin and stdout with the u_f* functions
+ * Testing should be done for reading and writing multi-byte encodings,
+    and make sure that a character that is contained across buffer boundries
+    works even for incomplete characters.
+ * Make sure that the last character is flushed when the file/string is closed.
+ * snprintf should follow the C99 standard for the return value, which is
+    return the number of characters (excluding the trailing '\0')
+    which would have been written to the destination string regardless
+    of available space. This is like pre-flighting.
+ * Everything that uses %s should do what operator>> does for UnicodeString.
+    It should convert one byte at a time, and once a character is
+    converted then check to see if it's whitespace or in the scanset.
+    If it's whitespace or in the scanset, put all the bytes back (do nothing
+    for sprintf/sscanf).
+ * If bad string data is encountered, make sure that the function fails
+    without memory leaks and the unconvertable characters are valid
+    substitution or are escaped characters.
+ * u_fungetc() can't unget a character when it's at the beginning of the
+    internal conversion buffer. For example, read the buffer size # of
+    characters, and then ungetc to get the previous character that was
+    at the end of the last buffer.
+ * u_fflush() and u_fclose should return an int32_t like C99 functions.
+    0 is returned if the operation was successful and EOF otherwise.
+ * u_fsettransliterator does not support U_READ side of transliteration.
+ * The format specifier should limit the size of a format or honor it in
+    order to prevent buffer overruns.  (e.g. %256.256d).
+ * u_fread and u_fwrite don't exist. They're needed for reading and writing
+    data structures without any conversion.
+ * u_file_read and u_file_write are used for writing strings. u_fgets and
+    u_fputs or u_fread and u_fwrite should be used to do this.
+ * The width parameter for all scanf formats, including scanset, needs
+    better testing. This prevents buffer overflows.
+ * Figure out what is suppose to happen when a codepage is changed midstream.
+    Maybe a flush or a rewind are good enough.
+ * Make sure that a UFile opened with "rw" can be used after using
+    u_fflush with a u_frewind.
+ * scanf(%i) should detect what type of number to use.
+ * Add more testing of the alternate format, %#
+ * Look at newline handling of fputs/puts
+ * Think more about codeunit/codepoint error handling/support in %S,%s,%C,%c,%[]
+ * Complete the file documentation with proper doxygen formatting.
+    See http://oss.software.ibm.com/pipermail/icu/2003-July/005647.html
+*/
+
+/**
+ * \file
+ * \brief C API: Unicode stdio-like API
+ *
+ * <h2>Unicode stdio-like C API</h2>
+ *
+ * <p>This API provides an stdio-like API wrapper around ICU's other
+ * formatting and parsing APIs. It is meant to ease the transition of adding
+ * Unicode support to a preexisting applications using stdio. The following
+ * is a small list of noticable differences between stdio and ICU I/O's
+ * ustdio implementation.</p>
+ *
+ * <ul>
+ * <li>Locale specific formatting and parsing is only done with file IO.</li>
+ * <li>u_fstropen can be used to simulate file IO with strings.
+ * This is similar to the iostream API, and it allows locale specific
+ * formatting and parsing to be used.</li>
+ * <li>This API provides uniform formatting and parsing behavior between
+ * platforms (unlike the standard stdio implementations found on various
+ * platforms).</li>
+ * <li>This API is better suited for text data handling than binary data
+ * handling when compared to the typical stdio implementation.</li>
+ * <li>You can specify a Transliterator while using the file IO.</li>
+ * <li>You can specify a file's codepage separately from the default
+ * system codepage.</li>
+ * </ul>
+ *
+ * <h2>Formatting and Parsing Specification</h2>
+ *
+ * General printf format:<br>
+ * %[format modifier][width][.precision][type modifier][format]
+ * 
+ * General scanf format:<br>
+ * %[*][format modifier][width][type modifier][format]
+ * 
+<table cellspacing="3">
+<tr><td>format</td><td>default<br>printf<br>type</td><td>default<br>scanf<br>type</td><td>description</td></tr>
+<tr><td>%E</td><td>double</td><td>float</td><td>Scientific with an uppercase exponent</td></tr>
+<tr><td>%e</td><td>double</td><td>float</td><td>Scientific with a lowercase exponent</td></tr>
+<tr><td>%G</td><td>double</td><td>float</td><td>Use %E or %f for best format</td></tr>
+<tr><td>%g</td><td>double</td><td>float</td><td>Use %e or %f for best format</td></tr>
+<tr><td>%f</td><td>double</td><td>float</td><td>Simple floating point without the exponent</td></tr>
+<tr><td>%X</td><td>int32_t</td><td>int32_t</td><td>ustdio special uppercase hex radix formatting</td></tr>
+<tr><td>%x</td><td>int32_t</td><td>int32_t</td><td>ustdio special lowercase hex radix formatting</td></tr>
+<tr><td>%d</td><td>int32_t</td><td>int32_t</td><td>Decimal format</td></tr>
+<tr><td>%i</td><td>int32_t</td><td>int32_t</td><td>Same as %d</td></tr>
+<tr><td>%n</td><td>int32_t</td><td>int32_t</td><td>count (write the number of UTF-16 codeunits read/written)</td></tr>
+<tr><td>%o</td><td>int32_t</td><td>int32_t</td><td>ustdio special octal radix formatting</td></tr>
+<tr><td>%u</td><td>uint32_t</td><td>uint32_t</td><td>Decimal format</td></tr>
+<tr><td>%p</td><td>void *</td><td>void *</td><td>Prints the pointer value</td></tr>
+<tr><td>%s</td><td>char *</td><td>char *</td><td>Use default converter or specified converter from fopen</td></tr>
+<tr><td>%c</td><td>char</td><td>char</td><td>Use default converter or specified converter from fopen<br>
+When width is specified for scanf, this acts like a non-NULL-terminated char * string.<br>
+By default, only one char is written.</td></tr>
+<tr><td>%S</td><td>UChar *</td><td>UChar *</td><td>Null terminated UTF-16 string</td></tr>
+<tr><td>%C</td><td>UChar</td><td>UChar</td><td>16-bit Unicode code unit<br>
+When width is specified for scanf, this acts like a non-NULL-terminated UChar * string<br>
+By default, only one codepoint is written.</td></tr>
+<tr><td>%[]</td><td>&nbsp;</td><td>UChar *</td><td>Null terminated UTF-16 string which contains the filtered set of characters specified by the UnicodeSet</td></tr>
+<tr><td>%%</td><td>&nbsp;</td><td>&nbsp;</td><td>Show a percent sign</td></tr>
+</table>
+
+Format modifiers
+<table>
+<tr><td>modifier</td><td>formats</td><td>type</td><td>comments</td></tr>
+<tr><td>%h</td><td>%d, %i, %o, %x</td><td>int16_t</td><td>short format</td></tr>
+<tr><td>%h</td><td>%u</td><td>uint16_t</td><td>short format</td></tr>
+<tr><td>%h</td><td>c</td><td>char</td><td><b>(Unimplemented)</b> Use invariant converter</td></tr>
+<tr><td>%h</td><td>s</td><td>char *</td><td><b>(Unimplemented)</b> Use invariant converter</td></tr>
+<tr><td>%h</td><td>C</td><td>char</td><td><b>(Unimplemented)</b> 8-bit Unicode code unit</td></tr>
+<tr><td>%h</td><td>S</td><td>char *</td><td><b>(Unimplemented)</b> Null terminated UTF-8 string</td></tr>
+<tr><td>%l</td><td>%d, %i, %o, %x</td><td>int32_t</td><td>long format (no effect)</td></tr>
+<tr><td>%l</td><td>%u</td><td>uint32_t</td><td>long format (no effect)</td></tr>
+<tr><td>%l</td><td>c</td><td>N/A</td><td><b>(Unimplemented)</b> Reserved for future implementation</td></tr>
+<tr><td>%l</td><td>s</td><td>N/A</td><td><b>(Unimplemented)</b> Reserved for future implementation</td></tr>
+<tr><td>%l</td><td>C</td><td>UChar32</td><td><b>(Unimplemented)</b> 32-bit Unicode code unit</td></tr>
+<tr><td>%l</td><td>S</td><td>UChar32 *</td><td><b>(Unimplemented)</b> Null terminated UTF-32 string</td></tr>
+<tr><td>%ll</td><td>%d, %i, %o, %x</td><td>int64_t</td><td>long long format</td></tr>
+<tr><td>%ll</td><td>%u</td><td>uint64_t</td><td><b>(Unimplemented)</b> long long format</td></tr>
+<tr><td>%-</td><td><i>all</i></td><td>N/A</td><td>Left justify</td></tr>
+<tr><td>%+</td><td>%d, %i, %o, %x, %e, %f, %g, %E, %G</td><td>N/A</td><td>Always show the plus or minus sign. Needs data for plus sign.</td></tr>
+<tr><td>% </td><td>%d, %i, %o, %x, %e, %f, %g, %E, %G</td><td>N/A</td><td>Instead of a "+" output a blank character for positive numbers.</td></tr>
+<tr><td>%#</td><td>%d, %i, %o, %x, %e, %f, %g, %E, %G</td><td>N/A</td><td>Precede octal value with 0, hex with 0x and show the 
+                decimal point for floats.</td></tr>
+<tr><td>%<i>n</i></td><td><i>all</i></td><td>N/A</td><td>Width of input/output. num is an actual number from 0 to 
+                some large number.</td></tr>
+<tr><td>%.<i>n</i></td><td>%e, %f, %g, %E, %F, %G</td><td>N/A</td><td>Significant digits precision. num is an actual number from
+                0 to some large number.<br>If * is used in printf, then the precision is passed in as an argument before the number to be formatted.</td></tr>
+</table>
+
+printf modifier
+%*  int32_t     Next argument after this one specifies the width
+
+scanf modifier
+%*  N/A         This field is scanned, but not stored
+
+<p>If you are using this C API instead of the ustream.h API for C++,
+you can use one of the following u_fprintf examples to display a UnicodeString.</p>
+
+<pre><code>
+    UFILE *out = u_finit(stdout, NULL, NULL);
+    UnicodeString string1("string 1");
+    UnicodeString string2("string 2");
+    u_fprintf(out, "%S\n", string1.getTerminatedBuffer());
+    u_fprintf(out, "%.*S\n", string2.length(), string2.getBuffer());
+    u_fclose(out);
+</code></pre>
+
+ */
+
+
+/**
+ * When an end of file is encountered, this value can be returned.
+ * @see u_fgetc
+ * @stable 3.0
+ */
+#define U_EOF 0xFFFF
+
+/** Forward declaration of a Unicode-aware file @stable 3.0 */
+typedef struct UFILE UFILE;
+
+/**
+ * Enum for which direction of stream a transliterator applies to.
+ * @see u_fsettransliterator
+ * @stable ICU 3.0
+ */
+typedef enum { 
+   U_READ = 1,
+   U_WRITE = 2, 
+   U_READWRITE =3  /* == (U_READ | U_WRITE) */ 
+} UFileDirection;
+
+/**
+ * Open a UFILE.
+ * A UFILE is a wrapper around a FILE* that is locale and codepage aware.
+ * That is, data written to a UFILE will be formatted using the conventions
+ * specified by that UFILE's Locale; this data will be in the character set
+ * specified by that UFILE's codepage.
+ * @param filename The name of the file to open.
+ * @param perm The read/write permission for the UFILE; one of "r", "w", "rw"
+ * @param locale The locale whose conventions will be used to format 
+ * and parse output. If this parameter is NULL, the default locale will 
+ * be used.
+ * @param codepage The codepage in which data will be written to and
+ * read from the file. If this paramter is NULL the system default codepage
+ * will be used.
+ * @return A new UFILE, or NULL if an error occurred.
+ * @draft 3.0
+ */
+U_DRAFT UFILE* U_EXPORT2
+u_fopen(const char    *filename,
+    const char    *perm,
+    const char    *locale,
+    const char    *codepage);
+
+/**
+ * Open a UFILE on top of an existing FILE* stream.
+ * @param f The FILE* to which this UFILE will attach.
+ * @param locale The locale whose conventions will be used to format 
+ * and parse output. If this parameter is NULL, the default locale will 
+ * be used.
+ * @param codepage The codepage in which data will be written to and
+ * read from the file. If this paramter is NULL, data will be written and
+ * read using the default codepage for <TT>locale</TT>, unless <TT>locale</TT>
+ * is NULL, in which case the system default codepage will be used.
+ * @return A new UFILE, or NULL if an error occurred.
+ * @draft 3.0
+ */
+U_DRAFT UFILE* U_EXPORT2
+u_finit(FILE        *f,
+    const char    *locale,
+    const char    *codepage);
+
+/**
+ * Create a UFILE that can be used for localized formatting or parsing.
+ * The u_sprintf and u_sscanf functions do not read or write numbers for a
+ * specific locale. The ustdio.h file functions can be used on this UFILE.
+ * The string is usable once u_fclose or u_fflush has been called on the
+ * returned UFILE.
+ * @param stringBuf The string used for reading or writing.
+ * @param capacity The number of code units available for use in stringBuf
+ * @param locale The locale whose conventions will be used to format 
+ * and parse output. If this parameter is NULL, the default locale will 
+ * be used.
+ * @return A new UFILE, or NULL if an error occurred.
+ * @draft 3.0
+ */
+U_DRAFT UFILE* U_EXPORT2
+u_fstropen(UChar      *stringBuf,
+           int32_t     capacity,
+           const char *locale);
+
+/**
+ * Close a UFILE.
+ * @param file The UFILE to close.
+ * @draft 3.0
+ */
+U_DRAFT void U_EXPORT2
+u_fclose(UFILE *file);
+
+/**
+ * Tests if the UFILE is at the end of the file stream.
+ * @param f The UFILE from which to read.
+ * @return Returns TRUE after the first read operation that attempts to
+ * read past the end of the file. It returns FALSE if the current position is
+ * not end of file.
+ * @draft 3.0
+*/
+U_DRAFT UBool U_EXPORT2
+u_feof(UFILE  *f);
+
+/**
+ * Flush output of a UFILE. Implies a flush of
+ * converter/transliterator state. (That is, a logical break is
+ * made in the output stream - for example if a different type of
+ * output is desired.)  The underlying OS level file is also flushed.
+ * @param file The UFILE to flush.
+ * @draft 3.0
+ */
+U_DRAFT void U_EXPORT2
+u_fflush(UFILE *file);
+
+/**
+ * Rewind the file pointer to the beginning of the file.
+ * @param file The UFILE to rewind.
+ * @draft 3.0
+ */
+U_DRAFT void
+u_frewind(UFILE *file);
+
+/**
+ * Get the FILE* associated with a UFILE.
+ * @param f The UFILE
+ * @return A FILE*, owned by the UFILE.  The FILE <EM>must not</EM> be closed.
+ * @draft 3.0
+ */
+U_DRAFT FILE* U_EXPORT2
+u_fgetfile(UFILE *f);
+
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * Get the locale whose conventions are used to format and parse output.
+ * This is the same locale passed in the preceding call to<TT>u_fsetlocale</TT>
+ * or <TT>u_fopen</TT>.
+ * @param file The UFILE to set.
+ * @return The locale whose conventions are used to format and parse output.
+ * @draft 3.0
+ */
+U_DRAFT const char* U_EXPORT2
+u_fgetlocale(UFILE *file);
+
+/**
+ * Set the locale whose conventions will be used to format and parse output.
+ * @param locale The locale whose conventions will be used to format 
+ * and parse output.
+ * @param file The UFILE to query.
+ * @return NULL if successful, otherwise a negative number.
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_fsetlocale(UFILE      *file,
+             const char *locale);
+
+#endif
+
+/**
+ * Get the codepage in which data is written to and read from the UFILE.
+ * This is the same codepage passed in the preceding call to 
+ * <TT>u_fsetcodepage</TT> or <TT>u_fopen</TT>.
+ * @param file The UFILE to query.
+ * @return The codepage in which data is written to and read from the UFILE,
+ * or NULL if an error occurred.
+ * @draft 3.0
+ */
+U_DRAFT const char* U_EXPORT2
+u_fgetcodepage(UFILE *file);
+
+/**
+ * Set the codepage in which data will be written to and read from the UFILE.
+ * All Unicode data written to the UFILE will be converted to this codepage
+ * before it is written to the underlying FILE*. It it generally a bad idea to
+ * mix codepages within a file. This should only be called right
+ * after opening the <TT>UFile</TT>, or after calling <TT>u_frewind</TT>.
+ * @param codepage The codepage in which data will be written to 
+ * and read from the file. For example <TT>"latin-1"</TT> or <TT>"ibm-943</TT>.
+ * A value of NULL means the default codepage for the UFILE's current 
+ * locale will be used.
+ * @param file The UFILE to set.
+ * @return 0 if successful, otherwise a negative number.
+ * @see u_frewind
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_fsetcodepage(const char   *codepage,
+               UFILE        *file);
+
+
+/**
+ * Returns an alias to the converter being used for this file.
+ * @param f The UFILE to get the value from
+ * @return alias to the converter
+ * @draft 3.0
+ */
+U_DRAFT UConverter* U_EXPORT2 u_fgetConverter(UFILE *f);
+
+#if !UCONFIG_NO_FORMATTING
+
+/* Output functions */
+
+/**
+ * Write formatted data to a UFILE.
+ * @param f The UFILE to which to write.
+ * @param patternSpecification A pattern specifying how <TT>u_fprintf</TT> will
+ * interpret the variable arguments received and format the data.
+ * @return The number of Unicode characters written to <TT>f</TT>.
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_fprintf(UFILE         *f,
+          const char    *patternSpecification,
+          ... );
+
+/**
+ * Write formatted data to a UFILE.
+ * This is identical to <TT>u_fprintf</TT>, except that it will
+ * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
+ * @param f The UFILE to which to write.
+ * @param patternSpecification A pattern specifying how <TT>u_fprintf</TT> will
+ * interpret the variable arguments received and format the data.
+ * @param ap The argument list to use.
+ * @return The number of Unicode characters written to <TT>f</TT>.
+ * @see u_fprintf
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_vfprintf(UFILE        *f,
+           const char   *patternSpecification,
+           va_list      ap);
+
+/**
+ * Write formatted data to a UFILE.
+ * @param f The UFILE to which to write.
+ * @param patternSpecification A pattern specifying how <TT>u_fprintf</TT> will
+ * interpret the variable arguments received and format the data.
+ * @return The number of Unicode characters written to <TT>f</TT>.
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_fprintf_u(UFILE       *f,
+            const UChar *patternSpecification,
+            ... );
+
+/**
+ * Write formatted data to a UFILE.
+ * This is identical to <TT>u_fprintf_u</TT>, except that it will
+ * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
+ * @param f The UFILE to which to write.
+ * @param patternSpecification A pattern specifying how <TT>u_fprintf</TT> will
+ * interpret the variable arguments received and format the data.
+ * @param ap The argument list to use.
+ * @return The number of Unicode characters written to <TT>f</TT>.
+ * @see u_fprintf_u
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_vfprintf_u(UFILE      *f,
+            const UChar *patternSpecification,
+            va_list     ap);
+#endif
+/**
+ * Write a Unicode to a UFILE.  The null (U+0000) terminated UChar*
+ * <TT>s</TT> will be written to <TT>f</TT>, excluding the NULL terminator.
+ * A newline will be added to <TT>f</TT>.
+ * @param s The UChar* to write.
+ * @param f The UFILE to which to write.
+ * @return A non-negative number if successful, EOF otherwise.
+ * @see u_file_write
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_fputs(const UChar *s,
+        UFILE       *f);
+
+/**
+ * Write a UChar to a UFILE.
+ * @param uc The UChar to write.
+ * @param f The UFILE to which to write.
+ * @return The character written if successful, EOF otherwise.
+ * @draft 3.0
+ */
+U_DRAFT UChar32 U_EXPORT2
+u_fputc(UChar32  uc,
+        UFILE  *f);
+
+/**
+ * Write Unicode to a UFILE.
+ * The ustring passed in will be converted to the UFILE's underlying
+ * codepage before it is written.
+ * @param ustring A pointer to the Unicode data to write.
+ * @param count The number of Unicode characters to write
+ * @param f The UFILE to which to write.
+ * @return The number of Unicode characters written.
+ * @see u_fputs
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_file_write(const UChar    *ustring, 
+             int32_t        count, 
+             UFILE          *f);
+
+
+/* Input functions */
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * Read formatted data from a UFILE.
+ * @param f The UFILE from which to read.
+ * @param patternSpecification A pattern specifying how <TT>u_fscanf</TT> will
+ * interpret the variable arguments received and parse the data.
+ * @return The number of items successfully converted and assigned, or EOF
+ * if an error occurred.
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_fscanf(UFILE      *f,
+         const char *patternSpecification,
+         ... );
+
+/**
+ * Read formatted data from a UFILE.
+ * This is identical to <TT>u_fscanf</TT>, except that it will
+ * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
+ * @param f The UFILE from which to read.
+ * @param patternSpecification A pattern specifying how <TT>u_fscanf</TT> will
+ * interpret the variable arguments received and parse the data.
+ * @param ap The argument list to use.
+ * @return The number of items successfully converted and assigned, or EOF
+ * if an error occurred.
+ * @see u_fscanf
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_vfscanf(UFILE         *f,
+          const char    *patternSpecification,
+          va_list        ap);
+
+/**
+ * Read formatted data from a UFILE.
+ * @param f The UFILE from which to read.
+ * @param patternSpecification A pattern specifying how <TT>u_fscanf</TT> will
+ * interpret the variable arguments received and parse the data.
+ * @return The number of items successfully converted and assigned, or EOF
+ * if an error occurred.
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_fscanf_u(UFILE        *f,
+           const UChar  *patternSpecification,
+           ... );
+
+/**
+ * Read formatted data from a UFILE.
+ * This is identical to <TT>u_fscanf_u</TT>, except that it will
+ * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
+ * @param f The UFILE from which to read.
+ * @param patternSpecification A pattern specifying how <TT>u_fscanf</TT> will
+ * interpret the variable arguments received and parse the data.
+ * @param ap The argument list to use.
+ * @return The number of items successfully converted and assigned, or EOF
+ * if an error occurred.
+ * @see u_fscanf_u
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_vfscanf_u(UFILE       *f,
+            const UChar *patternSpecification,
+            va_list      ap);
+#endif
+
+/**
+ * Read one line of text into a UChar* string from a UFILE. The newline
+ * at the end of the line is read into the string. The string is always
+ * null terminated
+ * @param f The UFILE from which to read.
+ * @param n The maximum number of characters - 1 to read.
+ * @param s The UChar* to receive the read data.  Characters will be
+ * stored successively in <TT>s</TT> until a newline or EOF is
+ * reached. A null character (U+0000) will be appended to <TT>s</TT>.
+ * @return A pointer to <TT>s</TT>, or NULL if no characters were available.
+ * @draft 3.0
+ */
+U_DRAFT UChar* U_EXPORT2
+u_fgets(UChar  *s,
+        int32_t n,
+        UFILE  *f);
+
+/**
+ * Read a UChar from a UFILE. It is recommended that <TT>u_fgetcx</TT>
+ * used instead for proper parsing functions, but sometimes reading
+ * code units is needed instead of codepoints.
+ *
+ * @param f The UFILE from which to read.
+ * @return The UChar value read, or U+FFFF if no character was available.
+ * @draft 3.0
+ */
+U_DRAFT UChar U_EXPORT2
+u_fgetc(UFILE   *f);
+
+/**
+ * Read a UChar32 from a UFILE.
+ *
+ * @param f The UFILE from which to read.
+ * @return The UChar32 value read, or U_EOF if no character was
+ * available, or U+FFFFFFFF if an ill-formed character was
+ * encountered.
+ * @see u_unescape()
+ * @draft 3.0
+ */
+U_DRAFT UChar32 U_EXPORT2
+u_fgetcx(UFILE  *f);
+
+/**
+ * Unget a UChar from a UFILE.
+ * If this function is not the first to operate on <TT>f</TT> after a call
+ * to <TT>u_fgetc</TT>, the results are undefined.
+ * If this function is passed a character that was not recieved from the
+ * previous <TT>u_fgetc</TT> or <TT>u_fgetcx</TT> call, the results are undefined.
+ * @param c The UChar to put back on the stream.
+ * @param f The UFILE to receive <TT>c</TT>.
+ * @return The UChar32 value put back if successful, U_EOF otherwise.
+ * @draft 3.0
+ */
+U_DRAFT UChar32 U_EXPORT2
+u_fungetc(UChar32   c,
+      UFILE        *f);
+
+/**
+ * Read Unicode from a UFILE.
+ * Bytes will be converted from the UFILE's underlying codepage, with
+ * subsequent conversion to Unicode. The data will not be NULL terminated.
+ * @param chars A pointer to receive the Unicode data.
+ * @param count The number of Unicode characters to read.
+ * @param f The UFILE from which to read.
+ * @return The number of Unicode characters read.
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_file_read(UChar        *chars, 
+        int32_t        count, 
+        UFILE         *f);
+
+#if !UCONFIG_NO_TRANSLITERATION
+
+/**
+ * Set a transliterator on the UFILE. The transliterator will be owned by the
+ * UFILE. 
+ * @param file The UFILE to set transliteration on
+ * @param adopt The UTransliterator to set. Can be NULL, which will
+ * mean that no transliteration is used.
+ * @param direction either U_READ, U_WRITE, or U_READWRITE - sets
+ *  which direction the transliterator is to be applied to. If
+ * U_READWRITE, the "Read" transliteration will be in the inverse
+ * direction.
+ * @param status ICU error code.
+ * @return The previously set transliterator, owned by the
+ * caller. If U_READWRITE is specified, only the WRITE transliterator
+ * is returned. In most cases, the caller should call utrans_close()
+ * on the result of this function.
+ * @draft 3.0
+ */
+U_DRAFT UTransliterator* U_EXPORT2
+u_fsettransliterator(UFILE *file, UFileDirection direction,
+                     UTransliterator *adopt, UErrorCode *status);
+
+#endif
+
+
+/* Output string functions */
+#if !UCONFIG_NO_FORMATTING
+
+
+/**
+ * Write formatted data to a Unicode string.
+ *
+ * @param buffer The Unicode String to which to write.
+ * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
+ * interpret the variable arguments received and format the data.
+ * @return The number of Unicode code units written to <TT>buffer</TT>. This
+ * does not include the terminating null character.
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_sprintf(UChar       *buffer,
+        const char    *patternSpecification,
+        ... );
+
+/**
+ * Write formatted data to a Unicode string. When the number of code units
+ * required to store the data exceeds <TT>count</TT>, then <TT>count</TT> code
+ * units of data are stored in <TT>buffer</TT> and a negative value is
+ * returned. When the number of code units required to store the data equals
+ * <TT>count</TT>, the string is not null terminated and <TT>count</TT> is
+ * returned.
+ *
+ * @param buffer The Unicode String to which to write.
+ * @param count The number of code units to read.
+ * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
+ * interpret the variable arguments received and format the data.
+ * @return The number of Unicode characters that would have been written to
+ * <TT>buffer</TT> had count been sufficiently large. This does not include
+ * the terminating null character.
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_snprintf(UChar      *buffer,
+        int32_t       count,
+        const char    *patternSpecification,
+        ... );
+
+/**
+ * Write formatted data to a Unicode string.
+ * This is identical to <TT>u_sprintf</TT>, except that it will
+ * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
+ *
+ * @param buffer The Unicode string to which to write.
+ * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
+ * interpret the variable arguments received and format the data.
+ * @param ap The argument list to use.
+ * @return The number of Unicode characters written to <TT>buffer</TT>.
+ * @see u_sprintf
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_vsprintf(UChar      *buffer,
+        const char    *patternSpecification,
+        va_list        ap);
+
+/**
+ * Write formatted data to a Unicode string.
+ * This is identical to <TT>u_snprintf</TT>, except that it will
+ * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.<br><br>
+ * When the number of code units required to store the data exceeds
+ * <TT>count</TT>, then <TT>count</TT> code units of data are stored in
+ * <TT>buffer</TT> and a negative value is returned. When the number of code
+ * units required to store the data equals <TT>count</TT>, the string is not
+ * null terminated and <TT>count</TT> is returned.
+ *
+ * @param buffer The Unicode string to which to write.
+ * @param count The number of code units to read.
+ * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
+ * interpret the variable arguments received and format the data.
+ * @param ap The argument list to use.
+ * @return The number of Unicode characters that would have been written to
+ * <TT>buffer</TT> had count been sufficiently large.
+ * @see u_sprintf
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_vsnprintf(UChar     *buffer,
+        int32_t       count,
+        const char    *patternSpecification,
+        va_list        ap);
+
+/**
+ * Write formatted data to a Unicode string.
+ *
+ * @param buffer The Unicode string to which to write.
+ * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
+ * interpret the variable arguments received and format the data.
+ * @return The number of Unicode characters written to <TT>buffer</TT>.
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_sprintf_u(UChar      *buffer,
+        const UChar    *patternSpecification,
+        ... );
+
+/**
+ * Write formatted data to a Unicode string. When the number of code units
+ * required to store the data exceeds <TT>count</TT>, then <TT>count</TT> code
+ * units of data are stored in <TT>buffer</TT> and a negative value is
+ * returned. When the number of code units required to store the data equals
+ * <TT>count</TT>, the string is not null terminated and <TT>count</TT> is
+ * returned.
+ *
+ * @param buffer The Unicode string to which to write.
+ * @param count The number of code units to read.
+ * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
+ * interpret the variable arguments received and format the data.
+ * @return The number of Unicode characters that would have been written to
+ * <TT>buffer</TT> had count been sufficiently large.
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_snprintf_u(UChar     *buffer,
+        int32_t        count,
+        const UChar    *patternSpecification,
+        ... );
+
+/**
+ * Write formatted data to a Unicode string.
+ * This is identical to <TT>u_sprintf_u</TT>, except that it will
+ * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
+ *
+ * @param buffer The Unicode string to which to write.
+ * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
+ * interpret the variable arguments received and format the data.
+ * @param ap The argument list to use.
+ * @return The number of Unicode characters written to <TT>f</TT>.
+ * @see u_sprintf_u
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_vsprintf_u(UChar     *buffer,
+        const UChar    *patternSpecification,
+        va_list        ap);
+
+/**
+ * Write formatted data to a Unicode string.
+ * This is identical to <TT>u_snprintf_u</TT>, except that it will
+ * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
+ * When the number of code units required to store the data exceeds
+ * <TT>count</TT>, then <TT>count</TT> code units of data are stored in
+ * <TT>buffer</TT> and a negative value is returned. When the number of code
+ * units required to store the data equals <TT>count</TT>, the string is not
+ * null terminated and <TT>count</TT> is returned.
+ *
+ * @param buffer The Unicode string to which to write.
+ * @param count The number of code units to read.
+ * @param patternSpecification A pattern specifying how <TT>u_sprintf</TT> will
+ * interpret the variable arguments received and format the data.
+ * @param ap The argument list to use.
+ * @return The number of Unicode characters that would have been written to
+ * <TT>f</TT> had count been sufficiently large.
+ * @see u_sprintf_u
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_vsnprintf_u(UChar *buffer,
+        int32_t         count,
+        const UChar     *patternSpecification,
+        va_list         ap);
+
+/* Input string functions */
+
+/**
+ * Read formatted data from a Unicode string.
+ *
+ * @param buffer The Unicode string from which to read.
+ * @param patternSpecification A pattern specifying how <TT>u_sscanf</TT> will
+ * interpret the variable arguments received and parse the data.
+ * @return The number of items successfully converted and assigned, or EOF
+ * if an error occurred.
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_sscanf(const UChar   *buffer,
+        const char     *patternSpecification,
+        ... );
+
+/**
+ * Read formatted data from a Unicode string.
+ * This is identical to <TT>u_sscanf</TT>, except that it will
+ * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
+ *
+ * @param buffer The Unicode string from which to read.
+ * @param patternSpecification A pattern specifying how <TT>u_sscanf</TT> will
+ * interpret the variable arguments received and parse the data.
+ * @param ap The argument list to use.
+ * @return The number of items successfully converted and assigned, or EOF
+ * if an error occurred.
+ * @see u_sscanf
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_vsscanf(const UChar  *buffer,
+        const char     *patternSpecification,
+        va_list        ap);
+
+/**
+ * Read formatted data from a Unicode string.
+ *
+ * @param buffer The Unicode string from which to read.
+ * @param patternSpecification A pattern specifying how <TT>u_sscanf</TT> will
+ * interpret the variable arguments received and parse the data.
+ * @return The number of items successfully converted and assigned, or EOF
+ * if an error occurred.
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_sscanf_u(const UChar  *buffer,
+        const UChar     *patternSpecification,
+        ... );
+
+/**
+ * Read formatted data from a Unicode string.
+ * This is identical to <TT>u_sscanf_u</TT>, except that it will
+ * <EM>not</EM> call <TT>va_start</TT> and <TT>va_end</TT>.
+ *
+ * @param buffer The Unicode string from which to read.
+ * @param patternSpecification A pattern specifying how <TT>u_sscanf</TT> will
+ * interpret the variable arguments received and parse the data.
+ * @param ap The argument list to use.
+ * @return The number of items successfully converted and assigned, or EOF
+ * if an error occurred.
+ * @see u_sscanf_u
+ * @draft 3.0
+ */
+U_DRAFT int32_t U_EXPORT2
+u_vsscanf_u(const UChar *buffer,
+        const UChar     *patternSpecification,
+        va_list         ap);
+
+#endif
+#endif
+
+

Deleted: MacRuby/trunk/icu-1060/unicode/ustream.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ustream.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ustream.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,67 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 2001-2007 International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*  FILE NAME : ustream.h
-*
-*   Modification History:
-*
-*   Date        Name        Description
-*   06/25/2001  grhoten     Move iostream from unistr.h
-******************************************************************************
-*/
-   
-#ifndef USTREAM_H
-#define USTREAM_H
-
-#include "unicode/unistr.h"
-
-/**
- * \file
- * \brief C++ API: Unicode iostream like API
- *
- * At this time, this API is very limited. It contains
- * operator<< and operator>> for UnicodeString manipulation with the
- * C++ I/O stream API.
- */
-
-#if U_IOSTREAM_SOURCE >= 199711
-#include <istream>
-#include <ostream>
-
-U_NAMESPACE_BEGIN
-
-/**
- * Write the contents of a UnicodeString to a C++ ostream. This functions writes
- * the characters in a UnicodeString to an ostream. The UChars in the
- * UnicodeString are converted to the char based ostream with the default
- * converter.
- * @stable 3.0
- */
-U_IO_API std::ostream & U_EXPORT2 operator<<(std::ostream& stream, const UnicodeString& s);
-
-/**
- * Write the contents from a C++ istream to a UnicodeString. The UChars in the
- * UnicodeString are converted from the char based istream with the default
- * converter.
- * @stable 3.0
- */
-U_IO_API std::istream & U_EXPORT2 operator>>(std::istream& stream, UnicodeString& s);
-U_NAMESPACE_END
-
-#elif U_IOSTREAM_SOURCE >= 198506
-/* <istream.h> and <ostream.h> don't exist. */
-#include <iostream.h>
-
-U_NAMESPACE_BEGIN
-U_IO_API ostream & U_EXPORT2 operator<<(ostream& stream, const UnicodeString& s);
-
-U_IO_API istream & U_EXPORT2 operator>>(istream& stream, UnicodeString& s);
-U_NAMESPACE_END
-
-#endif
-
-/* No operator for UChar because it can conflict with wchar_t  */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/ustream.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ustream.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ustream.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ustream.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,67 @@
+/*
+**********************************************************************
+*   Copyright (C) 2001-2007 International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*  FILE NAME : ustream.h
+*
+*   Modification History:
+*
+*   Date        Name        Description
+*   06/25/2001  grhoten     Move iostream from unistr.h
+******************************************************************************
+*/
+   
+#ifndef USTREAM_H
+#define USTREAM_H
+
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode iostream like API
+ *
+ * At this time, this API is very limited. It contains
+ * operator<< and operator>> for UnicodeString manipulation with the
+ * C++ I/O stream API.
+ */
+
+#if U_IOSTREAM_SOURCE >= 199711
+#include <istream>
+#include <ostream>
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Write the contents of a UnicodeString to a C++ ostream. This functions writes
+ * the characters in a UnicodeString to an ostream. The UChars in the
+ * UnicodeString are converted to the char based ostream with the default
+ * converter.
+ * @stable 3.0
+ */
+U_IO_API std::ostream & U_EXPORT2 operator<<(std::ostream& stream, const UnicodeString& s);
+
+/**
+ * Write the contents from a C++ istream to a UnicodeString. The UChars in the
+ * UnicodeString are converted from the char based istream with the default
+ * converter.
+ * @stable 3.0
+ */
+U_IO_API std::istream & U_EXPORT2 operator>>(std::istream& stream, UnicodeString& s);
+U_NAMESPACE_END
+
+#elif U_IOSTREAM_SOURCE >= 198506
+/* <istream.h> and <ostream.h> don't exist. */
+#include <iostream.h>
+
+U_NAMESPACE_BEGIN
+U_IO_API ostream & U_EXPORT2 operator<<(ostream& stream, const UnicodeString& s);
+
+U_IO_API istream & U_EXPORT2 operator>>(istream& stream, UnicodeString& s);
+U_NAMESPACE_END
+
+#endif
+
+/* No operator for UChar because it can conflict with wchar_t  */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/ustring.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/ustring.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/ustring.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1479 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1998-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*
-* File ustring.h
-*
-* Modification History:
-*
-*   Date        Name        Description
-*   12/07/98    bertrand    Creation.
-******************************************************************************
-*/
-
-#ifndef USTRING_H
-#define USTRING_H
-
-#include "unicode/utypes.h"
-#include "unicode/putil.h"
-#include "unicode/uiter.h"
-
-/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
-#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
-#   define UBRK_TYPEDEF_UBREAK_ITERATOR
-    typedef void UBreakIterator;
-#endif
-
-/**
- * \file
- * \brief C API: Unicode string handling functions
- *
- * These C API functions provide general Unicode string handling.
- *
- * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
- * functions. (For example, they do not check for bad arguments like NULL string pointers.)
- * In some cases, only the thread-safe variant of such a function is implemented here
- * (see u_strtok_r()).
- *
- * Other functions provide more Unicode-specific functionality like locale-specific
- * upper/lower-casing and string comparison in code point order.
- *
- * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
- * UTF-16 encodes each Unicode code point with either one or two UChar code units.
- * (This is the default form of Unicode, and a forward-compatible extension of the original,
- * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
- * in 1996.)
- *
- * Some APIs accept a 32-bit UChar32 value for a single code point.
- *
- * ICU also handles 16-bit Unicode text with unpaired surrogates.
- * Such text is not well-formed UTF-16.
- * Code-point-related functions treat unpaired surrogates as surrogate code points,
- * i.e., as separate units.
- *
- * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
- * it is much more efficient even for random access because the code unit values
- * for single-unit characters vs. lead units vs. trail units are completely disjoint.
- * This means that it is easy to determine character (code point) boundaries from
- * random offsets in the string.
- *
- * Unicode (UTF-16) string processing is optimized for the single-unit case.
- * Although it is important to support supplementary characters
- * (which use pairs of lead/trail code units called "surrogates"),
- * their occurrence is rare. Almost all characters in modern use require only
- * a single UChar code unit (i.e., their code point values are <=0xffff).
- *
- * For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html).
- * For a discussion of the handling of unpaired surrogates see also
- * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
- */
-
-/**
- * \defgroup ustring_ustrlen String Length
- * \ingroup ustring_strlen
- */
-/*@{*/
-/**
- * Determine the length of an array of UChar.
- *
- * @param s The array of UChars, NULL (U+0000) terminated.
- * @return The number of UChars in <code>chars</code>, minus the terminator.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_strlen(const UChar *s);
-/*@}*/
-
-/**
- * Count Unicode code points in the length UChar code units of the string.
- * A code point may occupy either one or two UChar code units.
- * Counting code points involves reading all code units.
- *
- * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h).
- *
- * @param s The input string.
- * @param length The number of UChar code units to be checked, or -1 to count all
- *               code points before the first NUL (U+0000).
- * @return The number of code points in the specified code units.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_countChar32(const UChar *s, int32_t length);
-
-/**
- * Check if the string contains more Unicode code points than a certain number.
- * This is more efficient than counting all code points in the entire string
- * and comparing that number with a threshold.
- * This function may not need to scan the string at all if the length is known
- * (not -1 for NUL-termination) and falls within a certain range, and
- * never needs to count more than 'number+1' code points.
- * Logically equivalent to (u_countChar32(s, length)>number).
- * A Unicode code point may occupy either one or two UChar code units.
- *
- * @param s The input string.
- * @param length The length of the string, or -1 if it is NUL-terminated.
- * @param number The number of code points in the string is compared against
- *               the 'number' parameter.
- * @return Boolean value for whether the string contains more Unicode code points
- *         than 'number'. Same as (u_countChar32(s, length)>number).
- * @stable ICU 2.4
- */
-U_STABLE UBool U_EXPORT2
-u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
-
-/**
- * Concatenate two ustrings.  Appends a copy of <code>src</code>,
- * including the null terminator, to <code>dst</code>. The initial copied
- * character from <code>src</code> overwrites the null terminator in <code>dst</code>.
- *
- * @param dst The destination string.
- * @param src The source string.
- * @return A pointer to <code>dst</code>.
- * @stable ICU 2.0
- */
-U_STABLE UChar* U_EXPORT2
-u_strcat(UChar     *dst, 
-    const UChar     *src);
-
-/**
- * Concatenate two ustrings.  
- * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
- * Adds a terminating NUL.
- * If src is too long, then only <code>n-1</code> characters will be copied
- * before the terminating NUL.
- * If <code>n&lt;=0</code> then dst is not modified.
- *
- * @param dst The destination string.
- * @param src The source string.
- * @param n The maximum number of characters to compare.
- * @return A pointer to <code>dst</code>.
- * @stable ICU 2.0
- */
-U_STABLE UChar* U_EXPORT2
-u_strncat(UChar     *dst, 
-     const UChar     *src, 
-     int32_t     n);
-
-/**
- * Find the first occurrence of a substring in a string.
- * The substring is found at code point boundaries.
- * That means that if the substring begins with
- * a trail surrogate or ends with a lead surrogate,
- * then it is found only if these surrogates stand alone in the text.
- * Otherwise, the substring edge units would be matched against
- * halves of surrogate pairs.
- *
- * @param s The string to search (NUL-terminated).
- * @param substring The substring to find (NUL-terminated).
- * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
- *         or <code>s</code> itself if the <code>substring</code> is empty,
- *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
- * @stable ICU 2.0
- *
- * @see u_strrstr
- * @see u_strFindFirst
- * @see u_strFindLast
- */
-U_STABLE UChar * U_EXPORT2
-u_strstr(const UChar *s, const UChar *substring);
-
-/**
- * Find the first occurrence of a substring in a string.
- * The substring is found at code point boundaries.
- * That means that if the substring begins with
- * a trail surrogate or ends with a lead surrogate,
- * then it is found only if these surrogates stand alone in the text.
- * Otherwise, the substring edge units would be matched against
- * halves of surrogate pairs.
- *
- * @param s The string to search.
- * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
- * @param substring The substring to find (NUL-terminated).
- * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
- * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
- *         or <code>s</code> itself if the <code>substring</code> is empty,
- *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
- * @stable ICU 2.4
- *
- * @see u_strstr
- * @see u_strFindLast
- */
-U_STABLE UChar * U_EXPORT2
-u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
-
-/**
- * Find the first occurrence of a BMP code point in a string.
- * A surrogate code point is found only if its match in the text is not
- * part of a surrogate pair.
- * A NUL character is found at the string terminator.
- *
- * @param s The string to search (NUL-terminated).
- * @param c The BMP code point to find.
- * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
- *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
- * @stable ICU 2.0
- *
- * @see u_strchr32
- * @see u_memchr
- * @see u_strstr
- * @see u_strFindFirst
- */
-U_STABLE UChar * U_EXPORT2
-u_strchr(const UChar *s, UChar c);
-
-/**
- * Find the first occurrence of a code point in a string.
- * A surrogate code point is found only if its match in the text is not
- * part of a surrogate pair.
- * A NUL character is found at the string terminator.
- *
- * @param s The string to search (NUL-terminated).
- * @param c The code point to find.
- * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
- *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
- * @stable ICU 2.0
- *
- * @see u_strchr
- * @see u_memchr32
- * @see u_strstr
- * @see u_strFindFirst
- */
-U_STABLE UChar * U_EXPORT2
-u_strchr32(const UChar *s, UChar32 c);
-
-/**
- * Find the last occurrence of a substring in a string.
- * The substring is found at code point boundaries.
- * That means that if the substring begins with
- * a trail surrogate or ends with a lead surrogate,
- * then it is found only if these surrogates stand alone in the text.
- * Otherwise, the substring edge units would be matched against
- * halves of surrogate pairs.
- *
- * @param s The string to search (NUL-terminated).
- * @param substring The substring to find (NUL-terminated).
- * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
- *         or <code>s</code> itself if the <code>substring</code> is empty,
- *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
- * @stable ICU 2.4
- *
- * @see u_strstr
- * @see u_strFindFirst
- * @see u_strFindLast
- */
-U_STABLE UChar * U_EXPORT2
-u_strrstr(const UChar *s, const UChar *substring);
-
-/**
- * Find the last occurrence of a substring in a string.
- * The substring is found at code point boundaries.
- * That means that if the substring begins with
- * a trail surrogate or ends with a lead surrogate,
- * then it is found only if these surrogates stand alone in the text.
- * Otherwise, the substring edge units would be matched against
- * halves of surrogate pairs.
- *
- * @param s The string to search.
- * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
- * @param substring The substring to find (NUL-terminated).
- * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
- * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
- *         or <code>s</code> itself if the <code>substring</code> is empty,
- *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
- * @stable ICU 2.4
- *
- * @see u_strstr
- * @see u_strFindLast
- */
-U_STABLE UChar * U_EXPORT2
-u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
-
-/**
- * Find the last occurrence of a BMP code point in a string.
- * A surrogate code point is found only if its match in the text is not
- * part of a surrogate pair.
- * A NUL character is found at the string terminator.
- *
- * @param s The string to search (NUL-terminated).
- * @param c The BMP code point to find.
- * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
- *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
- * @stable ICU 2.4
- *
- * @see u_strrchr32
- * @see u_memrchr
- * @see u_strrstr
- * @see u_strFindLast
- */
-U_STABLE UChar * U_EXPORT2
-u_strrchr(const UChar *s, UChar c);
-
-/**
- * Find the last occurrence of a code point in a string.
- * A surrogate code point is found only if its match in the text is not
- * part of a surrogate pair.
- * A NUL character is found at the string terminator.
- *
- * @param s The string to search (NUL-terminated).
- * @param c The code point to find.
- * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
- *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
- * @stable ICU 2.4
- *
- * @see u_strrchr
- * @see u_memchr32
- * @see u_strrstr
- * @see u_strFindLast
- */
-U_STABLE UChar * U_EXPORT2
-u_strrchr32(const UChar *s, UChar32 c);
-
-/**
- * Locates the first occurrence in the string <code>string</code> of any of the characters
- * in the string <code>matchSet</code>.
- * Works just like C's strpbrk but with Unicode.
- *
- * @param string The string in which to search, NUL-terminated.
- * @param matchSet A NUL-terminated string defining a set of code points
- *                 for which to search in the text string.
- * @return A pointer to the  character in <code>string</code> that matches one of the
- *         characters in <code>matchSet</code>, or NULL if no such character is found.
- * @stable ICU 2.0
- */
-U_STABLE UChar * U_EXPORT2
-u_strpbrk(const UChar *string, const UChar *matchSet);
-
-/**
- * Returns the number of consecutive characters in <code>string</code>,
- * beginning with the first, that do not occur somewhere in <code>matchSet</code>.
- * Works just like C's strcspn but with Unicode.
- *
- * @param string The string in which to search, NUL-terminated.
- * @param matchSet A NUL-terminated string defining a set of code points
- *                 for which to search in the text string.
- * @return The number of initial characters in <code>string</code> that do not
- *         occur in <code>matchSet</code>.
- * @see u_strspn
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_strcspn(const UChar *string, const UChar *matchSet);
-
-/**
- * Returns the number of consecutive characters in <code>string</code>,
- * beginning with the first, that occur somewhere in <code>matchSet</code>.
- * Works just like C's strspn but with Unicode.
- *
- * @param string The string in which to search, NUL-terminated.
- * @param matchSet A NUL-terminated string defining a set of code points
- *                 for which to search in the text string.
- * @return The number of initial characters in <code>string</code> that do
- *         occur in <code>matchSet</code>.
- * @see u_strcspn
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_strspn(const UChar *string, const UChar *matchSet);
-
-/**
- * The string tokenizer API allows an application to break a string into
- * tokens. Unlike strtok(), the saveState (the current pointer within the
- * original string) is maintained in saveState. In the first call, the
- * argument src is a pointer to the string. In subsequent calls to
- * return successive tokens of that string, src must be specified as
- * NULL. The value saveState is set by this function to maintain the
- * function's position within the string, and on each subsequent call
- * you must give this argument the same variable. This function does
- * handle surrogate pairs. This function is similar to the strtok_r()
- * the POSIX Threads Extension (1003.1c-1995) version.
- *
- * @param src String containing token(s). This string will be modified.
- *            After the first call to u_strtok_r(), this argument must
- *            be NULL to get to the next token.
- * @param delim Set of delimiter characters (Unicode code points).
- * @param saveState The current pointer within the original string,
- *              which is set by this function. The saveState
- *              parameter should the address of a local variable of type
- *              UChar *. (i.e. defined "Uhar *myLocalSaveState" and use
- *              &myLocalSaveState for this parameter).
- * @return A pointer to the next token found in src, or NULL
- *         when there are no more tokens.
- * @stable ICU 2.0
- */
-U_STABLE UChar * U_EXPORT2
-u_strtok_r(UChar    *src, 
-     const UChar    *delim,
-           UChar   **saveState);
-
-/**
- * Compare two Unicode strings for bitwise equality (code unit order).
- *
- * @param s1 A string to compare.
- * @param s2 A string to compare.
- * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
- * value if <code>s1</code> is bitwise less than <code>s2,</code>; a positive
- * value if <code>s1</code> is bitwise greater than <code>s2</code>.
- * @stable ICU 2.0
- */
-U_STABLE int32_t  U_EXPORT2
-u_strcmp(const UChar     *s1, 
-         const UChar     *s2);
-
-/**
- * Compare two Unicode strings in code point order.
- * See u_strCompare for details.
- *
- * @param s1 A string to compare.
- * @param s2 A string to compare.
- * @return a negative/zero/positive integer corresponding to whether
- * the first string is less than/equal to/greater than the second one
- * in code point order
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
-
-/**
- * Compare two Unicode strings (binary order).
- *
- * The comparison can be done in code unit order or in code point order.
- * They differ only in UTF-16 when
- * comparing supplementary code points (U+10000..U+10ffff)
- * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
- * In code unit order, high BMP code points sort after supplementary code points
- * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
- *
- * This functions works with strings of different explicitly specified lengths
- * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
- * NUL-terminated strings are possible with length arguments of -1.
- *
- * @param s1 First source string.
- * @param length1 Length of first source string, or -1 if NUL-terminated.
- *
- * @param s2 Second source string.
- * @param length2 Length of second source string, or -1 if NUL-terminated.
- *
- * @param codePointOrder Choose between code unit order (FALSE)
- *                       and code point order (TRUE).
- *
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @stable ICU 2.2
- */
-U_STABLE int32_t U_EXPORT2
-u_strCompare(const UChar *s1, int32_t length1,
-             const UChar *s2, int32_t length2,
-             UBool codePointOrder);
-
-/**
- * Compare two Unicode strings (binary order)
- * as presented by UCharIterator objects.
- * Works otherwise just like u_strCompare().
- *
- * Both iterators are reset to their start positions.
- * When the function returns, it is undefined where the iterators
- * have stopped.
- *
- * @param iter1 First source string iterator.
- * @param iter2 Second source string iterator.
- * @param codePointOrder Choose between code unit order (FALSE)
- *                       and code point order (TRUE).
- *
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @see u_strCompare
- *
- * @stable ICU 2.6
- */
-U_STABLE int32_t U_EXPORT2
-u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
-
-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also unistr.h and unorm.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER  0x8000
-#endif
-
-/**
- * Compare two strings case-insensitively using full case folding.
- * This is equivalent to
- *   u_strCompare(u_strFoldCase(s1, options),
- *                u_strFoldCase(s2, options),
- *                (options&U_COMPARE_CODE_POINT_ORDER)!=0).
- *
- * The comparison can be done in UTF-16 code unit order or in code point order.
- * They differ only when comparing supplementary code points (U+10000..U+10ffff)
- * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
- * In code unit order, high BMP code points sort after supplementary code points
- * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
- *
- * This functions works with strings of different explicitly specified lengths
- * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
- * NUL-terminated strings are possible with length arguments of -1.
- *
- * @param s1 First source string.
- * @param length1 Length of first source string, or -1 if NUL-terminated.
- *
- * @param s2 Second source string.
- * @param length2 Length of second source string, or -1 if NUL-terminated.
- *
- * @param options A bit set of options:
- *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
- *     Comparison in code unit order with default case folding.
- *
- *   - U_COMPARE_CODE_POINT_ORDER
- *     Set to choose code point order instead of code unit order
- *     (see u_strCompare for details).
- *
- *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
- *
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                  which must not indicate a failure before the function call.
- *
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @stable ICU 2.2
- */
-U_STABLE int32_t U_EXPORT2
-u_strCaseCompare(const UChar *s1, int32_t length1,
-                 const UChar *s2, int32_t length2,
-                 uint32_t options,
-                 UErrorCode *pErrorCode);
-
-/**
- * Compare two ustrings for bitwise equality. 
- * Compares at most <code>n</code> characters.
- *
- * @param ucs1 A string to compare.
- * @param ucs2 A string to compare.
- * @param n The maximum number of characters to compare.
- * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
- * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
- * value if <code>s1</code> is bitwise greater than <code>s2</code>.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_strncmp(const UChar     *ucs1, 
-     const UChar     *ucs2, 
-     int32_t     n);
-
-/**
- * Compare two Unicode strings in code point order.
- * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
- * For details, see u_strCompare().
- *
- * @param s1 A string to compare.
- * @param s2 A string to compare.
- * @param n The maximum number of characters to compare.
- * @return a negative/zero/positive integer corresponding to whether
- * the first string is less than/equal to/greater than the second one
- * in code point order
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
-
-/**
- * Compare two strings case-insensitively using full case folding.
- * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
- *
- * @param s1 A string to compare.
- * @param s2 A string to compare.
- * @param options A bit set of options:
- *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
- *     Comparison in code unit order with default case folding.
- *
- *   - U_COMPARE_CODE_POINT_ORDER
- *     Set to choose code point order instead of code unit order
- *     (see u_strCompare for details).
- *
- *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
- *
- * @return A negative, zero, or positive integer indicating the comparison result.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
-
-/**
- * Compare two strings case-insensitively using full case folding.
- * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
- * u_strFoldCase(s2, at most n, options)).
- *
- * @param s1 A string to compare.
- * @param s2 A string to compare.
- * @param n The maximum number of characters each string to case-fold and then compare.
- * @param options A bit set of options:
- *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
- *     Comparison in code unit order with default case folding.
- *
- *   - U_COMPARE_CODE_POINT_ORDER
- *     Set to choose code point order instead of code unit order
- *     (see u_strCompare for details).
- *
- *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
- *
- * @return A negative, zero, or positive integer indicating the comparison result.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
-
-/**
- * Compare two strings case-insensitively using full case folding.
- * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options),
- * u_strFoldCase(s2, n, options)).
- *
- * @param s1 A string to compare.
- * @param s2 A string to compare.
- * @param length The number of characters in each string to case-fold and then compare.
- * @param options A bit set of options:
- *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
- *     Comparison in code unit order with default case folding.
- *
- *   - U_COMPARE_CODE_POINT_ORDER
- *     Set to choose code point order instead of code unit order
- *     (see u_strCompare for details).
- *
- *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
- *
- * @return A negative, zero, or positive integer indicating the comparison result.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
-
-/**
- * Copy a ustring. Adds a null terminator.
- *
- * @param dst The destination string.
- * @param src The source string.
- * @return A pointer to <code>dst</code>.
- * @stable ICU 2.0
- */
-U_STABLE UChar* U_EXPORT2
-u_strcpy(UChar     *dst, 
-    const UChar     *src);
-
-/**
- * Copy a ustring.
- * Copies at most <code>n</code> characters.  The result will be null terminated
- * if the length of <code>src</code> is less than <code>n</code>.
- *
- * @param dst The destination string.
- * @param src The source string.
- * @param n The maximum number of characters to copy.
- * @return A pointer to <code>dst</code>.
- * @stable ICU 2.0
- */
-U_STABLE UChar* U_EXPORT2
-u_strncpy(UChar     *dst, 
-     const UChar     *src, 
-     int32_t     n);
-
-#if !UCONFIG_NO_CONVERSION
-
-/**
- * Copy a byte string encoded in the default codepage to a ustring.
- * Adds a null terminator.
- * Performs a host byte to UChar conversion
- *
- * @param dst The destination string.
- * @param src The source string.
- * @return A pointer to <code>dst</code>.
- * @stable ICU 2.0
- */
-U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
-               const char *src );
-
-/**
- * Copy a byte string encoded in the default codepage to a ustring.
- * Copies at most <code>n</code> characters.  The result will be null terminated
- * if the length of <code>src</code> is less than <code>n</code>.
- * Performs a host byte to UChar conversion
- *
- * @param dst The destination string.
- * @param src The source string.
- * @param n The maximum number of characters to copy.
- * @return A pointer to <code>dst</code>.
- * @stable ICU 2.0
- */
-U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
-            const char *src,
-            int32_t n);
-
-/**
- * Copy ustring to a byte string encoded in the default codepage.
- * Adds a null terminator.
- * Performs a UChar to host byte conversion
- *
- * @param dst The destination string.
- * @param src The source string.
- * @return A pointer to <code>dst</code>.
- * @stable ICU 2.0
- */
-U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
-            const UChar *src );
-
-/**
- * Copy ustring to a byte string encoded in the default codepage.
- * Copies at most <code>n</code> characters.  The result will be null terminated
- * if the length of <code>src</code> is less than <code>n</code>.
- * Performs a UChar to host byte conversion
- *
- * @param dst The destination string.
- * @param src The source string.
- * @param n The maximum number of characters to copy.
- * @return A pointer to <code>dst</code>.
- * @stable ICU 2.0
- */
-U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
-            const UChar *src,
-            int32_t n );
-
-#endif
-
-/**
- * Synonym for memcpy(), but with UChars only.
- * @param dest The destination string
- * @param src The source string
- * @param count The number of characters to copy
- * @return A pointer to <code>dest</code>
- * @stable ICU 2.0
- */
-U_STABLE UChar* U_EXPORT2
-u_memcpy(UChar *dest, const UChar *src, int32_t count);
-
-/**
- * Synonym for memmove(), but with UChars only.
- * @param dest The destination string
- * @param src The source string
- * @param count The number of characters to move
- * @return A pointer to <code>dest</code>
- * @stable ICU 2.0
- */
-U_STABLE UChar* U_EXPORT2
-u_memmove(UChar *dest, const UChar *src, int32_t count);
-
-/**
- * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
- *
- * @param dest The destination string.
- * @param c The character to initialize the string.
- * @param count The maximum number of characters to set.
- * @return A pointer to <code>dest</code>.
- * @stable ICU 2.0
- */
-U_STABLE UChar* U_EXPORT2
-u_memset(UChar *dest, UChar c, int32_t count);
-
-/**
- * Compare the first <code>count</code> UChars of each buffer.
- *
- * @param buf1 The first string to compare.
- * @param buf2 The second string to compare.
- * @param count The maximum number of UChars to compare.
- * @return When buf1 < buf2, a negative number is returned.
- *      When buf1 == buf2, 0 is returned.
- *      When buf1 > buf2, a positive number is returned.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
-
-/**
- * Compare two Unicode strings in code point order.
- * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
- * For details, see u_strCompare().
- *
- * @param s1 A string to compare.
- * @param s2 A string to compare.
- * @param count The maximum number of characters to compare.
- * @return a negative/zero/positive integer corresponding to whether
- * the first string is less than/equal to/greater than the second one
- * in code point order
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
-
-/**
- * Find the first occurrence of a BMP code point in a string.
- * A surrogate code point is found only if its match in the text is not
- * part of a surrogate pair.
- * A NUL character is found at the string terminator.
- *
- * @param s The string to search (contains <code>count</code> UChars).
- * @param c The BMP code point to find.
- * @param count The length of the string.
- * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
- *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
- * @stable ICU 2.0
- *
- * @see u_strchr
- * @see u_memchr32
- * @see u_strFindFirst
- */
-U_STABLE UChar* U_EXPORT2
-u_memchr(const UChar *s, UChar c, int32_t count);
-
-/**
- * Find the first occurrence of a code point in a string.
- * A surrogate code point is found only if its match in the text is not
- * part of a surrogate pair.
- * A NUL character is found at the string terminator.
- *
- * @param s The string to search (contains <code>count</code> UChars).
- * @param c The code point to find.
- * @param count The length of the string.
- * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
- *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
- * @stable ICU 2.0
- *
- * @see u_strchr32
- * @see u_memchr
- * @see u_strFindFirst
- */
-U_STABLE UChar* U_EXPORT2
-u_memchr32(const UChar *s, UChar32 c, int32_t count);
-
-/**
- * Find the last occurrence of a BMP code point in a string.
- * A surrogate code point is found only if its match in the text is not
- * part of a surrogate pair.
- * A NUL character is found at the string terminator.
- *
- * @param s The string to search (contains <code>count</code> UChars).
- * @param c The BMP code point to find.
- * @param count The length of the string.
- * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
- *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
- * @stable ICU 2.4
- *
- * @see u_strrchr
- * @see u_memrchr32
- * @see u_strFindLast
- */
-U_STABLE UChar* U_EXPORT2
-u_memrchr(const UChar *s, UChar c, int32_t count);
-
-/**
- * Find the last occurrence of a code point in a string.
- * A surrogate code point is found only if its match in the text is not
- * part of a surrogate pair.
- * A NUL character is found at the string terminator.
- *
- * @param s The string to search (contains <code>count</code> UChars).
- * @param c The code point to find.
- * @param count The length of the string.
- * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
- *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
- * @stable ICU 2.4
- *
- * @see u_strrchr32
- * @see u_memrchr
- * @see u_strFindLast
- */
-U_STABLE UChar* U_EXPORT2
-u_memrchr32(const UChar *s, UChar32 c, int32_t count);
-
-/**
- * Unicode String literals in C.
- * We need one macro to declare a variable for the string
- * and to statically preinitialize it if possible,
- * and a second macro to dynamically intialize such a string variable if necessary.
- *
- * The macros are defined for maximum performance.
- * They work only for strings that contain "invariant characters", i.e.,
- * only latin letters, digits, and some punctuation.
- * See utypes.h for details.
- *
- * A pair of macros for a single string must be used with the same
- * parameters.
- * The string parameter must be a C string literal.
- * The length of the string, not including the terminating
- * <code>NUL</code>, must be specified as a constant.
- * The U_STRING_DECL macro should be invoked exactly once for one
- * such string variable before it is used.
- *
- * Usage:
- * <pre>
- *    U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
- *    U_STRING_DECL(ustringVar2, "jumps 5%", 8);
- *    static UBool didInit=FALSE;
- * 
- *    int32_t function() {
- *        if(!didInit) {
- *            U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
- *            U_STRING_INIT(ustringVar2, "jumps 5%", 8);
- *            didInit=TRUE;
- *        }
- *        return u_strcmp(ustringVar1, ustringVar2);
- *    }
- * </pre>
- * @stable ICU 2.0
- */
-#if defined(U_DECLARE_UTF16)
-#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=U_DECLARE_UTF16(cs)
-    /**@stable ICU 2.0 */
-#   define U_STRING_INIT(var, cs, length)
-#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
-#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
-    /**@stable ICU 2.0 */
-#   define U_STRING_INIT(var, cs, length)
-#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
-#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
-    /**@stable ICU 2.0 */
-#   define U_STRING_INIT(var, cs, length)
-#else
-#   define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
-    /**@stable ICU 2.0 */
-#   define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)
-#endif
-
-/**
- * Unescape a string of characters and write the resulting
- * Unicode characters to the destination buffer.  The following escape
- * sequences are recognized:
- *
- * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
- * \\Uhhhhhhhh   8 hex digits
- * \\xhh         1-2 hex digits
- * \\x{h...}     1-8 hex digits
- * \\ooo         1-3 octal digits; o in [0-7]
- * \\cX          control-X; X is masked with 0x1F
- *
- * as well as the standard ANSI C escapes:
- *
- * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
- * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
- * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
- *
- * Anything else following a backslash is generically escaped.  For
- * example, "[a\\-z]" returns "[a-z]".
- *
- * If an escape sequence is ill-formed, this method returns an empty
- * string.  An example of an ill-formed sequence is "\\u" followed by
- * fewer than 4 hex digits.
- *
- * The above characters are recognized in the compiler's codepage,
- * that is, they are coded as 'u', '\\', etc.  Characters that are
- * not parts of escape sequences are converted using u_charsToUChars().
- *
- * This function is similar to UnicodeString::unescape() but not
- * identical to it.  The latter takes a source UnicodeString, so it
- * does escape recognition but no conversion.
- *
- * @param src a zero-terminated string of invariant characters
- * @param dest pointer to buffer to receive converted and unescaped
- * text and, if there is room, a zero terminator.  May be NULL for
- * preflighting, in which case no UChars will be written, but the
- * return value will still be valid.  On error, an empty string is
- * stored here (if possible).
- * @param destCapacity the number of UChars that may be written at
- * dest.  Ignored if dest == NULL.
- * @return the length of unescaped string.
- * @see u_unescapeAt
- * @see UnicodeString#unescape()
- * @see UnicodeString#unescapeAt()
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_unescape(const char *src,
-           UChar *dest, int32_t destCapacity);
-
-U_CDECL_BEGIN
-/**
- * Callback function for u_unescapeAt() that returns a character of
- * the source text given an offset and a context pointer.  The context
- * pointer will be whatever is passed into u_unescapeAt().
- *
- * @param offset pointer to the offset that will be passed to u_unescapeAt().
- * @param context an opaque pointer passed directly into u_unescapeAt()
- * @return the character represented by the escape sequence at
- * offset
- * @see u_unescapeAt
- * @stable ICU 2.0
- */
-typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
-U_CDECL_END
-
-/**
- * Unescape a single sequence. The character at offset-1 is assumed
- * (without checking) to be a backslash.  This method takes a callback
- * pointer to a function that returns the UChar at a given offset.  By
- * varying this callback, ICU functions are able to unescape char*
- * strings, UnicodeString objects, and UFILE pointers.
- *
- * If offset is out of range, or if the escape sequence is ill-formed,
- * (UChar32)0xFFFFFFFF is returned.  See documentation of u_unescape()
- * for a list of recognized sequences.
- *
- * @param charAt callback function that returns a UChar of the source
- * text given an offset and a context pointer.
- * @param offset pointer to the offset that will be passed to charAt.
- * The offset value will be updated upon return to point after the
- * last parsed character of the escape sequence.  On error the offset
- * is unchanged.
- * @param length the number of characters in the source text.  The
- * last character of the source text is considered to be at offset
- * length-1.
- * @param context an opaque pointer passed directly into charAt.
- * @return the character represented by the escape sequence at
- * offset, or (UChar32)0xFFFFFFFF on error.
- * @see u_unescape()
- * @see UnicodeString#unescape()
- * @see UnicodeString#unescapeAt()
- * @stable ICU 2.0
- */
-U_STABLE UChar32 U_EXPORT2
-u_unescapeAt(UNESCAPE_CHAR_AT charAt,
-             int32_t *offset,
-             int32_t length,
-             void *context);
-
-/**
- * Uppercase the characters in a string.
- * Casing is locale-dependent and context-sensitive.
- * The result may be longer or shorter than the original.
- * The source string and the destination buffer are allowed to overlap.
- *
- * @param dest      A buffer for the result string. The result will be zero-terminated if
- *                  the buffer is large enough.
- * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
- *                  dest may be NULL and the function will only return the length of the result
- *                  without writing any of the result string.
- * @param src       The original string
- * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
- * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                  which must not indicate a failure before the function call.
- * @return The length of the result string. It may be greater than destCapacity. In that case,
- *         only some of the result was written to the destination buffer.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_strToUpper(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             const char *locale,
-             UErrorCode *pErrorCode);
-
-/**
- * Lowercase the characters in a string.
- * Casing is locale-dependent and context-sensitive.
- * The result may be longer or shorter than the original.
- * The source string and the destination buffer are allowed to overlap.
- *
- * @param dest      A buffer for the result string. The result will be zero-terminated if
- *                  the buffer is large enough.
- * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
- *                  dest may be NULL and the function will only return the length of the result
- *                  without writing any of the result string.
- * @param src       The original string
- * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
- * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                  which must not indicate a failure before the function call.
- * @return The length of the result string. It may be greater than destCapacity. In that case,
- *         only some of the result was written to the destination buffer.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_strToLower(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             const char *locale,
-             UErrorCode *pErrorCode);
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-/**
- * Titlecase a string.
- * Casing is locale-dependent and context-sensitive.
- * Titlecasing uses a break iterator to find the first characters of words
- * that are to be titlecased. It titlecases those characters and lowercases
- * all others.
- *
- * The titlecase break iterator can be provided to customize for arbitrary
- * styles, using rules and dictionaries beyond the standard iterators.
- * It may be more efficient to always provide an iterator to avoid
- * opening and closing one for each string.
- * The standard titlecase iterator for the root locale implements the
- * algorithm of Unicode TR 21.
- *
- * This function uses only the setText(), first() and next() methods of the
- * provided break iterator.
- *
- * The result may be longer or shorter than the original.
- * The source string and the destination buffer are allowed to overlap.
- *
- * @param dest      A buffer for the result string. The result will be zero-terminated if
- *                  the buffer is large enough.
- * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
- *                  dest may be NULL and the function will only return the length of the result
- *                  without writing any of the result string.
- * @param src       The original string
- * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
- * @param titleIter A break iterator to find the first characters of words
- *                  that are to be titlecased.
- *                  If none is provided (NULL), then a standard titlecase
- *                  break iterator is opened.
- * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                  which must not indicate a failure before the function call.
- * @return The length of the result string. It may be greater than destCapacity. In that case,
- *         only some of the result was written to the destination buffer.
- * @stable ICU 2.1
- */
-U_STABLE int32_t U_EXPORT2
-u_strToTitle(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             UBreakIterator *titleIter,
-             const char *locale,
-             UErrorCode *pErrorCode);
-
-#endif
-
-/**
- * Case-fold the characters in a string.
- * Case-folding is locale-independent and not context-sensitive,
- * but there is an option for whether to include or exclude mappings for dotted I
- * and dotless i that are marked with 'I' in CaseFolding.txt.
- * The result may be longer or shorter than the original.
- * The source string and the destination buffer are allowed to overlap.
- *
- * @param dest      A buffer for the result string. The result will be zero-terminated if
- *                  the buffer is large enough.
- * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
- *                  dest may be NULL and the function will only return the length of the result
- *                  without writing any of the result string.
- * @param src       The original string
- * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
- * @param options   Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                  which must not indicate a failure before the function call.
- * @return The length of the result string. It may be greater than destCapacity. In that case,
- *         only some of the result was written to the destination buffer.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2
-u_strFoldCase(UChar *dest, int32_t destCapacity,
-              const UChar *src, int32_t srcLength,
-              uint32_t options,
-              UErrorCode *pErrorCode);
-
-#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
-/**
- * Converts a sequence of UChars to wchar_t units.
- *
- * @param dest          A buffer for the result string. The result will be zero-terminated if
- *                      the buffer is large enough.
- * @param destCapacity  The size of the buffer (number of wchar_t's). If it is 0, then
- *                      dest may be NULL and the function will only return the length of the 
- *                      result without writing any of the result string (pre-flighting).
- * @param pDestLength   A pointer to receive the number of units written to the destination. If 
- *                      pDestLength!=NULL then *pDestLength is always set to the 
- *                      number of output units corresponding to the transformation of 
- *                      all the input units, even in case of a buffer overflow.
- * @param src           The original source string
- * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
- * @param pErrorCode    Must be a valid pointer to an error code value,
- *                      which must not indicate a failure before the function call.
- * @return The pointer to destination buffer.
- * @stable ICU 2.0
- */
-U_STABLE wchar_t* U_EXPORT2
-u_strToWCS(wchar_t *dest, 
-           int32_t destCapacity,
-           int32_t *pDestLength,
-           const UChar *src, 
-           int32_t srcLength,
-           UErrorCode *pErrorCode);
-/**
- * Converts a sequence of wchar_t units to UChars
- *
- * @param dest          A buffer for the result string. The result will be zero-terminated if
- *                      the buffer is large enough.
- * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
- *                      dest may be NULL and the function will only return the length of the 
- *                      result without writing any of the result string (pre-flighting).
- * @param pDestLength   A pointer to receive the number of units written to the destination. If 
- *                      pDestLength!=NULL then *pDestLength is always set to the 
- *                      number of output units corresponding to the transformation of 
- *                      all the input units, even in case of a buffer overflow.
- * @param src           The original source string
- * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
- * @param pErrorCode    Must be a valid pointer to an error code value,
- *                      which must not indicate a failure before the function call.
- * @return The pointer to destination buffer.
- * @stable ICU 2.0
- */
-U_STABLE UChar* U_EXPORT2
-u_strFromWCS(UChar   *dest,
-             int32_t destCapacity, 
-             int32_t *pDestLength,
-             const wchar_t *src,
-             int32_t srcLength,
-             UErrorCode *pErrorCode);
-#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
-
-/**
- * Converts a sequence of UChars (UTF-16) to UTF-8 bytes
- *
- * @param dest          A buffer for the result string. The result will be zero-terminated if
- *                      the buffer is large enough.
- * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
- *                      dest may be NULL and the function will only return the length of the 
- *                      result without writing any of the result string (pre-flighting).
- * @param pDestLength   A pointer to receive the number of units written to the destination. If 
- *                      pDestLength!=NULL then *pDestLength is always set to the 
- *                      number of output units corresponding to the transformation of 
- *                      all the input units, even in case of a buffer overflow.
- * @param src           The original source string
- * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
- * @param pErrorCode    Must be a valid pointer to an error code value,
- *                      which must not indicate a failure before the function call.
- * @return The pointer to destination buffer.
- * @stable ICU 2.0
- * @see u_strToUTF8WithSub
- * @see u_strFromUTF8
- */
-U_STABLE char* U_EXPORT2 
-u_strToUTF8(char *dest,           
-            int32_t destCapacity,
-            int32_t *pDestLength,
-            const UChar *src, 
-            int32_t srcLength,
-            UErrorCode *pErrorCode);
-
-/**
- * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
- *
- * @param dest          A buffer for the result string. The result will be zero-terminated if
- *                      the buffer is large enough.
- * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
- *                      dest may be NULL and the function will only return the length of the 
- *                      result without writing any of the result string (pre-flighting).
- * @param pDestLength   A pointer to receive the number of units written to the destination. If 
- *                      pDestLength!=NULL then *pDestLength is always set to the 
- *                      number of output units corresponding to the transformation of 
- *                      all the input units, even in case of a buffer overflow.
- * @param src           The original source string
- * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
- * @param pErrorCode    Must be a valid pointer to an error code value,
- *                      which must not indicate a failure before the function call.
- * @return The pointer to destination buffer.
- * @stable ICU 2.0
- * @see u_strFromUTF8WithSub
- * @see u_strFromUTF8Lenient
- */
-U_STABLE UChar* U_EXPORT2
-u_strFromUTF8(UChar *dest,             
-              int32_t destCapacity,
-              int32_t *pDestLength,
-              const char *src, 
-              int32_t srcLength,
-              UErrorCode *pErrorCode);
-
-/**
- * Converts a sequence of UChars (UTF-16) to UTF-8 bytes.
- * Same as u_strToUTF8() except for the additional subchar which is output for
- * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
- * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
- *
- * @param dest          A buffer for the result string. The result will be zero-terminated if
- *                      the buffer is large enough.
- * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
- *                      dest may be NULL and the function will only return the length of the 
- *                      result without writing any of the result string (pre-flighting).
- * @param pDestLength   A pointer to receive the number of units written to the destination. If 
- *                      pDestLength!=NULL then *pDestLength is always set to the 
- *                      number of output units corresponding to the transformation of 
- *                      all the input units, even in case of a buffer overflow.
- * @param src           The original source string
- * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
- * @param subchar       The substitution character to use in place of an illegal input sequence,
- *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
- *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
- *                      except for surrogate code points (U+D800..U+DFFF).
- *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
- * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
- *                      Set to 0 if no substitutions occur or subchar<0.
- *                      pNumSubstitutions can be NULL.
- * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
- *                      pass the U_SUCCESS() test, or else the function returns
- *                      immediately. Check for U_FAILURE() on output or use with
- *                      function chaining. (See User Guide for details.)
- * @return The pointer to destination buffer.
- * @see u_strToUTF8
- * @see u_strFromUTF8WithSub
- * @stable ICU 3.6
- */
-U_STABLE char* U_EXPORT2
-u_strToUTF8WithSub(char *dest,
-            int32_t destCapacity,
-            int32_t *pDestLength,
-            const UChar *src,
-            int32_t srcLength,
-            UChar32 subchar, int32_t *pNumSubstitutions,
-            UErrorCode *pErrorCode);
-
-/**
- * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
- * Same as u_strFromUTF8() except for the additional subchar which is output for
- * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
- * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
- *
- * @param dest          A buffer for the result string. The result will be zero-terminated if
- *                      the buffer is large enough.
- * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
- *                      dest may be NULL and the function will only return the length of the 
- *                      result without writing any of the result string (pre-flighting).
- * @param pDestLength   A pointer to receive the number of units written to the destination. If 
- *                      pDestLength!=NULL then *pDestLength is always set to the 
- *                      number of output units corresponding to the transformation of 
- *                      all the input units, even in case of a buffer overflow.
- * @param src           The original source string
- * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
- * @param subchar       The substitution character to use in place of an illegal input sequence,
- *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
- *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
- *                      except for surrogate code points (U+D800..U+DFFF).
- *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
- * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
- *                      Set to 0 if no substitutions occur or subchar<0.
- *                      pNumSubstitutions can be NULL.
- * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
- *                      pass the U_SUCCESS() test, or else the function returns
- *                      immediately. Check for U_FAILURE() on output or use with
- *                      function chaining. (See User Guide for details.)
- * @return The pointer to destination buffer.
- * @see u_strFromUTF8
- * @see u_strFromUTF8Lenient
- * @see u_strToUTF8WithSub
- * @stable ICU 3.6
- */
-U_STABLE UChar* U_EXPORT2
-u_strFromUTF8WithSub(UChar *dest,
-              int32_t destCapacity,
-              int32_t *pDestLength,
-              const char *src,
-              int32_t srcLength,
-              UChar32 subchar, int32_t *pNumSubstitutions,
-              UErrorCode *pErrorCode);
-
-/**
- * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
- * Same as u_strFromUTF8() except that this function is designed to be very fast,
- * which it achieves by being lenient about malformed UTF-8 sequences.
- * This function is intended for use in environments where UTF-8 text is
- * expected to be well-formed.
- *
- * Its semantics are:
- * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
- * - The function will not read beyond the input string, nor write beyond
- *   the destCapacity.
- * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
- *   be well-formed UTF-16.
- *   The function will resynchronize to valid code point boundaries
- *   within a small number of code points after an illegal sequence.
- * - Non-shortest forms are not detected and will result in "spoofing" output.
- *
- * For further performance improvement, if srcLength is given (>=0),
- * then it must be destCapacity>=srcLength.
- *
- * @param dest          A buffer for the result string. The result will be zero-terminated if
- *                      the buffer is large enough.
- * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
- *                      dest may be NULL and the function will only return the length of the 
- *                      result without writing any of the result string (pre-flighting).
- *                      Unlike for other ICU functions, if srcLength>=0 then it
- *                      must be destCapacity>=srcLength.
- * @param pDestLength   A pointer to receive the number of units written to the destination. If 
- *                      pDestLength!=NULL then *pDestLength is always set to the 
- *                      number of output units corresponding to the transformation of 
- *                      all the input units, even in case of a buffer overflow.
- *                      Unlike for other ICU functions, if srcLength>=0 but
- *                      destCapacity<srcLength, then *pDestLength will be set to srcLength
- *                      (and U_BUFFER_OVERFLOW_ERROR will be set)
- *                      regardless of the actual result length.
- * @param src           The original source string
- * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
- * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
- *                      pass the U_SUCCESS() test, or else the function returns
- *                      immediately. Check for U_FAILURE() on output or use with
- *                      function chaining. (See User Guide for details.)
- * @return The pointer to destination buffer.
- * @see u_strFromUTF8
- * @see u_strFromUTF8WithSub
- * @see u_strToUTF8WithSub
- * @stable ICU 3.6
- */
-U_STABLE UChar * U_EXPORT2
-u_strFromUTF8Lenient(UChar *dest,
-                     int32_t destCapacity,
-                     int32_t *pDestLength,
-                     const char *src,
-                     int32_t srcLength,
-                     UErrorCode *pErrorCode);
-
-/**
- * Converts a sequence of UChars (UTF-16) to UTF32 units.
- *
- * @param dest          A buffer for the result string. The result will be zero-terminated if
- *                      the buffer is large enough.
- * @param destCapacity  The size of the buffer (number of UChar32s). If it is 0, then
- *                      dest may be NULL and the function will only return the length of the 
- *                      result without writing any of the result string (pre-flighting).
- * @param pDestLength   A pointer to receive the number of units written to the destination. If 
- *                      pDestLength!=NULL then *pDestLength is always set to the 
- *                      number of output units corresponding to the transformation of 
- *                      all the input units, even in case of a buffer overflow.
- * @param src           The original source string
- * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
- * @param pErrorCode    Must be a valid pointer to an error code value,
- *                      which must not indicate a failure before the function call.
- * @return The pointer to destination buffer.
- * @stable ICU 2.0
- */
-U_STABLE UChar32* U_EXPORT2 
-u_strToUTF32(UChar32 *dest, 
-             int32_t  destCapacity,
-             int32_t  *pDestLength,
-             const UChar *src, 
-             int32_t  srcLength,
-             UErrorCode *pErrorCode);
-
-/**
- * Converts a sequence of UTF32 units to UChars (UTF-16)
- *
- * @param dest          A buffer for the result string. The result will be zero-terminated if
- *                      the buffer is large enough.
- * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
- *                      dest may be NULL and the function will only return the length of the 
- *                      result without writing any of the result string (pre-flighting).
- * @param pDestLength   A pointer to receive the number of units written to the destination. If 
- *                      pDestLength!=NULL then *pDestLength is always set to the 
- *                      number of output units corresponding to the transformation of 
- *                      all the input units, even in case of a buffer overflow.
- * @param src           The original source string
- * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
- * @param pErrorCode    Must be a valid pointer to an error code value,
- *                      which must not indicate a failure before the function call.
- * @return The pointer to destination buffer.
- * @stable ICU 2.0
- */
-U_STABLE UChar* U_EXPORT2 
-u_strFromUTF32(UChar   *dest,
-               int32_t destCapacity, 
-               int32_t *pDestLength,
-               const UChar32 *src,
-               int32_t srcLength,
-               UErrorCode *pErrorCode);
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/ustring.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/ustring.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/ustring.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/ustring.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1479 @@
+/*
+**********************************************************************
+*   Copyright (C) 1998-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+* File ustring.h
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   12/07/98    bertrand    Creation.
+******************************************************************************
+*/
+
+#ifndef USTRING_H
+#define USTRING_H
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uiter.h"
+
+/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+#   define UBRK_TYPEDEF_UBREAK_ITERATOR
+    typedef void UBreakIterator;
+#endif
+
+/**
+ * \file
+ * \brief C API: Unicode string handling functions
+ *
+ * These C API functions provide general Unicode string handling.
+ *
+ * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
+ * functions. (For example, they do not check for bad arguments like NULL string pointers.)
+ * In some cases, only the thread-safe variant of such a function is implemented here
+ * (see u_strtok_r()).
+ *
+ * Other functions provide more Unicode-specific functionality like locale-specific
+ * upper/lower-casing and string comparison in code point order.
+ *
+ * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
+ * UTF-16 encodes each Unicode code point with either one or two UChar code units.
+ * (This is the default form of Unicode, and a forward-compatible extension of the original,
+ * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
+ * in 1996.)
+ *
+ * Some APIs accept a 32-bit UChar32 value for a single code point.
+ *
+ * ICU also handles 16-bit Unicode text with unpaired surrogates.
+ * Such text is not well-formed UTF-16.
+ * Code-point-related functions treat unpaired surrogates as surrogate code points,
+ * i.e., as separate units.
+ *
+ * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
+ * it is much more efficient even for random access because the code unit values
+ * for single-unit characters vs. lead units vs. trail units are completely disjoint.
+ * This means that it is easy to determine character (code point) boundaries from
+ * random offsets in the string.
+ *
+ * Unicode (UTF-16) string processing is optimized for the single-unit case.
+ * Although it is important to support supplementary characters
+ * (which use pairs of lead/trail code units called "surrogates"),
+ * their occurrence is rare. Almost all characters in modern use require only
+ * a single UChar code unit (i.e., their code point values are <=0xffff).
+ *
+ * For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html).
+ * For a discussion of the handling of unpaired surrogates see also
+ * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
+ */
+
+/**
+ * \defgroup ustring_ustrlen String Length
+ * \ingroup ustring_strlen
+ */
+/*@{*/
+/**
+ * Determine the length of an array of UChar.
+ *
+ * @param s The array of UChars, NULL (U+0000) terminated.
+ * @return The number of UChars in <code>chars</code>, minus the terminator.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strlen(const UChar *s);
+/*@}*/
+
+/**
+ * Count Unicode code points in the length UChar code units of the string.
+ * A code point may occupy either one or two UChar code units.
+ * Counting code points involves reading all code units.
+ *
+ * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h).
+ *
+ * @param s The input string.
+ * @param length The number of UChar code units to be checked, or -1 to count all
+ *               code points before the first NUL (U+0000).
+ * @return The number of code points in the specified code units.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_countChar32(const UChar *s, int32_t length);
+
+/**
+ * Check if the string contains more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in the entire string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length is known
+ * (not -1 for NUL-termination) and falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (u_countChar32(s, length)>number).
+ * A Unicode code point may occupy either one or two UChar code units.
+ *
+ * @param s The input string.
+ * @param length The length of the string, or -1 if it is NUL-terminated.
+ * @param number The number of code points in the string is compared against
+ *               the 'number' parameter.
+ * @return Boolean value for whether the string contains more Unicode code points
+ *         than 'number'. Same as (u_countChar32(s, length)>number).
+ * @stable ICU 2.4
+ */
+U_STABLE UBool U_EXPORT2
+u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
+
+/**
+ * Concatenate two ustrings.  Appends a copy of <code>src</code>,
+ * including the null terminator, to <code>dst</code>. The initial copied
+ * character from <code>src</code> overwrites the null terminator in <code>dst</code>.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strcat(UChar     *dst, 
+    const UChar     *src);
+
+/**
+ * Concatenate two ustrings.  
+ * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
+ * Adds a terminating NUL.
+ * If src is too long, then only <code>n-1</code> characters will be copied
+ * before the terminating NUL.
+ * If <code>n&lt;=0</code> then dst is not modified.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to compare.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strncat(UChar     *dst, 
+     const UChar     *src, 
+     int32_t     n);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ *         or <code>s</code> itself if the <code>substring</code> is empty,
+ *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strrstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ *         or <code>s</code> itself if the <code>substring</code> is empty,
+ *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar * U_EXPORT2
+u_strchr(const UChar *s, UChar c);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar * U_EXPORT2
+u_strchr32(const UChar *s, UChar32 c);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ *         or <code>s</code> itself if the <code>substring</code> is empty,
+ *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ *         or <code>s</code> itself if the <code>substring</code> is empty,
+ *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrchr(const UChar *s, UChar c);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memchr32
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_STABLE UChar * U_EXPORT2
+u_strrchr32(const UChar *s, UChar32 c);
+
+/**
+ * Locates the first occurrence in the string <code>string</code> of any of the characters
+ * in the string <code>matchSet</code>.
+ * Works just like C's strpbrk but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ *                 for which to search in the text string.
+ * @return A pointer to the  character in <code>string</code> that matches one of the
+ *         characters in <code>matchSet</code>, or NULL if no such character is found.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar * U_EXPORT2
+u_strpbrk(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in <code>string</code>,
+ * beginning with the first, that do not occur somewhere in <code>matchSet</code>.
+ * Works just like C's strcspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ *                 for which to search in the text string.
+ * @return The number of initial characters in <code>string</code> that do not
+ *         occur in <code>matchSet</code>.
+ * @see u_strspn
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in <code>string</code>,
+ * beginning with the first, that occur somewhere in <code>matchSet</code>.
+ * Works just like C's strspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ *                 for which to search in the text string.
+ * @return The number of initial characters in <code>string</code> that do
+ *         occur in <code>matchSet</code>.
+ * @see u_strcspn
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * The string tokenizer API allows an application to break a string into
+ * tokens. Unlike strtok(), the saveState (the current pointer within the
+ * original string) is maintained in saveState. In the first call, the
+ * argument src is a pointer to the string. In subsequent calls to
+ * return successive tokens of that string, src must be specified as
+ * NULL. The value saveState is set by this function to maintain the
+ * function's position within the string, and on each subsequent call
+ * you must give this argument the same variable. This function does
+ * handle surrogate pairs. This function is similar to the strtok_r()
+ * the POSIX Threads Extension (1003.1c-1995) version.
+ *
+ * @param src String containing token(s). This string will be modified.
+ *            After the first call to u_strtok_r(), this argument must
+ *            be NULL to get to the next token.
+ * @param delim Set of delimiter characters (Unicode code points).
+ * @param saveState The current pointer within the original string,
+ *              which is set by this function. The saveState
+ *              parameter should the address of a local variable of type
+ *              UChar *. (i.e. defined "Uhar *myLocalSaveState" and use
+ *              &myLocalSaveState for this parameter).
+ * @return A pointer to the next token found in src, or NULL
+ *         when there are no more tokens.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar * U_EXPORT2
+u_strtok_r(UChar    *src, 
+     const UChar    *delim,
+           UChar   **saveState);
+
+/**
+ * Compare two Unicode strings for bitwise equality (code unit order).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
+ * value if <code>s1</code> is bitwise less than <code>s2,</code>; a positive
+ * value if <code>s1</code> is bitwise greater than <code>s2</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t  U_EXPORT2
+u_strcmp(const UChar     *s1, 
+         const UChar     *s2);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * See u_strCompare for details.
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
+
+/**
+ * Compare two Unicode strings (binary order).
+ *
+ * The comparison can be done in code unit order or in code point order.
+ * They differ only in UTF-16 when
+ * comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This functions works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param codePointOrder Choose between code unit order (FALSE)
+ *                       and code point order (TRUE).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCompare(const UChar *s1, int32_t length1,
+             const UChar *s2, int32_t length2,
+             UBool codePointOrder);
+
+/**
+ * Compare two Unicode strings (binary order)
+ * as presented by UCharIterator objects.
+ * Works otherwise just like u_strCompare().
+ *
+ * Both iterators are reset to their start positions.
+ * When the function returns, it is undefined where the iterators
+ * have stopped.
+ *
+ * @param iter1 First source string iterator.
+ * @param iter2 Second source string iterator.
+ * @param codePointOrder Choose between code unit order (FALSE)
+ *                       and code point order (TRUE).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see u_strCompare
+ *
+ * @stable ICU 2.6
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also unistr.h and unorm.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER  0x8000
+#endif
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to
+ *   u_strCompare(u_strFoldCase(s1, options),
+ *                u_strFoldCase(s2, options),
+ *                (options&U_COMPARE_CODE_POINT_ORDER)!=0).
+ *
+ * The comparison can be done in UTF-16 code unit order or in code point order.
+ * They differ only when comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This functions works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ *     Comparison in code unit order with default case folding.
+ *
+ *   - U_COMPARE_CODE_POINT_ORDER
+ *     Set to choose code point order instead of code unit order
+ *     (see u_strCompare for details).
+ *
+ *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_STABLE int32_t U_EXPORT2
+u_strCaseCompare(const UChar *s1, int32_t length1,
+                 const UChar *s2, int32_t length2,
+                 uint32_t options,
+                 UErrorCode *pErrorCode);
+
+/**
+ * Compare two ustrings for bitwise equality. 
+ * Compares at most <code>n</code> characters.
+ *
+ * @param ucs1 A string to compare.
+ * @param ucs2 A string to compare.
+ * @param n The maximum number of characters to compare.
+ * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
+ * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
+ * value if <code>s1</code> is bitwise greater than <code>s2</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncmp(const UChar     *ucs1, 
+     const UChar     *ucs2, 
+     int32_t     n);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param options A bit set of options:
+ *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ *     Comparison in code unit order with default case folding.
+ *
+ *   - U_COMPARE_CODE_POINT_ORDER
+ *     Set to choose code point order instead of code unit order
+ *     (see u_strCompare for details).
+ *
+ *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
+ * u_strFoldCase(s2, at most n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters each string to case-fold and then compare.
+ * @param options A bit set of options:
+ *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ *     Comparison in code unit order with default case folding.
+ *
+ *   - U_COMPARE_CODE_POINT_ORDER
+ *     Set to choose code point order instead of code unit order
+ *     (see u_strCompare for details).
+ *
+ *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options),
+ * u_strFoldCase(s2, n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param length The number of characters in each string to case-fold and then compare.
+ * @param options A bit set of options:
+ *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ *     Comparison in code unit order with default case folding.
+ *
+ *   - U_COMPARE_CODE_POINT_ORDER
+ *     Set to choose code point order instead of code unit order
+ *     (see u_strCompare for details).
+ *
+ *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
+
+/**
+ * Copy a ustring. Adds a null terminator.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strcpy(UChar     *dst, 
+    const UChar     *src);
+
+/**
+ * Copy a ustring.
+ * Copies at most <code>n</code> characters.  The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strncpy(UChar     *dst, 
+     const UChar     *src, 
+     int32_t     n);
+
+#if !UCONFIG_NO_CONVERSION
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Adds a null terminator.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
+               const char *src );
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Copies at most <code>n</code> characters.  The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
+            const char *src,
+            int32_t n);
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Adds a null terminator.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
+            const UChar *src );
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Copies at most <code>n</code> characters.  The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
+            const UChar *src,
+            int32_t n );
+
+#endif
+
+/**
+ * Synonym for memcpy(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string
+ * @param count The number of characters to copy
+ * @return A pointer to <code>dest</code>
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memcpy(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Synonym for memmove(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string
+ * @param count The number of characters to move
+ * @return A pointer to <code>dest</code>
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memmove(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
+ *
+ * @param dest The destination string.
+ * @param c The character to initialize the string.
+ * @param count The maximum number of characters to set.
+ * @return A pointer to <code>dest</code>.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_memset(UChar *dest, UChar c, int32_t count);
+
+/**
+ * Compare the first <code>count</code> UChars of each buffer.
+ *
+ * @param buf1 The first string to compare.
+ * @param buf2 The second string to compare.
+ * @param count The maximum number of UChars to compare.
+ * @return When buf1 < buf2, a negative number is returned.
+ *      When buf1 == buf2, 0 is returned.
+ *      When buf1 > buf2, a positive number is returned.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param count The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strFindFirst
+ */
+U_STABLE UChar* U_EXPORT2
+u_memchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strFindFirst
+ */
+U_STABLE UChar* U_EXPORT2
+u_memchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memrchr32
+ * @see u_strFindLast
+ */
+U_STABLE UChar* U_EXPORT2
+u_memrchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strFindLast
+ */
+U_STABLE UChar* U_EXPORT2
+u_memrchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Unicode String literals in C.
+ * We need one macro to declare a variable for the string
+ * and to statically preinitialize it if possible,
+ * and a second macro to dynamically intialize such a string variable if necessary.
+ *
+ * The macros are defined for maximum performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * A pair of macros for a single string must be used with the same
+ * parameters.
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * <code>NUL</code>, must be specified as a constant.
+ * The U_STRING_DECL macro should be invoked exactly once for one
+ * such string variable before it is used.
+ *
+ * Usage:
+ * <pre>
+ *    U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
+ *    U_STRING_DECL(ustringVar2, "jumps 5%", 8);
+ *    static UBool didInit=FALSE;
+ * 
+ *    int32_t function() {
+ *        if(!didInit) {
+ *            U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
+ *            U_STRING_INIT(ustringVar2, "jumps 5%", 8);
+ *            didInit=TRUE;
+ *        }
+ *        return u_strcmp(ustringVar1, ustringVar2);
+ *    }
+ * </pre>
+ * @stable ICU 2.0
+ */
+#if defined(U_DECLARE_UTF16)
+#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=U_DECLARE_UTF16(cs)
+    /**@stable ICU 2.0 */
+#   define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
+    /**@stable ICU 2.0 */
+#   define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
+    /**@stable ICU 2.0 */
+#   define U_STRING_INIT(var, cs, length)
+#else
+#   define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
+    /**@stable ICU 2.0 */
+#   define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)
+#endif
+
+/**
+ * Unescape a string of characters and write the resulting
+ * Unicode characters to the destination buffer.  The following escape
+ * sequences are recognized:
+ *
+ * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
+ * \\Uhhhhhhhh   8 hex digits
+ * \\xhh         1-2 hex digits
+ * \\x{h...}     1-8 hex digits
+ * \\ooo         1-3 octal digits; o in [0-7]
+ * \\cX          control-X; X is masked with 0x1F
+ *
+ * as well as the standard ANSI C escapes:
+ *
+ * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+ * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+ * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+ *
+ * Anything else following a backslash is generically escaped.  For
+ * example, "[a\\-z]" returns "[a-z]".
+ *
+ * If an escape sequence is ill-formed, this method returns an empty
+ * string.  An example of an ill-formed sequence is "\\u" followed by
+ * fewer than 4 hex digits.
+ *
+ * The above characters are recognized in the compiler's codepage,
+ * that is, they are coded as 'u', '\\', etc.  Characters that are
+ * not parts of escape sequences are converted using u_charsToUChars().
+ *
+ * This function is similar to UnicodeString::unescape() but not
+ * identical to it.  The latter takes a source UnicodeString, so it
+ * does escape recognition but no conversion.
+ *
+ * @param src a zero-terminated string of invariant characters
+ * @param dest pointer to buffer to receive converted and unescaped
+ * text and, if there is room, a zero terminator.  May be NULL for
+ * preflighting, in which case no UChars will be written, but the
+ * return value will still be valid.  On error, an empty string is
+ * stored here (if possible).
+ * @param destCapacity the number of UChars that may be written at
+ * dest.  Ignored if dest == NULL.
+ * @return the length of unescaped string.
+ * @see u_unescapeAt
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_unescape(const char *src,
+           UChar *dest, int32_t destCapacity);
+
+U_CDECL_BEGIN
+/**
+ * Callback function for u_unescapeAt() that returns a character of
+ * the source text given an offset and a context pointer.  The context
+ * pointer will be whatever is passed into u_unescapeAt().
+ *
+ * @param offset pointer to the offset that will be passed to u_unescapeAt().
+ * @param context an opaque pointer passed directly into u_unescapeAt()
+ * @return the character represented by the escape sequence at
+ * offset
+ * @see u_unescapeAt
+ * @stable ICU 2.0
+ */
+typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
+U_CDECL_END
+
+/**
+ * Unescape a single sequence. The character at offset-1 is assumed
+ * (without checking) to be a backslash.  This method takes a callback
+ * pointer to a function that returns the UChar at a given offset.  By
+ * varying this callback, ICU functions are able to unescape char*
+ * strings, UnicodeString objects, and UFILE pointers.
+ *
+ * If offset is out of range, or if the escape sequence is ill-formed,
+ * (UChar32)0xFFFFFFFF is returned.  See documentation of u_unescape()
+ * for a list of recognized sequences.
+ *
+ * @param charAt callback function that returns a UChar of the source
+ * text given an offset and a context pointer.
+ * @param offset pointer to the offset that will be passed to charAt.
+ * The offset value will be updated upon return to point after the
+ * last parsed character of the escape sequence.  On error the offset
+ * is unchanged.
+ * @param length the number of characters in the source text.  The
+ * last character of the source text is considered to be at offset
+ * length-1.
+ * @param context an opaque pointer passed directly into charAt.
+ * @return the character represented by the escape sequence at
+ * offset, or (UChar32)0xFFFFFFFF on error.
+ * @see u_unescape()
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32 U_EXPORT2
+u_unescapeAt(UNESCAPE_CHAR_AT charAt,
+             int32_t *offset,
+             int32_t length,
+             void *context);
+
+/**
+ * Uppercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest      A buffer for the result string. The result will be zero-terminated if
+ *                  the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ *         only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToUpper(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             const char *locale,
+             UErrorCode *pErrorCode);
+
+/**
+ * Lowercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest      A buffer for the result string. The result will be zero-terminated if
+ *                  the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ *         only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToLower(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             const char *locale,
+             UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest      A buffer for the result string. The result will be zero-terminated if
+ *                  the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param titleIter A break iterator to find the first characters of words
+ *                  that are to be titlecased.
+ *                  If none is provided (NULL), then a standard titlecase
+ *                  break iterator is opened.
+ * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ *         only some of the result was written to the destination buffer.
+ * @stable ICU 2.1
+ */
+U_STABLE int32_t U_EXPORT2
+u_strToTitle(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             UBreakIterator *titleIter,
+             const char *locale,
+             UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-fold the characters in a string.
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'I' in CaseFolding.txt.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest      A buffer for the result string. The result will be zero-terminated if
+ *                  the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param options   Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ *         only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+u_strFoldCase(UChar *dest, int32_t destCapacity,
+              const UChar *src, int32_t srcLength,
+              uint32_t options,
+              UErrorCode *pErrorCode);
+
+#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
+/**
+ * Converts a sequence of UChars to wchar_t units.
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of wchar_t's). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE wchar_t* U_EXPORT2
+u_strToWCS(wchar_t *dest, 
+           int32_t destCapacity,
+           int32_t *pDestLength,
+           const UChar *src, 
+           int32_t srcLength,
+           UErrorCode *pErrorCode);
+/**
+ * Converts a sequence of wchar_t units to UChars
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromWCS(UChar   *dest,
+             int32_t destCapacity, 
+             int32_t *pDestLength,
+             const wchar_t *src,
+             int32_t srcLength,
+             UErrorCode *pErrorCode);
+#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
+
+/**
+ * Converts a sequence of UChars (UTF-16) to UTF-8 bytes
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ * @see u_strToUTF8WithSub
+ * @see u_strFromUTF8
+ */
+U_STABLE char* U_EXPORT2 
+u_strToUTF8(char *dest,           
+            int32_t destCapacity,
+            int32_t *pDestLength,
+            const UChar *src, 
+            int32_t srcLength,
+            UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ * @see u_strFromUTF8WithSub
+ * @see u_strFromUTF8Lenient
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF8(UChar *dest,             
+              int32_t destCapacity,
+              int32_t *pDestLength,
+              const char *src, 
+              int32_t srcLength,
+              UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UChars (UTF-16) to UTF-8 bytes.
+ * Same as u_strToUTF8() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar       The substitution character to use in place of an illegal input sequence,
+ *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ *                      except for surrogate code points (U+D800..U+DFFF).
+ *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ *                      Set to 0 if no substitutions occur or subchar<0.
+ *                      pNumSubstitutions can be NULL.
+ * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
+ *                      pass the U_SUCCESS() test, or else the function returns
+ *                      immediately. Check for U_FAILURE() on output or use with
+ *                      function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF8
+ * @see u_strFromUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_STABLE char* U_EXPORT2
+u_strToUTF8WithSub(char *dest,
+            int32_t destCapacity,
+            int32_t *pDestLength,
+            const UChar *src,
+            int32_t srcLength,
+            UChar32 subchar, int32_t *pNumSubstitutions,
+            UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
+ * Same as u_strFromUTF8() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar       The substitution character to use in place of an illegal input sequence,
+ *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ *                      except for surrogate code points (U+D800..U+DFFF).
+ *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ *                      Set to 0 if no substitutions occur or subchar<0.
+ *                      pNumSubstitutions can be NULL.
+ * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
+ *                      pass the U_SUCCESS() test, or else the function returns
+ *                      immediately. Check for U_FAILURE() on output or use with
+ *                      function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8
+ * @see u_strFromUTF8Lenient
+ * @see u_strToUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_STABLE UChar* U_EXPORT2
+u_strFromUTF8WithSub(UChar *dest,
+              int32_t destCapacity,
+              int32_t *pDestLength,
+              const char *src,
+              int32_t srcLength,
+              UChar32 subchar, int32_t *pNumSubstitutions,
+              UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
+ * Same as u_strFromUTF8() except that this function is designed to be very fast,
+ * which it achieves by being lenient about malformed UTF-8 sequences.
+ * This function is intended for use in environments where UTF-8 text is
+ * expected to be well-formed.
+ *
+ * Its semantics are:
+ * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
+ * - The function will not read beyond the input string, nor write beyond
+ *   the destCapacity.
+ * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
+ *   be well-formed UTF-16.
+ *   The function will resynchronize to valid code point boundaries
+ *   within a small number of code points after an illegal sequence.
+ * - Non-shortest forms are not detected and will result in "spoofing" output.
+ *
+ * For further performance improvement, if srcLength is given (>=0),
+ * then it must be destCapacity>=srcLength.
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ *                      Unlike for other ICU functions, if srcLength>=0 then it
+ *                      must be destCapacity>=srcLength.
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ *                      Unlike for other ICU functions, if srcLength>=0 but
+ *                      destCapacity<srcLength, then *pDestLength will be set to srcLength
+ *                      (and U_BUFFER_OVERFLOW_ERROR will be set)
+ *                      regardless of the actual result length.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
+ *                      pass the U_SUCCESS() test, or else the function returns
+ *                      immediately. Check for U_FAILURE() on output or use with
+ *                      function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8
+ * @see u_strFromUTF8WithSub
+ * @see u_strToUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_STABLE UChar * U_EXPORT2
+u_strFromUTF8Lenient(UChar *dest,
+                     int32_t destCapacity,
+                     int32_t *pDestLength,
+                     const char *src,
+                     int32_t srcLength,
+                     UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UChars (UTF-16) to UTF32 units.
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChar32s). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar32* U_EXPORT2 
+u_strToUTF32(UChar32 *dest, 
+             int32_t  destCapacity,
+             int32_t  *pDestLength,
+             const UChar *src, 
+             int32_t  srcLength,
+             UErrorCode *pErrorCode);
+
+/**
+ * Converts a sequence of UTF32 units to UChars (UTF-16)
+ *
+ * @param dest          A buffer for the result string. The result will be zero-terminated if
+ *                      the buffer is large enough.
+ * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
+ *                      dest may be NULL and the function will only return the length of the 
+ *                      result without writing any of the result string (pre-flighting).
+ * @param pDestLength   A pointer to receive the number of units written to the destination. If 
+ *                      pDestLength!=NULL then *pDestLength is always set to the 
+ *                      number of output units corresponding to the transformation of 
+ *                      all the input units, even in case of a buffer overflow.
+ * @param src           The original source string
+ * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode    Must be a valid pointer to an error code value,
+ *                      which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_STABLE UChar* U_EXPORT2 
+u_strFromUTF32(UChar   *dest,
+               int32_t destCapacity, 
+               int32_t *pDestLength,
+               const UChar32 *src,
+               int32_t srcLength,
+               UErrorCode *pErrorCode);
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/usystem.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/usystem.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/usystem.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,46 +0,0 @@
-/*
-*******************************************************************************
-*   Copyright (C) 2004-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*******************************************************************************
-*
-*   file name:  
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   Created by: genheaders.pl, a perl script written by Ram Viswanadha
-*
-*  Contains data for commenting out APIs.
-*  Gets included by umachine.h
-*
-*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
-*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
-*/
-
-#ifndef USYSTEM_H
-#define USYSTEM_H
-
-#ifdef U_HIDE_SYSTEM_API
-
-#    if U_DISABLE_RENAMING
-#        define u_cleanup u_cleanup_SYSTEM_API_DO_NOT_USE
-#        define u_setAtomicIncDecFunctions u_setAtomicIncDecFunctions_SYSTEM_API_DO_NOT_USE
-#        define u_setMemoryFunctions u_setMemoryFunctions_SYSTEM_API_DO_NOT_USE
-#        define u_setMutexFunctions u_setMutexFunctions_SYSTEM_API_DO_NOT_USE
-#        define ucnv_setDefaultName ucnv_setDefaultName_SYSTEM_API_DO_NOT_USE
-#        define uloc_getDefault uloc_getDefault_SYSTEM_API_DO_NOT_USE
-#        define uloc_setDefault uloc_setDefault_SYSTEM_API_DO_NOT_USE
-#    else
-#        define u_cleanup_4_0 u_cleanup_SYSTEM_API_DO_NOT_USE
-#        define u_setAtomicIncDecFunctions_4_0 u_setAtomicIncDecFunctions_SYSTEM_API_DO_NOT_USE
-#        define u_setMemoryFunctions_4_0 u_setMemoryFunctions_SYSTEM_API_DO_NOT_USE
-#        define u_setMutexFunctions_4_0 u_setMutexFunctions_SYSTEM_API_DO_NOT_USE
-#        define ucnv_setDefaultName_4_0 ucnv_setDefaultName_SYSTEM_API_DO_NOT_USE
-#        define uloc_getDefault_4_0 uloc_getDefault_SYSTEM_API_DO_NOT_USE
-#        define uloc_setDefault_4_0 uloc_setDefault_SYSTEM_API_DO_NOT_USE
-#    endif /* U_DISABLE_RENAMING */
-
-#endif /* U_HIDE_SYSTEM_API */
-#endif /* USYSTEM_H */
-

Copied: MacRuby/trunk/icu-1060/unicode/usystem.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/usystem.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/usystem.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/usystem.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,46 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2004-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*
+*   file name:  
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   Created by: genheaders.pl, a perl script written by Ram Viswanadha
+*
+*  Contains data for commenting out APIs.
+*  Gets included by umachine.h
+*
+*  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+*  YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef USYSTEM_H
+#define USYSTEM_H
+
+#ifdef U_HIDE_SYSTEM_API
+
+#    if U_DISABLE_RENAMING
+#        define u_cleanup u_cleanup_SYSTEM_API_DO_NOT_USE
+#        define u_setAtomicIncDecFunctions u_setAtomicIncDecFunctions_SYSTEM_API_DO_NOT_USE
+#        define u_setMemoryFunctions u_setMemoryFunctions_SYSTEM_API_DO_NOT_USE
+#        define u_setMutexFunctions u_setMutexFunctions_SYSTEM_API_DO_NOT_USE
+#        define ucnv_setDefaultName ucnv_setDefaultName_SYSTEM_API_DO_NOT_USE
+#        define uloc_getDefault uloc_getDefault_SYSTEM_API_DO_NOT_USE
+#        define uloc_setDefault uloc_setDefault_SYSTEM_API_DO_NOT_USE
+#    else
+#        define u_cleanup_4_0 u_cleanup_SYSTEM_API_DO_NOT_USE
+#        define u_setAtomicIncDecFunctions_4_0 u_setAtomicIncDecFunctions_SYSTEM_API_DO_NOT_USE
+#        define u_setMemoryFunctions_4_0 u_setMemoryFunctions_SYSTEM_API_DO_NOT_USE
+#        define u_setMutexFunctions_4_0 u_setMutexFunctions_SYSTEM_API_DO_NOT_USE
+#        define ucnv_setDefaultName_4_0 ucnv_setDefaultName_SYSTEM_API_DO_NOT_USE
+#        define uloc_getDefault_4_0 uloc_getDefault_SYSTEM_API_DO_NOT_USE
+#        define uloc_setDefault_4_0 uloc_setDefault_SYSTEM_API_DO_NOT_USE
+#    endif /* U_DISABLE_RENAMING */
+
+#endif /* U_HIDE_SYSTEM_API */
+#endif /* USYSTEM_H */
+

Deleted: MacRuby/trunk/icu-1060/unicode/utext.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/utext.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/utext.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1562 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 2004-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  utext.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2004oct06
-*   created by: Markus W. Scherer
-*/
-
-#ifndef __UTEXT_H__
-#define __UTEXT_H__
-
-/**
- * \file
- * \brief C API: Abstract Unicode Text API
- *
- * The Text Access API provides a means to allow text that is stored in alternative
- * formats to work with ICU services.  ICU normally operates on text that is
- * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type
- * UnicodeString for C++ APIs.
- *
- * ICU Text Access allows other formats, such as UTF-8 or non-contiguous
- * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services.
- *
- * There are three general classes of usage for UText:
- *
- *     Application Level Use.  This is the simplest usage - applications would
- *     use one of the utext_open() functions on their input text, and pass
- *     the resulting UText to the desired ICU service.
- *
- *     Second is usage in ICU Services, such as break iteration, that will need to
- *     operate on input presented to them as a UText.  These implementations
- *     will need to use the iteration and related UText functions to gain
- *     access to the actual text.
- *
- *     The third class of UText users are "text providers."  These are the
- *     UText implementations for the various text storage formats.  An application
- *     or system with a unique text storage format can implement a set of
- *     UText provider functions for that format, which will then allow
- *     ICU services to operate on that format.
- *
- *
- * <em>Iterating over text</em>
- *
- * Here is sample code for a forward iteration over the contents of a UText
- *
- * \code
- *    UChar32  c;
- *    UText    *ut = whatever();
- *
- *    for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) {
- *       // do whatever with the codepoint c here.
- *    }
- * \endcode
- *
- * And here is similar code to iterate in the reverse direction, from the end
- * of the text towards the beginning.
- *
- * \code
- *    UChar32  c;
- *    UText    *ut = whatever();
- *    int      textLength = utext_nativeLength(ut);
- *    for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) {
- *       // do whatever with the codepoint c here.
- *    }
- * \endcode
- *
- * <em>Characters and Indexing</em>
- *
- * Indexing into text by UText functions is nearly always in terms of the native
- * indexing of the underlying text storage.  The storage format could be UTF-8
- * or UTF-32, for example.  When coding to the UText access API, no assumptions
- * can be made regarding the size of characters, or how far an index
- * may move when iterating between characters.
- *
- * All indices supplied to UText functions are pinned to the length of the
- * text.  An out-of-bounds index is not considered to be an error, but is
- * adjusted to be in the range  0 <= index <= length of input text.
- *
- *
- * When an index position is returned from a UText function, it will be
- * a native index to the underlying text.  In the case of multi-unit characters,
- * it will  always refer to the first position of the character,
- * never to the interior.  This is essentially the same thing as saying that
- * a returned index will always point to a boundary between characters.
- *
- * When a native index is supplied to a UText function, all indices that
- * refer to any part of a multi-unit character representation are considered
- * to be equivalent.  In the case of multi-unit characters, an incoming index
- * will be logically normalized to refer to the start of the character.
- * 
- * It is possible to test whether a native index is on a code point boundary
- * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex().
- * If the index is returned unchanged, it was on a code point boundary.  If
- * an adjusted index is returned, the original index referred to the
- * interior of a character.
- *
- * <em>Conventions for calling UText functions</em>
- *
- * Most UText access functions have as their first parameter a (UText *) pointer,
- * which specifies the UText to be used.  Unless otherwise noted, the
- * pointer must refer to a valid, open UText.  Attempting to
- * use a closed UText or passing a NULL pointer is a programming error and
- * will produce undefined results or NULL pointer exceptions.
- * 
- * The UText_Open family of functions can either open an existing (closed)
- * UText, or heap allocate a new UText.  Here is sample code for creating
- * a stack-allocated UText.
- *
- * \code
- *    char     *s = whatever();  // A utf-8 string 
- *    U_ErrorCode status = U_ZERO_ERROR;
- *    UText    ut = UTEXT_INITIALIZER;
- *    utext_openUTF8(ut, s, -1, &status);
- *    if (U_FAILURE(status)) {
- *        // error handling
- *    } else {
- *        // work with the UText
- *    }
- * \endcode
- *
- * Any existing UText passed to an open function _must_ have been initialized, 
- * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated
- * by an open function.  Passing NULL will cause the open function to
- * heap-allocate and fully initialize a new UText.
- *
- */
-
-
-
-#include "unicode/utypes.h"
-#ifdef XP_CPLUSPLUS
-#include "unicode/rep.h"
-#include "unicode/unistr.h"
-#include "unicode/chariter.h"
-#endif
-
-
-U_CDECL_BEGIN
-
-struct UText;
-typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */
-
-
-/***************************************************************************************
- *
- *   C Functions for creating UText wrappers around various kinds of text strings.
- *
- ****************************************************************************************/
-
-
-/**
-  * Close function for UText instances.
-  * Cleans up, releases any resources being held by an open UText.
-  * <p>
-  *   If the UText was originally allocated by one of the utext_open functions,
-  *   the storage associated with the utext will also be freed.
-  *   If the UText storage originated with the application, as it would with
-  *   a local or static instance, the storage will not be deleted.
-  *
-  *   An open UText can be reset to refer to new string by using one of the utext_open()
-  *   functions without first closing the UText.  
-  *
-  * @param ut  The UText to be closed.
-  * @return    NULL if the UText struct was deleted by the close.  If the UText struct
-  *            was originally provided by the caller to the open function, it is
-  *            returned by this function, and may be safely used again in
-  *            a subsequent utext_open.
-  *
-  * @stable ICU 3.4
-  */
-U_STABLE UText * U_EXPORT2
-utext_close(UText *ut);
-
-
-/**
- * Open a read-only UText implementation for UTF-8 strings.
- * 
- * \htmlonly
- * Any invalid UTF-8 in the input will be handled in this way:
- * a sequence of bytes that has the form of a truncated, but otherwise valid,
- * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. 
- * Any other illegal bytes will each be replaced by a \uFFFD.
- * \endhtmlonly
- * 
- * @param ut     Pointer to a UText struct.  If NULL, a new UText will be created.
- *               If non-NULL, must refer to an initialized UText struct, which will then
- *               be reset to reference the specified UTF-8 string.
- * @param s      A UTF-8 string.  Must not be NULL.
- * @param length The length of the UTF-8 string in bytes, or -1 if the string is
- *               zero terminated.
- * @param status Errors are returned here.
- * @return       A pointer to the UText.  If a pre-allocated UText was provided, it
- *               will always be used and returned.
- * @stable ICU 3.4
- */
-U_STABLE UText * U_EXPORT2
-utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
-
-
-/**
- * Open a read-only UText for UChar * string.
- * 
- * @param ut     Pointer to a UText struct.  If NULL, a new UText will be created.
- *               If non-NULL, must refer to an initialized UText struct, which will then
- *               be reset to reference the specified UChar string.
- * @param s      A UChar (UTF-16) string
- * @param length The number of UChars in the input string, or -1 if the string is
- *               zero terminated.
- * @param status Errors are returned here.
- * @return       A pointer to the UText.  If a pre-allocated UText was provided, it
- *               will always be used and returned.
- * @stable ICU 3.4
- */
-U_STABLE UText * U_EXPORT2
-utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
-
-
-#ifdef XP_CPLUSPLUS
-/**
- * Open a writable UText for a non-const UnicodeString. 
- * 
- * @param ut      Pointer to a UText struct.  If NULL, a new UText will be created.
- *                 If non-NULL, must refer to an initialized UText struct, which will then
- *                 be reset to reference the specified input string.
- * @param s       A UnicodeString.
- * @param status Errors are returned here.
- * @return        Pointer to the UText.  If a UText was supplied as input, this
- *                 will always be used and returned.
- * @stable ICU 3.4
- */
-U_STABLE UText * U_EXPORT2
-utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
-
-
-/**
- * Open a UText for a const UnicodeString.   The resulting UText will not be writable.
- * 
- * @param ut    Pointer to a UText struct.  If NULL, a new UText will be created.
- *               If non-NULL, must refer to an initialized UText struct, which will then
- *               be reset to reference the specified input string.
- * @param s      A const UnicodeString to be wrapped.
- * @param status Errors are returned here.
- * @return       Pointer to the UText.  If a UText was supplied as input, this
- *               will always be used and returned.
- * @stable ICU 3.4
- */
-U_STABLE UText * U_EXPORT2
-utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
-
-
-/**
- * Open a writable UText implementation for an ICU Replaceable object.
- * @param ut    Pointer to a UText struct.  If NULL, a new UText will be created.
- *               If non-NULL, must refer to an already existing UText, which will then
- *               be reset to reference the specified replaceable text.
- * @param rep    A Replaceable text object.
- * @param status Errors are returned here.
- * @return       Pointer to the UText.  If a UText was supplied as input, this
- *               will always be used and returned.
- * @see Replaceable
- * @stable ICU 3.4
- */
-U_STABLE UText * U_EXPORT2
-utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorCode *status);
-
-/**
- * Open a  UText implementation over an ICU CharacterIterator.
- * @param ut    Pointer to a UText struct.  If NULL, a new UText will be created.
- *               If non-NULL, must refer to an already existing UText, which will then
- *               be reset to reference the specified replaceable text.
- * @param ci     A Character Iterator.
- * @param status Errors are returned here.
- * @return       Pointer to the UText.  If a UText was supplied as input, this
- *               will always be used and returned.
- * @see Replaceable
- * @stable ICU 3.4
- */
-U_STABLE UText * U_EXPORT2
-utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, UErrorCode *status);
-
-#endif
-
-
-/**
-  *  Clone a UText.  This is much like opening a UText where the source text is itself
-  *  another UText.
-  *
-  *  A deep clone will copy both the UText data structures and the underlying text.
-  *  The original and cloned UText will operate completely independently; modifications
-  *  made to the text in one will not affect the other.  Text providers are not
-  *  required to support deep clones.  The user of clone() must check the status return
-  *  and be prepared to handle failures.
-  *
-  *  The standard UText implementations for UTF8, UChar *, UnicodeString and
-  *  Replaceable all support deep cloning.
-  *
-  *  The UText returned from a deep clone will be writable, assuming that the text
-  *  provider is able to support writing, even if the source UText had been made
-  *  non-writable by means of UText_freeze().
-  *
-  *  A shallow clone replicates only the UText data structures; it does not make
-  *  a copy of the underlying text.  Shallow clones can be used as an efficient way to 
-  *  have multiple iterators active in a single text string that is not being
-  *  modified.
-  *
-  *  A shallow clone operation will not fail, barring truly exceptional conditions such
-  *  as memory allocation failures.
-  *
-  *  Shallow UText clones should be avoided if the UText functions that modify the
-  *  text are expected to be used, either on the original or the cloned UText.
-  *  Any such modifications  can cause unpredictable behavior.  Read Only
-  *  shallow clones provide some protection against errors of this type by
-  *  disabling text modification via the cloned UText.
-  *
-  *  A shallow clone made with the readOnly parameter == FALSE will preserve the 
-  *  utext_isWritable() state of the source object.  Note, however, that
-  *  write operations must be avoided while more than one UText exists that refer
-  *  to the same underlying text.
-  *
-  *  A UText and its clone may be safely concurrently accessed by separate threads.
-  *  This is true for read access only with shallow clones, and for both read and
-  *  write access with deep clones.
-  *  It is the responsibility of the Text Provider to ensure that this thread safety
-  *  constraint is met.
-  *
-  *  @param dest   A UText struct to be filled in with the result of the clone operation,
-  *                or NULL if the clone function should heap-allocate a new UText struct.
-  *                If non-NULL, must refer to an already existing UText, which will then
-  *                be reset to become the clone.
-  *  @param src    The UText to be cloned.
-  *  @param deep   TRUE to request a deep clone, FALSE for a shallow clone.
-  *  @param readOnly TRUE to request that the cloned UText have read only access to the 
-  *                underlying text.  
-
-  *  @param status Errors are returned here.  For deep clones, U_UNSUPPORTED_ERROR
-  *                will be returned if the text provider is unable to clone the
-  *                original text.
-  *  @return       The newly created clone, or NULL if the clone operation failed.
-  *  @stable ICU 3.4
-  */
-U_STABLE UText * U_EXPORT2
-utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
-
-
-/**
-  *  Compare two UText objects for equality.
-  *  UTexts are equal if they are iterating over the same text, and
-  *    have the same iteration position within the text.
-  *    If either or both of the parameters are NULL, the comparison is FALSE.
-  *
-  *  @param a   The first of the two UTexts to compare.
-  *  @param b   The other UText to be compared.
-  *  @return    TRUE if the two UTexts are equal.
-  *  @stable ICU 3.6
-  */
-U_STABLE UBool U_EXPORT2
-utext_equals(const UText *a, const UText *b);
-
-
-/*****************************************************************************
- *
- *   Functions to work with the text represeted by a UText wrapper
- *
- *****************************************************************************/
-
-/**
-  * Get the length of the text.  Depending on the characteristics
-  * of the underlying text representation, this may be expensive.  
-  * @see  utext_isLengthExpensive()
-  *
-  *
-  * @param ut  the text to be accessed.
-  * @return the length of the text, expressed in native units.
-  *
-  * @stable ICU 3.4
-  */
-U_STABLE int64_t U_EXPORT2
-utext_nativeLength(UText *ut);
-
-/**
- *  Return TRUE if calculating the length of the text could be expensive.
- *  Finding the length of NUL terminated strings is considered to be expensive.
- *
- *  Note that the value of this function may change
- *  as the result of other operations on a UText.
- *  Once the length of a string has been discovered, it will no longer
- *  be expensive to report it.
- *
- * @param ut the text to be accessed.
- * @return TRUE if determining the length of the text could be time consuming.
- * @stable ICU 3.4
- */
-U_STABLE UBool U_EXPORT2
-utext_isLengthExpensive(const UText *ut);
-
-/**
- * Returns the code point at the requested index,
- * or U_SENTINEL (-1) if it is out of bounds.
- *
- * If the specified index points to the interior of a multi-unit
- * character - one of the trail bytes of a UTF-8 sequence, for example -
- * the complete code point will be returned.
- *
- * The iteration position will be set to the start of the returned code point.
- *
- * This function is roughly equivalent to the the sequence
- *    utext_setNativeIndex(index);
- *    utext_current32();
- * (There is a subtle difference if the index is out of bounds by being less than zero - 
- * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current()
- * will return the char at zero.  utext_char32At(negative index), on the other hand, will
- * return the U_SENTINEL value of -1.)
- * 
- * @param ut the text to be accessed
- * @param nativeIndex the native index of the character to be accessed.  If the index points
- *        to other than the first unit of a multi-unit character, it will be adjusted
- *        to the start of the character.
- * @return the code point at the specified index.
- * @stable ICU 3.4
- */
-U_STABLE UChar32 U_EXPORT2
-utext_char32At(UText *ut, int64_t nativeIndex);
-
-
-/**
- *
- * Get the code point at the current iteration position,
- * or U_SENTINEL (-1) if the iteration has reached the end of
- * the input text.
- *
- * @param ut the text to be accessed.
- * @return the Unicode code point at the current iterator position.
- * @stable ICU 3.4
- */
-U_STABLE UChar32 U_EXPORT2
-utext_current32(UText *ut);
-
-
-/**
- * Get the code point at the current iteration position of the UText, and
- * advance the position to the first index following the character.
- *
- * If the position is at the end of the text (the index following
- * the last character, which is also the length of the text), 
- * return U_SENTINEL (-1) and do not advance the index. 
- *
- * This is a post-increment operation.
- *
- * An inline macro version of this function, UTEXT_NEXT32(), 
- * is available for performance critical use.
- *
- * @param ut the text to be accessed.
- * @return the Unicode code point at the iteration position.
- * @see UTEXT_NEXT32
- * @stable ICU 3.4
- */
-U_STABLE UChar32 U_EXPORT2
-utext_next32(UText *ut);
-
-
-/**
- *  Move the iterator position to the character (code point) whose
- *  index precedes the current position, and return that character.
- *  This is a pre-decrement operation.
- *
- *  If the initial position is at the start of the text (index of 0) 
- *  return U_SENTINEL (-1), and leave the position unchanged.
- *
- *  An inline macro version of this function, UTEXT_PREVIOUS32(), 
- *  is available for performance critical use.
- *
- *  @param ut the text to be accessed.
- *  @return the previous UChar32 code point, or U_SENTINEL (-1) 
- *          if the iteration has reached the start of the text.
- *  @see UTEXT_PREVIOUS32
- *  @stable ICU 3.4
- */
-U_STABLE UChar32 U_EXPORT2
-utext_previous32(UText *ut);
-
-
-/**
-  * Set the iteration index and return the code point at that index. 
-  * Leave the iteration index at the start of the following code point.
-  *
-  * This function is the most efficient and convenient way to
-  * begin a forward iteration.  The results are identical to the those
-  * from the sequence
-  * \code
-  *    utext_setIndex();
-  *    utext_next32();
-  * \endcode
-  *
-  *  @param ut the text to be accessed.
-  *  @param nativeIndex Iteration index, in the native units of the text provider.
-  *  @return Code point which starts at or before index,
-  *         or U_SENTINEL (-1) if it is out of bounds.
-  * @stable ICU 3.4
-  */
-U_STABLE UChar32 U_EXPORT2
-utext_next32From(UText *ut, int64_t nativeIndex);
-
-
-
-/**
-  * Set the iteration index, and return the code point preceding the
-  * one specified by the initial index.  Leave the iteration position
-  * at the start of the returned code point.
-  *
-  * This function is the most efficient and convenient way to
-  * begin a backwards iteration.
-  *
-  * @param ut the text to be accessed.
-  * @param nativeIndex Iteration index in the native units of the text provider.
-  * @return Code point preceding the one at the initial index,
-  *         or U_SENTINEL (-1) if it is out of bounds.
-  *
-  * @stable ICU 3.4
-  */
-U_STABLE UChar32 U_EXPORT2
-utext_previous32From(UText *ut, int64_t nativeIndex);
-
-/**
-  * Get the current iterator position, which can range from 0 to 
-  * the length of the text.
-  * The position is a native index into the input text, in whatever format it
-  * may have (possibly UTF-8 for example), and may not always be the same as
-  * the corresponding UChar (UTF-16) index.
-  * The returned position will always be aligned to a code point boundary. 
-  *
-  * @param ut the text to be accessed.
-  * @return the current index position, in the native units of the text provider.
-  * @stable ICU 3.4
-  */
-U_STABLE int64_t U_EXPORT2
-utext_getNativeIndex(const UText *ut);
-
-/**
- * Set the current iteration position to the nearest code point
- * boundary at or preceding the specified index.
- * The index is in the native units of the original input text.
- * If the index is out of range, it will be pinned to be within
- * the range of the input text.
- * <p>
- * It will usually be more efficient to begin an iteration
- * using the functions utext_next32From() or utext_previous32From()
- * rather than setIndex().
- * <p>
- * Moving the index position to an adjacent character is best done
- * with utext_next32(), utext_previous32() or utext_moveIndex32().
- * Attempting to do direct arithmetic on the index position is
- * complicated by the fact that the size (in native units) of a
- * character depends on the underlying representation of the character
- * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not
- * easily knowable.
- *
- * @param ut the text to be accessed.
- * @param nativeIndex the native unit index of the new iteration position.
- * @stable ICU 3.4
- */
-U_STABLE void U_EXPORT2
-utext_setNativeIndex(UText *ut, int64_t nativeIndex);
-
-/**
- * Move the iterator postion by delta code points.  The number of code points
- * is a signed number; a negative delta will move the iterator backwards,
- * towards the start of the text.
- * <p>
- * The index is moved by <code>delta</code> code points
- * forward or backward, but no further backward than to 0 and
- * no further forward than to utext_nativeLength().
- * The resulting index value will be in between 0 and length, inclusive.
- *
- * @param ut the text to be accessed.
- * @param delta the signed number of code points to move the iteration position.
- * @return TRUE if the position could be moved the requested number of positions while
- *              staying within the range [0 - text length].
- * @stable ICU 3.4
- */
-U_STABLE UBool U_EXPORT2
-utext_moveIndex32(UText *ut, int32_t delta);
-
-/**
- * Get the native index of the character preceeding the current position.
- * If the iteration position is already at the start of the text, zero
- * is returned.
- * The value returned is the same as that obtained from the following sequence,
- * but without the side effect of changing the iteration position.
- *   
- * \code
- *    UText  *ut = whatever;
- *      ...
- *    utext_previous(ut)
- *    utext_getNativeIndex(ut);
- * \endcode
- *
- * This function is most useful during forwards iteration, where it will get the
- *   native index of the character most recently returned from utext_next().
- *
- * @param ut the text to be accessed
- * @return the native index of the character preceeding the current index position,
- *         or zero if the current position is at the start of the text.
- * @stable ICU 3.6
- */
-U_STABLE int64_t U_EXPORT2
-utext_getPreviousNativeIndex(UText *ut); 
-
-
-/**
- *
- * Extract text from a UText into a UChar buffer.  The range of text to be extracted
- * is specified in the native indices of the UText provider.  These may not necessarily
- * be UTF-16 indices.
- * <p>
- * The size (number of 16 bit UChars) of the data to be extracted is returned.  The
- * full number of UChars is returned, even when the extracted text is truncated
- * because the specified buffer size is too small.
- * <p>
- * The extracted string will (if you are a user) / must (if you are a text provider)
- * be NUL-terminated if there is sufficient space in the destination buffer.  This
- * terminating NUL is not included in the returned length.
- * <p>
- * The iteration index is left at the position following the last extracted character.
- *
- * @param  ut    the UText from which to extract data.
- * @param  nativeStart the native index of the first character to extract.\
- *               If the specified index is out of range,
- *               it will be pinned to to be within 0 <= index <= textLength
- * @param  nativeLimit the native string index of the position following the last
- *               character to extract.  If the specified index is out of range,
- *               it will be pinned to to be within 0 <= index <= textLength.
- *               nativeLimit must be >= nativeStart.
- * @param  dest  the UChar (UTF-16) buffer into which the extracted text is placed
- * @param  destCapacity  The size, in UChars, of the destination buffer.  May be zero
- *               for precomputing the required size.
- * @param  status receives any error status.
- *         U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the 
- *         buffer was too small.  Returns number of UChars for preflighting.
- * @return Number of UChars in the data to be extracted.  Does not include a trailing NUL.
- *
- * @stable ICU 3.4
- */
-U_STABLE int32_t U_EXPORT2
-utext_extract(UText *ut,
-             int64_t nativeStart, int64_t nativeLimit,
-             UChar *dest, int32_t destCapacity,
-             UErrorCode *status);
-
-
-/************************************************************************************
- *
- *  #define inline versions of selected performance-critical text access functions
- *          Caution:  do not use auto increment++ or decrement-- expressions
- *                    as parameters to these macros.
- *
- *          For most use, where there is no extreme performance constraint, the
- *          normal, non-inline functions are a better choice.  The resulting code
- *          will be smaller, and, if the need ever arises, easier to debug.
- *
- *          These are implemented as #defines rather than real functions
- *          because there is no fully portable way to do inline functions in plain C.
- *
- ************************************************************************************/
-
-/**
- * inline version of utext_next32(), for performance-critical situations.
- *
- * Get the code point at the current iteration position of the UText, and
- * advance the position to the first index following the character.
- * This is a post-increment operation.
- * Returns U_SENTINEL (-1) if the position is at the end of the
- * text.
- *
- * @stable ICU 3.4
- */
-#define UTEXT_NEXT32(ut)  \
-    ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
-    ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
-
-/**
- * inline version of utext_previous32(), for performance-critical situations.
- *
- *  Move the iterator position to the character (code point) whose
- *  index precedes the current position, and return that character.
- *  This is a pre-decrement operation.
- *  Returns U_SENTINEL (-1) if the position is at the start of the  text.
- *
- * @stable ICU 3.4
- */
-#define UTEXT_PREVIOUS32(ut)  \
-    ((ut)->chunkOffset > 0 && \
-     (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
-          (ut)->chunkContents[--((ut)->chunkOffset)]  :  utext_previous32(ut))
-
-/**
-  *  inline version of utext_getNativeIndex(), for performance-critical situations.
-  *
-  * Get the current iterator position, which can range from 0 to 
-  * the length of the text.
-  * The position is a native index into the input text, in whatever format it
-  * may have (possibly UTF-8 for example), and may not always be the same as
-  * the corresponding UChar (UTF-16) index.
-  * The returned position will always be aligned to a code point boundary. 
-  *
-  * @stable ICU 3.6
-  */
-#define UTEXT_GETNATIVEINDEX(ut)                       \
-    ((ut)->chunkOffset <= (ut)->nativeIndexingLimit?   \
-        (ut)->chunkNativeStart+(ut)->chunkOffset :     \
-        (ut)->pFuncs->mapOffsetToNative(ut))    
-
-/**
-  *  inline version of utext_setNativeIndex(), for performance-critical situations.
-  *
-  * Set the current iteration position to the nearest code point
-  * boundary at or preceding the specified index.
-  * The index is in the native units of the original input text.
-  * If the index is out of range, it will be pinned to be within
-  * the range of the input text.
-  *
-  * @stable ICU 4.0
-  */
-#define UTEXT_SETNATIVEINDEX(ut, ix)                       \
-    { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
-      if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \
-          (ut)->chunkOffset=(int32_t)__offset; \
-      } else { \
-          utext_setNativeIndex((ut), (ix)); } }
-
-
-
-/************************************************************************************
- *
- *   Functions related to writing or modifying the text.
- *   These will work only with modifiable UTexts.  Attempting to
- *   modify a read-only UText will return an error status.
- *
- ************************************************************************************/
-
-
-/**
- *  Return TRUE if the text can be written (modified) with utext_replace() or
- *  utext_copy().  For the text to be writable, the text provider must
- *  be of a type that supports writing and the UText must not be frozen.
- *
- *  Attempting to modify text when utext_isWriteable() is FALSE will fail -
- *  the text will not be modified, and an error will be returned from the function
- *  that attempted the modification.
- *
- * @param  ut   the UText to be tested.
- * @return TRUE if the text is modifiable.
- *
- * @see    utext_freeze()
- * @see    utext_replace()
- * @see    utext_copy()
- * @stable ICU 3.4
- *
- */
-U_STABLE UBool U_EXPORT2
-utext_isWritable(const UText *ut);
-
-
-/**
-  * Test whether there is meta data associated with the text.
-  * @see Replaceable::hasMetaData()
-  *
-  * @param ut The UText to be tested
-  * @return TRUE if the underlying text includes meta data.
-  * @stable ICU 3.4
-  */
-U_STABLE UBool U_EXPORT2
-utext_hasMetaData(const UText *ut);
-
-
-/**
- * Replace a range of the original text with a replacement text.
- *
- * Leaves the current iteration position at the position following the
- *  newly inserted replacement text.
- *
- * This function is only available on UText types that support writing,
- * that is, ones where utext_isWritable() returns TRUE.
- *
- * When using this function, there should be only a single UText opened onto the
- * underlying native text string.  Behavior after a replace operation
- * on a UText is undefined for any other additional UTexts that refer to the
- * modified string.
- *
- * @param ut               the UText representing the text to be operated on.
- * @param nativeStart      the native index of the start of the region to be replaced
- * @param nativeLimit      the native index of the character following the region to be replaced.
- * @param replacementText  pointer to the replacement text
- * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated.
- * @param status           receives any error status.  Possible errors include
- *                         U_NO_WRITE_PERMISSION
- *
- * @return The signed number of (native) storage units by which
- *         the length of the text expanded or contracted.
- *
- * @stable ICU 3.4
- */
-U_STABLE int32_t U_EXPORT2
-utext_replace(UText *ut,
-             int64_t nativeStart, int64_t nativeLimit,
-             const UChar *replacementText, int32_t replacementLength,
-             UErrorCode *status);
-
-
-
-/**
- *
- * Copy or move a substring from one position to another within the text,
- * while retaining any metadata associated with the text.
- * This function is used to duplicate or reorder substrings.
- * The destination index must not overlap the source range.
- *
- * The text to be copied or moved is inserted at destIndex;
- * it does not replace or overwrite any existing text.
- *
- * The iteration position is left following the newly inserted text
- * at the destination position.
- *
- * This function is only available on UText types that support writing,
- * that is, ones where utext_isWritable() returns TRUE.
- *
- * When using this function, there should be only a single UText opened onto the
- * underlying native text string.  Behavior after a copy operation
- * on a UText is undefined in any other additional UTexts that refer to the
- * modified string.
- *
- * @param ut           The UText representing the text to be operated on.
- * @param nativeStart  The native index of the start of the region to be copied or moved
- * @param nativeLimit  The native index of the character position following the region
- *                     to be copied.
- * @param destIndex    The native destination index to which the source substring is
- *                     copied or moved.
- * @param move         If TRUE, then the substring is moved, not copied/duplicated.
- * @param status       receives any error status.  Possible errors include U_NO_WRITE_PERMISSION
- *                       
- * @stable ICU 3.4
- */
-U_STABLE void U_EXPORT2
-utext_copy(UText *ut,
-          int64_t nativeStart, int64_t nativeLimit,
-          int64_t destIndex,
-          UBool move,
-          UErrorCode *status);
-
-
-/**
-  *  <p>
-  *  Freeze a UText.  This prevents any modification to the underlying text itself
-  *  by means of functions operating on this UText.
-  *  </p>
-  *  <p>
-  *  Once frozen, a UText can not be unfrozen.  The intent is to ensure
-  *  that a the text underlying a frozen UText wrapper cannot be modified via that UText.
-  *  </p>
-  *  <p>
-  *  Caution:  freezing a UText will disable changes made via the specific
-  *   frozen UText wrapper only; it will not have any effect on the ability to
-  *   directly modify the text by bypassing the UText.  Any such backdoor modifications
-  *   are always an error while UText access is occuring because the underlying
-  *   text can get out of sync with UText's buffering.
-  *  </p>
-  *
-  *  @param ut  The UText to be frozen.
-  *  @see   utext_isWritable()
-  *  @stable ICU 3.6
-  */
-U_STABLE void U_EXPORT2
-utext_freeze(UText *ut);
-
-
-/**
- * UText provider properties (bit field indexes).
- *
- * @see UText
- * @stable ICU 3.4
- */
-enum {
-    /**
-     * It is potentially time consuming for the provider to determine the length of the text.
-     * @stable ICU 3.4
-     */
-    UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
-    /**
-     * Text chunks remain valid and usable until the text object is modified or
-     * deleted, not just until the next time the access() function is called
-     * (which is the default).
-     * @stable ICU 3.4
-     */
-    UTEXT_PROVIDER_STABLE_CHUNKS = 2,
-    /**
-     * The provider supports modifying the text via the replace() and copy()
-     * functions.
-     * @see Replaceable
-     * @stable ICU 3.4
-     */
-    UTEXT_PROVIDER_WRITABLE = 3,
-    /**
-     * There is meta data associated with the text.
-     * @see Replaceable::hasMetaData()
-     * @stable ICU 3.4
-     */ 
-    UTEXT_PROVIDER_HAS_META_DATA = 4,
-    /**
-     * Text provider owns the text storage.
-     *  Generally occurs as the result of a deep clone of the UText.
-     *  When closing the UText, the associated text must
-     *  also be closed/deleted/freed/ whatever is appropriate.
-     * @stable ICU 3.6
-     */
-     UTEXT_PROVIDER_OWNS_TEXT = 5
-};
-
-/**
-  * Function type declaration for UText.clone().
-  *
-  *  clone a UText.  Much like opening a UText where the source text is itself
-  *  another UText.
-  *
-  *  A deep clone will copy both the UText data structures and the underlying text.
-  *  The original and cloned UText will operate completely independently; modifications
-  *  made to the text in one will not effect the other.  Text providers are not
-  *  required to support deep clones.  The user of clone() must check the status return
-  *  and be prepared to handle failures.
-  *
-  *  A shallow clone replicates only the UText data structures; it does not make
-  *  a copy of the underlying text.  Shallow clones can be used as an efficient way to 
-  *  have multiple iterators active in a single text string that is not being
-  *  modified.
-  *
-  *  A shallow clone operation must not fail except for truly exceptional conditions such
-  *  as memory allocation failures.
-  *
-  *  A UText and its clone may be safely concurrently accessed by separate threads.
-  *  This is true for both shallow and deep clones.
-  *  It is the responsibility of the Text Provider to ensure that this thread safety
-  *  constraint is met.
-
-  *
-  *  @param dest   A UText struct to be filled in with the result of the clone operation,
-  *                or NULL if the clone function should heap-allocate a new UText struct.
-  *  @param src    The UText to be cloned.
-  *  @param deep   TRUE to request a deep clone, FALSE for a shallow clone.
-  *  @param status Errors are returned here.  For deep clones, U_UNSUPPORTED_ERROR
-  *                should be returned if the text provider is unable to clone the
-  *                original text.
-  *  @return       The newly created clone, or NULL if the clone operation failed.
-  *
-  * @stable ICU 3.4
-  */
-typedef UText * U_CALLCONV
-UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
-
-
-/**
- * Function type declaration for UText.nativeLength().
- *
- * @param ut the UText to get the length of.
- * @return the length, in the native units of the original text string.
- * @see UText
- * @stable ICU 3.4
- */
-typedef int64_t U_CALLCONV
-UTextNativeLength(UText *ut);
-
-/**
- * Function type declaration for UText.access().  Get the description of the text chunk
- *  containing the text at a requested native index.  The UText's iteration
- *  position will be left at the requested index.  If the index is out
- *  of bounds, the iteration position will be left at the start or end
- *  of the string, as appropriate.
- *
- *  Chunks must begin and end on code point boundaries.  A single code point
- *  comprised of multiple storage units must never span a chunk boundary.
- *
- *
- * @param ut          the UText being accessed.
- * @param nativeIndex Requested index of the text to be accessed.
- * @param forward     If TRUE, then the returned chunk must contain text
- *                    starting from the index, so that start<=index<limit.
- *                    If FALSE, then the returned chunk must contain text
- *                    before the index, so that start<index<=limit.
- * @return            True if the requested index could be accessed.  The chunk
- *                    will contain the requested text.
- *                    False value if a chunk cannot be accessed
- *                    (the requested index is out of bounds).
- *
- * @see UText
- * @stable ICU 3.4
- */
-typedef UBool U_CALLCONV
-UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
-
-/**
- * Function type declaration for UText.extract().
- *
- * Extract text from a UText into a UChar buffer.  The range of text to be extracted
- * is specified in the native indices of the UText provider.  These may not necessarily
- * be UTF-16 indices.
- * <p>
- * The size (number of 16 bit UChars) in the data to be extracted is returned.  The
- * full amount is returned, even when the specified buffer size is smaller.
- * <p>
- * The extracted string will (if you are a user) / must (if you are a text provider)
- * be NUL-terminated if there is sufficient space in the destination buffer.
- *
- * @param  ut            the UText from which to extract data.
- * @param  nativeStart   the native index of the first characer to extract.
- * @param  nativeLimit   the native string index of the position following the last
- *                       character to extract.
- * @param  dest          the UChar (UTF-16) buffer into which the extracted text is placed
- * @param  destCapacity  The size, in UChars, of the destination buffer.  May be zero
- *                       for precomputing the required size.
- * @param  status        receives any error status.
- *                       If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
- *                       preflighting.
- * @return Number of UChars in the data.  Does not include a trailing NUL.
- *
- * @stable ICU 3.4
- */
-typedef int32_t U_CALLCONV
-UTextExtract(UText *ut,
-             int64_t nativeStart, int64_t nativeLimit,
-             UChar *dest, int32_t destCapacity,
-             UErrorCode *status);
-
-/**
- * Function type declaration for UText.replace().
- *
- * Replace a range of the original text with a replacement text.
- *
- * Leaves the current iteration position at the position following the
- *  newly inserted replacement text.
- *
- * This function need only be implemented on UText types that support writing.
- *
- * When using this function, there should be only a single UText opened onto the
- * underlying native text string.  The function is responsible for updating the
- * text chunk within the UText to reflect the updated iteration position,
- * taking into account any changes to the underlying string's structure caused
- * by the replace operation.
- *
- * @param ut               the UText representing the text to be operated on.
- * @param nativeStart      the index of the start of the region to be replaced
- * @param nativeLimit      the index of the character following the region to be replaced.
- * @param replacementText  pointer to the replacement text
- * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
- * @param status           receives any error status.  Possible errors include
- *                         U_NO_WRITE_PERMISSION
- *
- * @return The signed number of (native) storage units by which
- *         the length of the text expanded or contracted.
- *
- * @stable ICU 3.4
- */
-typedef int32_t U_CALLCONV
-UTextReplace(UText *ut,
-             int64_t nativeStart, int64_t nativeLimit,
-             const UChar *replacementText, int32_t replacmentLength,
-             UErrorCode *status);
-
-/**
- * Function type declaration for UText.copy().
- *
- * Copy or move a substring from one position to another within the text,
- * while retaining any metadata associated with the text.
- * This function is used to duplicate or reorder substrings.
- * The destination index must not overlap the source range.
- *
- * The text to be copied or moved is inserted at destIndex;
- * it does not replace or overwrite any existing text.
- *
- * This function need only be implemented for UText types that support writing.
- *
- * When using this function, there should be only a single UText opened onto the
- * underlying native text string.  The function is responsible for updating the
- * text chunk within the UText to reflect the updated iteration position,
- * taking into account any changes to the underlying string's structure caused
- * by the replace operation.
- *
- * @param ut           The UText representing the text to be operated on.
- * @param nativeStart  The index of the start of the region to be copied or moved
- * @param nativeLimit  The index of the character following the region to be replaced.
- * @param nativeDest   The destination index to which the source substring is copied or moved.
- * @param move         If TRUE, then the substring is moved, not copied/duplicated.
- * @param status       receives any error status.  Possible errors include U_NO_WRITE_PERMISSION
- *
- * @stable ICU 3.4
- */
-typedef void U_CALLCONV
-UTextCopy(UText *ut,
-          int64_t nativeStart, int64_t nativeLimit,
-          int64_t nativeDest,
-          UBool move,
-          UErrorCode *status);
-
-/**
- * Function type declaration for UText.mapOffsetToNative().
- * Map from the current UChar offset within the current text chunk to
- *  the corresponding native index in the original source text.
- *
- * This is required only for text providers that do not use native UTF-16 indexes.
- *
- * @param ut     the UText.
- * @return Absolute (native) index corresponding to chunkOffset in the current chunk.
- *         The returned native index should always be to a code point boundary.
- *
- * @stable ICU 3.4
- */
-typedef int64_t U_CALLCONV
-UTextMapOffsetToNative(const UText *ut);
-
-/**
- * Function type declaration for UText.mapIndexToUTF16().
- * Map from a native index to a UChar offset within a text chunk.
- * Behavior is undefined if the native index does not fall within the
- *   current chunk.
- *
- * This function is required only for text providers that do not use native UTF-16 indexes.
- *
- * @param ut          The UText containing the text chunk.
- * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit.
- * @return            Chunk-relative UTF-16 offset corresponding to the specified native
- *                    index.
- *
- * @stable ICU 3.4
- */
-typedef int32_t U_CALLCONV
-UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
-
-
-/**
- * Function type declaration for UText.utextClose().
- *
- * A Text Provider close function is only required for provider types that make
- *  allocations in their open function (or other functions) that must be 
- *  cleaned when the UText is closed.
- *
- * The allocation of the UText struct itself and any "extra" storage
- * associated with the UText is handled by the common UText implementation
- * and does not require provider specific cleanup in a close function.
- *
- * Most UText provider implementations do not need to implement this function.
- *
- * @param ut A UText object to be closed.
- *
- * @stable ICU 3.4
- */
-typedef void U_CALLCONV
-UTextClose(UText *ut);
-
-
-/**
-  *   (public)  Function dispatch table for UText.
-  *             Conceptually very much like a C++ Virtual Function Table.
-  *             This struct defines the organization of the table.
-  *             Each text provider implementation must provide an
-  *              actual table that is initialized with the appropriate functions
-  *              for the type of text being handled.
-  *   @stable ICU 3.6
-  */
-struct UTextFuncs {
-    /**
-     *   (public)  Function table size, sizeof(UTextFuncs)
-     *             Intended for use should the table grow to accomodate added
-     *             functions in the future, to allow tests for older format
-     *             function tables that do not contain the extensions.
-     *
-     *             Fields are placed for optimal alignment on
-     *             32/64/128-bit-pointer machines, by normally grouping together
-     *             4 32-bit fields,
-     *             4 pointers,
-     *             2 64-bit fields
-     *             in sequence.
-     *   @stable ICU 3.6
-     */
-    int32_t       tableSize;
-
-    /**
-      *   (private)  Alignment padding.
-      *              Do not use, reserved for use by the UText framework only.
-      *   @internal
-      */
-    int32_t       reserved1, /** @internal */ reserved2, /** @internal */ reserved3;
-
-
-    /**
-     * (public) Function pointer for UTextClone
-     *
-     * @see UTextClone
-     * @stable ICU 3.6
-     */
-    UTextClone *clone;
-
-    /**
-     * (public) function pointer for UTextLength
-     * May be expensive to compute!
-     *
-     * @see UTextLength
-     * @stable ICU 3.6
-     */
-    UTextNativeLength *nativeLength;
-
-    /**
-     * (public) Function pointer for UTextAccess.
-     *
-     * @see UTextAccess
-     * @stable ICU 3.6
-     */
-    UTextAccess *access;
-
-    /**
-     * (public) Function pointer for UTextExtract.
-     *
-     * @see UTextExtract
-     * @stable ICU 3.6
-     */
-    UTextExtract *extract;
-
-    /**
-     * (public) Function pointer for UTextReplace.
-     *
-     * @see UTextReplace
-     * @stable ICU 3.6
-     */
-    UTextReplace *replace;
-
-    /**
-     * (public) Function pointer for UTextCopy.
-     *
-     * @see UTextCopy
-     * @stable ICU 3.6
-     */
-    UTextCopy *copy;
-
-    /**
-     * (public) Function pointer for UTextMapOffsetToNative.
-     *
-     * @see UTextMapOffsetToNative
-     * @stable ICU 3.6
-     */
-    UTextMapOffsetToNative *mapOffsetToNative;
-
-    /**
-     * (public) Function pointer for UTextMapNativeIndexToUTF16.
-     *
-     * @see UTextMapNativeIndexToUTF16
-     * @stable ICU 3.6
-     */
-    UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;
-
-    /**
-     * (public) Function pointer for UTextClose.
-      *
-      * @see UTextClose
-      * @stable ICU 3.6
-      */
-    UTextClose  *close;
-
-    /**
-      * (private)  Spare function pointer
-      * @internal
-      */
-
-    UTextClose  *spare1;
-    /**
-      * (private)  Spare function pointer
-      * @internal
-      */
-    UTextClose  *spare2;
-
-    /**
-      * (private)  Spare function pointer
-      * @internal
-      */
-    UTextClose  *spare3;
-
-};
-/**
- * Function dispatch table for UText
- * @see UTextFuncs
- */
-typedef struct UTextFuncs UTextFuncs;
-
- /**
-  *   UText struct.  Provides the interface between the generic UText access code
-  *                  and the UText provider code that works on specific kinds of
-  *                  text  (UTF-8, noncontiguous UTF-16, whatever.)
-  *
-  *                  Applications that are using predefined types of text providers
-  *                  to pass text data to ICU services will have no need to view the
-  *                  internals of the UText structs that they open.
-  *
-  * @stable ICU 3.6
-  */
-struct UText {
-    /**
-     *     (private)  Magic.  Used to help detect when UText functions are handed
-     *                        invalid or unitialized UText structs.
-     *                        utext_openXYZ() functions take an initialized,
-     *                        but not necessarily open, UText struct as an
-     *                        optional fill-in parameter.  This magic field
-     *                        is used to check for that initialization.
-     *                        Text provider close functions must NOT clear
-     *                        the magic field because that would prevent
-     *                        reuse of the UText struct.
-     * @internal
-     */
-    uint32_t       magic;
-
-
-    /**
-     *     (private)  Flags for managing the allocation and freeing of
-     *                memory associated with this UText.
-     * @internal
-     */
-    int32_t        flags;
-
-
-    /**
-      *  Text provider properties.  This set of flags is maintainted by the
-      *                             text provider implementation.
-      *  @stable ICU 3.4
-      */
-    int32_t         providerProperties;
-
-    /**
-     * (public) sizeOfStruct=sizeof(UText)
-     * Allows possible backward compatible extension.
-     *
-     * @stable ICU 3.4
-     */
-    int32_t         sizeOfStruct;
-    
-    /* ------ 16 byte alignment boundary -----------  */
-    
-
-    /**
-      *  (protected) Native index of the first character position following
-      *              the current chunk.
-      *  @stable ICU 3.6
-      */
-    int64_t         chunkNativeLimit;
-
-    /**
-     *   (protected)  Size in bytes of the extra space (pExtra).
-     *  @stable ICU 3.4
-     */
-    int32_t        extraSize;
-
-    /**
-      *    (protected) The highest chunk offset where native indexing and
-      *    chunk (UTF-16) indexing correspond.  For UTF-16 sources, value
-      *    will be equal to chunkLength.
-      *
-      *    @stable ICU 3.6
-      */
-    int32_t         nativeIndexingLimit;
-
-    /* ---- 16 byte alignment boundary------ */
-    
-    /**
-     *  (protected) Native index of the first character in the text chunk.
-     *  @stable ICU 3.6
-     */
-    int64_t         chunkNativeStart;
-
-    /**
-     *  (protected) Current iteration position within the text chunk (UTF-16 buffer).
-     *  This is the index to the character that will be returned by utext_next32().
-     *  @stable ICU 3.6
-     */
-    int32_t         chunkOffset;
-
-    /**
-     *  (protected) Length the text chunk (UTF-16 buffer), in UChars.
-     *  @stable ICU 3.6
-     */
-    int32_t         chunkLength;
-
-    /* ---- 16  byte alignment boundary-- */
-    
-
-    /**
-     *  (protected)  pointer to a chunk of text in UTF-16 format.
-     *  May refer either to original storage of the source of the text, or
-     *  if conversion was required, to a buffer owned by the UText.
-     *  @stable ICU 3.6
-     */
-    const UChar    *chunkContents;
-
-     /**
-      * (public)     Pointer to Dispatch table for accessing functions for this UText.
-      * @stable ICU 3.6
-      */
-    const UTextFuncs     *pFuncs;
-
-    /**
-     *  (protected)  Pointer to additional space requested by the
-     *               text provider during the utext_open operation.
-     * @stable ICU 3.4
-     */
-    void          *pExtra;
-
-    /**
-     * (protected) Pointer to string or text-containin object or similar.
-     * This is the source of the text that this UText is wrapping, in a format
-     *  that is known to the text provider functions.
-     * @stable ICU 3.4
-     */
-    const void   *context;
-
-    /* --- 16 byte alignment boundary--- */
-
-    /**
-     * (protected) Pointer fields available for use by the text provider.
-     * Not used by UText common code.
-     * @stable ICU 3.6
-     */
-    const void     *p; 
-    /**
-     * (protected) Pointer fields available for use by the text provider.
-     * Not used by UText common code.
-     * @stable ICU 3.6
-     */
-    const void     *q;
-     /**
-     * (protected) Pointer fields available for use by the text provider.
-     * Not used by UText common code.
-     * @stable ICU 3.6
-      */
-    const void     *r;
-
-    /**
-      *  Private field reserved for future use by the UText framework
-      *     itself.  This is not to be touched by the text providers.
-      * @internal ICU 3.4
-      */
-    void           *privP;
-
-
-    /* --- 16 byte alignment boundary--- */
-    
-
-    /**
-      * (protected) Integer field reserved for use by the text provider.
-      * Not used by the UText framework, or by the client (user) of the UText.
-      * @stable ICU 3.4
-      */
-    int64_t         a;
-
-    /**
-      * (protected) Integer field reserved for use by the text provider.
-      * Not used by the UText framework, or by the client (user) of the UText.
-      * @stable ICU 3.4
-      */
-    int32_t         b;
-
-    /**
-      * (protected) Integer field reserved for use by the text provider.
-      * Not used by the UText framework, or by the client (user) of the UText.
-      * @stable ICU 3.4
-      */
-    int32_t         c;
-
-    /*  ---- 16 byte alignment boundary---- */
-
-
-    /**
-      *  Private field reserved for future use by the UText framework
-      *     itself.  This is not to be touched by the text providers.
-      * @internal ICU 3.4
-      */
-    int64_t         privA;
-    /**
-      *  Private field reserved for future use by the UText framework
-      *     itself.  This is not to be touched by the text providers.
-      * @internal ICU 3.4
-      */
-    int32_t         privB;
-    /**
-      *  Private field reserved for future use by the UText framework
-      *     itself.  This is not to be touched by the text providers.
-      * @internal ICU 3.4
-      */
-    int32_t         privC;
-};
-
-
-/**
- *  Common function for use by Text Provider implementations to allocate and/or initialize
- *  a new UText struct.  To be called in the implementation of utext_open() functions.
- *  If the supplied UText parameter is null, a new UText struct will be allocated on the heap.
- *  If the supplied UText is already open, the provider's close function will be called
- *  so that the struct can be reused by the open that is in progress.
- *
- * @param ut   pointer to a UText struct to be re-used, or null if a new UText
- *             should be allocated.
- * @param extraSpace The amount of additional space to be allocated as part
- *             of this UText, for use by types of providers that require
- *             additional storage.
- * @param status Errors are returned here.
- * @return pointer to the UText, allocated if necessary, with extra space set up if requested.
- * @stable ICU 3.4
- */
-U_STABLE UText * U_EXPORT2
-utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
-
-/**
-  * @internal
-  *  Value used to help identify correctly initialized UText structs.
-  *  Note:  must be publicly visible so that UTEXT_INITIALIZER can access it.
-  */
-enum {
-    UTEXT_MAGIC = 0x345ad82c
-};
-
-/**
- * initializer to be used with local (stack) instances of a UText
- *  struct.  UText structs must be initialized before passing
- *  them to one of the utext_open functions.
- *
- * @stable ICU 3.6
- */
-#define UTEXT_INITIALIZER {                                        \
-                  UTEXT_MAGIC,          /* magic                */ \
-                  0,                    /* flags                */ \
-                  0,                    /* providerProps        */ \
-                  sizeof(UText),        /* sizeOfStruct         */ \
-                  0,                    /* chunkNativeLimit     */ \
-                  0,                    /* extraSize            */ \
-                  0,                    /* nativeIndexingLimit  */ \
-                  0,                    /* chunkNativeStart     */ \
-                  0,                    /* chunkOffset          */ \
-                  0,                    /* chunkLength          */ \
-                  NULL,                 /* chunkContents        */ \
-                  NULL,                 /* pFuncs               */ \
-                  NULL,                 /* pExtra               */ \
-                  NULL,                 /* context              */ \
-                  NULL, NULL, NULL,     /* p, q, r              */ \
-                  NULL,                 /* privP                */ \
-                  0, 0, 0,              /* a, b, c              */ \
-                  0, 0, 0               /* privA,B,C,           */ \
-                  }
-
-
-U_CDECL_END
-
-
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/utext.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/utext.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/utext.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/utext.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1562 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2004-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utext.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2004oct06
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UTEXT_H__
+#define __UTEXT_H__
+
+/**
+ * \file
+ * \brief C API: Abstract Unicode Text API
+ *
+ * The Text Access API provides a means to allow text that is stored in alternative
+ * formats to work with ICU services.  ICU normally operates on text that is
+ * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type
+ * UnicodeString for C++ APIs.
+ *
+ * ICU Text Access allows other formats, such as UTF-8 or non-contiguous
+ * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services.
+ *
+ * There are three general classes of usage for UText:
+ *
+ *     Application Level Use.  This is the simplest usage - applications would
+ *     use one of the utext_open() functions on their input text, and pass
+ *     the resulting UText to the desired ICU service.
+ *
+ *     Second is usage in ICU Services, such as break iteration, that will need to
+ *     operate on input presented to them as a UText.  These implementations
+ *     will need to use the iteration and related UText functions to gain
+ *     access to the actual text.
+ *
+ *     The third class of UText users are "text providers."  These are the
+ *     UText implementations for the various text storage formats.  An application
+ *     or system with a unique text storage format can implement a set of
+ *     UText provider functions for that format, which will then allow
+ *     ICU services to operate on that format.
+ *
+ *
+ * <em>Iterating over text</em>
+ *
+ * Here is sample code for a forward iteration over the contents of a UText
+ *
+ * \code
+ *    UChar32  c;
+ *    UText    *ut = whatever();
+ *
+ *    for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) {
+ *       // do whatever with the codepoint c here.
+ *    }
+ * \endcode
+ *
+ * And here is similar code to iterate in the reverse direction, from the end
+ * of the text towards the beginning.
+ *
+ * \code
+ *    UChar32  c;
+ *    UText    *ut = whatever();
+ *    int      textLength = utext_nativeLength(ut);
+ *    for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) {
+ *       // do whatever with the codepoint c here.
+ *    }
+ * \endcode
+ *
+ * <em>Characters and Indexing</em>
+ *
+ * Indexing into text by UText functions is nearly always in terms of the native
+ * indexing of the underlying text storage.  The storage format could be UTF-8
+ * or UTF-32, for example.  When coding to the UText access API, no assumptions
+ * can be made regarding the size of characters, or how far an index
+ * may move when iterating between characters.
+ *
+ * All indices supplied to UText functions are pinned to the length of the
+ * text.  An out-of-bounds index is not considered to be an error, but is
+ * adjusted to be in the range  0 <= index <= length of input text.
+ *
+ *
+ * When an index position is returned from a UText function, it will be
+ * a native index to the underlying text.  In the case of multi-unit characters,
+ * it will  always refer to the first position of the character,
+ * never to the interior.  This is essentially the same thing as saying that
+ * a returned index will always point to a boundary between characters.
+ *
+ * When a native index is supplied to a UText function, all indices that
+ * refer to any part of a multi-unit character representation are considered
+ * to be equivalent.  In the case of multi-unit characters, an incoming index
+ * will be logically normalized to refer to the start of the character.
+ * 
+ * It is possible to test whether a native index is on a code point boundary
+ * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex().
+ * If the index is returned unchanged, it was on a code point boundary.  If
+ * an adjusted index is returned, the original index referred to the
+ * interior of a character.
+ *
+ * <em>Conventions for calling UText functions</em>
+ *
+ * Most UText access functions have as their first parameter a (UText *) pointer,
+ * which specifies the UText to be used.  Unless otherwise noted, the
+ * pointer must refer to a valid, open UText.  Attempting to
+ * use a closed UText or passing a NULL pointer is a programming error and
+ * will produce undefined results or NULL pointer exceptions.
+ * 
+ * The UText_Open family of functions can either open an existing (closed)
+ * UText, or heap allocate a new UText.  Here is sample code for creating
+ * a stack-allocated UText.
+ *
+ * \code
+ *    char     *s = whatever();  // A utf-8 string 
+ *    U_ErrorCode status = U_ZERO_ERROR;
+ *    UText    ut = UTEXT_INITIALIZER;
+ *    utext_openUTF8(ut, s, -1, &status);
+ *    if (U_FAILURE(status)) {
+ *        // error handling
+ *    } else {
+ *        // work with the UText
+ *    }
+ * \endcode
+ *
+ * Any existing UText passed to an open function _must_ have been initialized, 
+ * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated
+ * by an open function.  Passing NULL will cause the open function to
+ * heap-allocate and fully initialize a new UText.
+ *
+ */
+
+
+
+#include "unicode/utypes.h"
+#ifdef XP_CPLUSPLUS
+#include "unicode/rep.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#endif
+
+
+U_CDECL_BEGIN
+
+struct UText;
+typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */
+
+
+/***************************************************************************************
+ *
+ *   C Functions for creating UText wrappers around various kinds of text strings.
+ *
+ ****************************************************************************************/
+
+
+/**
+  * Close function for UText instances.
+  * Cleans up, releases any resources being held by an open UText.
+  * <p>
+  *   If the UText was originally allocated by one of the utext_open functions,
+  *   the storage associated with the utext will also be freed.
+  *   If the UText storage originated with the application, as it would with
+  *   a local or static instance, the storage will not be deleted.
+  *
+  *   An open UText can be reset to refer to new string by using one of the utext_open()
+  *   functions without first closing the UText.  
+  *
+  * @param ut  The UText to be closed.
+  * @return    NULL if the UText struct was deleted by the close.  If the UText struct
+  *            was originally provided by the caller to the open function, it is
+  *            returned by this function, and may be safely used again in
+  *            a subsequent utext_open.
+  *
+  * @stable ICU 3.4
+  */
+U_STABLE UText * U_EXPORT2
+utext_close(UText *ut);
+
+
+/**
+ * Open a read-only UText implementation for UTF-8 strings.
+ * 
+ * \htmlonly
+ * Any invalid UTF-8 in the input will be handled in this way:
+ * a sequence of bytes that has the form of a truncated, but otherwise valid,
+ * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. 
+ * Any other illegal bytes will each be replaced by a \uFFFD.
+ * \endhtmlonly
+ * 
+ * @param ut     Pointer to a UText struct.  If NULL, a new UText will be created.
+ *               If non-NULL, must refer to an initialized UText struct, which will then
+ *               be reset to reference the specified UTF-8 string.
+ * @param s      A UTF-8 string.  Must not be NULL.
+ * @param length The length of the UTF-8 string in bytes, or -1 if the string is
+ *               zero terminated.
+ * @param status Errors are returned here.
+ * @return       A pointer to the UText.  If a pre-allocated UText was provided, it
+ *               will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
+
+
+/**
+ * Open a read-only UText for UChar * string.
+ * 
+ * @param ut     Pointer to a UText struct.  If NULL, a new UText will be created.
+ *               If non-NULL, must refer to an initialized UText struct, which will then
+ *               be reset to reference the specified UChar string.
+ * @param s      A UChar (UTF-16) string
+ * @param length The number of UChars in the input string, or -1 if the string is
+ *               zero terminated.
+ * @param status Errors are returned here.
+ * @return       A pointer to the UText.  If a pre-allocated UText was provided, it
+ *               will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
+
+
+#ifdef XP_CPLUSPLUS
+/**
+ * Open a writable UText for a non-const UnicodeString. 
+ * 
+ * @param ut      Pointer to a UText struct.  If NULL, a new UText will be created.
+ *                 If non-NULL, must refer to an initialized UText struct, which will then
+ *                 be reset to reference the specified input string.
+ * @param s       A UnicodeString.
+ * @param status Errors are returned here.
+ * @return        Pointer to the UText.  If a UText was supplied as input, this
+ *                 will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
+
+
+/**
+ * Open a UText for a const UnicodeString.   The resulting UText will not be writable.
+ * 
+ * @param ut    Pointer to a UText struct.  If NULL, a new UText will be created.
+ *               If non-NULL, must refer to an initialized UText struct, which will then
+ *               be reset to reference the specified input string.
+ * @param s      A const UnicodeString to be wrapped.
+ * @param status Errors are returned here.
+ * @return       Pointer to the UText.  If a UText was supplied as input, this
+ *               will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
+
+
+/**
+ * Open a writable UText implementation for an ICU Replaceable object.
+ * @param ut    Pointer to a UText struct.  If NULL, a new UText will be created.
+ *               If non-NULL, must refer to an already existing UText, which will then
+ *               be reset to reference the specified replaceable text.
+ * @param rep    A Replaceable text object.
+ * @param status Errors are returned here.
+ * @return       Pointer to the UText.  If a UText was supplied as input, this
+ *               will always be used and returned.
+ * @see Replaceable
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorCode *status);
+
+/**
+ * Open a  UText implementation over an ICU CharacterIterator.
+ * @param ut    Pointer to a UText struct.  If NULL, a new UText will be created.
+ *               If non-NULL, must refer to an already existing UText, which will then
+ *               be reset to reference the specified replaceable text.
+ * @param ci     A Character Iterator.
+ * @param status Errors are returned here.
+ * @return       Pointer to the UText.  If a UText was supplied as input, this
+ *               will always be used and returned.
+ * @see Replaceable
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, UErrorCode *status);
+
+#endif
+
+
+/**
+  *  Clone a UText.  This is much like opening a UText where the source text is itself
+  *  another UText.
+  *
+  *  A deep clone will copy both the UText data structures and the underlying text.
+  *  The original and cloned UText will operate completely independently; modifications
+  *  made to the text in one will not affect the other.  Text providers are not
+  *  required to support deep clones.  The user of clone() must check the status return
+  *  and be prepared to handle failures.
+  *
+  *  The standard UText implementations for UTF8, UChar *, UnicodeString and
+  *  Replaceable all support deep cloning.
+  *
+  *  The UText returned from a deep clone will be writable, assuming that the text
+  *  provider is able to support writing, even if the source UText had been made
+  *  non-writable by means of UText_freeze().
+  *
+  *  A shallow clone replicates only the UText data structures; it does not make
+  *  a copy of the underlying text.  Shallow clones can be used as an efficient way to 
+  *  have multiple iterators active in a single text string that is not being
+  *  modified.
+  *
+  *  A shallow clone operation will not fail, barring truly exceptional conditions such
+  *  as memory allocation failures.
+  *
+  *  Shallow UText clones should be avoided if the UText functions that modify the
+  *  text are expected to be used, either on the original or the cloned UText.
+  *  Any such modifications  can cause unpredictable behavior.  Read Only
+  *  shallow clones provide some protection against errors of this type by
+  *  disabling text modification via the cloned UText.
+  *
+  *  A shallow clone made with the readOnly parameter == FALSE will preserve the 
+  *  utext_isWritable() state of the source object.  Note, however, that
+  *  write operations must be avoided while more than one UText exists that refer
+  *  to the same underlying text.
+  *
+  *  A UText and its clone may be safely concurrently accessed by separate threads.
+  *  This is true for read access only with shallow clones, and for both read and
+  *  write access with deep clones.
+  *  It is the responsibility of the Text Provider to ensure that this thread safety
+  *  constraint is met.
+  *
+  *  @param dest   A UText struct to be filled in with the result of the clone operation,
+  *                or NULL if the clone function should heap-allocate a new UText struct.
+  *                If non-NULL, must refer to an already existing UText, which will then
+  *                be reset to become the clone.
+  *  @param src    The UText to be cloned.
+  *  @param deep   TRUE to request a deep clone, FALSE for a shallow clone.
+  *  @param readOnly TRUE to request that the cloned UText have read only access to the 
+  *                underlying text.  
+
+  *  @param status Errors are returned here.  For deep clones, U_UNSUPPORTED_ERROR
+  *                will be returned if the text provider is unable to clone the
+  *                original text.
+  *  @return       The newly created clone, or NULL if the clone operation failed.
+  *  @stable ICU 3.4
+  */
+U_STABLE UText * U_EXPORT2
+utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
+
+
+/**
+  *  Compare two UText objects for equality.
+  *  UTexts are equal if they are iterating over the same text, and
+  *    have the same iteration position within the text.
+  *    If either or both of the parameters are NULL, the comparison is FALSE.
+  *
+  *  @param a   The first of the two UTexts to compare.
+  *  @param b   The other UText to be compared.
+  *  @return    TRUE if the two UTexts are equal.
+  *  @stable ICU 3.6
+  */
+U_STABLE UBool U_EXPORT2
+utext_equals(const UText *a, const UText *b);
+
+
+/*****************************************************************************
+ *
+ *   Functions to work with the text represeted by a UText wrapper
+ *
+ *****************************************************************************/
+
+/**
+  * Get the length of the text.  Depending on the characteristics
+  * of the underlying text representation, this may be expensive.  
+  * @see  utext_isLengthExpensive()
+  *
+  *
+  * @param ut  the text to be accessed.
+  * @return the length of the text, expressed in native units.
+  *
+  * @stable ICU 3.4
+  */
+U_STABLE int64_t U_EXPORT2
+utext_nativeLength(UText *ut);
+
+/**
+ *  Return TRUE if calculating the length of the text could be expensive.
+ *  Finding the length of NUL terminated strings is considered to be expensive.
+ *
+ *  Note that the value of this function may change
+ *  as the result of other operations on a UText.
+ *  Once the length of a string has been discovered, it will no longer
+ *  be expensive to report it.
+ *
+ * @param ut the text to be accessed.
+ * @return TRUE if determining the length of the text could be time consuming.
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+utext_isLengthExpensive(const UText *ut);
+
+/**
+ * Returns the code point at the requested index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ *
+ * If the specified index points to the interior of a multi-unit
+ * character - one of the trail bytes of a UTF-8 sequence, for example -
+ * the complete code point will be returned.
+ *
+ * The iteration position will be set to the start of the returned code point.
+ *
+ * This function is roughly equivalent to the the sequence
+ *    utext_setNativeIndex(index);
+ *    utext_current32();
+ * (There is a subtle difference if the index is out of bounds by being less than zero - 
+ * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current()
+ * will return the char at zero.  utext_char32At(negative index), on the other hand, will
+ * return the U_SENTINEL value of -1.)
+ * 
+ * @param ut the text to be accessed
+ * @param nativeIndex the native index of the character to be accessed.  If the index points
+ *        to other than the first unit of a multi-unit character, it will be adjusted
+ *        to the start of the character.
+ * @return the code point at the specified index.
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_char32At(UText *ut, int64_t nativeIndex);
+
+
+/**
+ *
+ * Get the code point at the current iteration position,
+ * or U_SENTINEL (-1) if the iteration has reached the end of
+ * the input text.
+ *
+ * @param ut the text to be accessed.
+ * @return the Unicode code point at the current iterator position.
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_current32(UText *ut);
+
+
+/**
+ * Get the code point at the current iteration position of the UText, and
+ * advance the position to the first index following the character.
+ *
+ * If the position is at the end of the text (the index following
+ * the last character, which is also the length of the text), 
+ * return U_SENTINEL (-1) and do not advance the index. 
+ *
+ * This is a post-increment operation.
+ *
+ * An inline macro version of this function, UTEXT_NEXT32(), 
+ * is available for performance critical use.
+ *
+ * @param ut the text to be accessed.
+ * @return the Unicode code point at the iteration position.
+ * @see UTEXT_NEXT32
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_next32(UText *ut);
+
+
+/**
+ *  Move the iterator position to the character (code point) whose
+ *  index precedes the current position, and return that character.
+ *  This is a pre-decrement operation.
+ *
+ *  If the initial position is at the start of the text (index of 0) 
+ *  return U_SENTINEL (-1), and leave the position unchanged.
+ *
+ *  An inline macro version of this function, UTEXT_PREVIOUS32(), 
+ *  is available for performance critical use.
+ *
+ *  @param ut the text to be accessed.
+ *  @return the previous UChar32 code point, or U_SENTINEL (-1) 
+ *          if the iteration has reached the start of the text.
+ *  @see UTEXT_PREVIOUS32
+ *  @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_previous32(UText *ut);
+
+
+/**
+  * Set the iteration index and return the code point at that index. 
+  * Leave the iteration index at the start of the following code point.
+  *
+  * This function is the most efficient and convenient way to
+  * begin a forward iteration.  The results are identical to the those
+  * from the sequence
+  * \code
+  *    utext_setIndex();
+  *    utext_next32();
+  * \endcode
+  *
+  *  @param ut the text to be accessed.
+  *  @param nativeIndex Iteration index, in the native units of the text provider.
+  *  @return Code point which starts at or before index,
+  *         or U_SENTINEL (-1) if it is out of bounds.
+  * @stable ICU 3.4
+  */
+U_STABLE UChar32 U_EXPORT2
+utext_next32From(UText *ut, int64_t nativeIndex);
+
+
+
+/**
+  * Set the iteration index, and return the code point preceding the
+  * one specified by the initial index.  Leave the iteration position
+  * at the start of the returned code point.
+  *
+  * This function is the most efficient and convenient way to
+  * begin a backwards iteration.
+  *
+  * @param ut the text to be accessed.
+  * @param nativeIndex Iteration index in the native units of the text provider.
+  * @return Code point preceding the one at the initial index,
+  *         or U_SENTINEL (-1) if it is out of bounds.
+  *
+  * @stable ICU 3.4
+  */
+U_STABLE UChar32 U_EXPORT2
+utext_previous32From(UText *ut, int64_t nativeIndex);
+
+/**
+  * Get the current iterator position, which can range from 0 to 
+  * the length of the text.
+  * The position is a native index into the input text, in whatever format it
+  * may have (possibly UTF-8 for example), and may not always be the same as
+  * the corresponding UChar (UTF-16) index.
+  * The returned position will always be aligned to a code point boundary. 
+  *
+  * @param ut the text to be accessed.
+  * @return the current index position, in the native units of the text provider.
+  * @stable ICU 3.4
+  */
+U_STABLE int64_t U_EXPORT2
+utext_getNativeIndex(const UText *ut);
+
+/**
+ * Set the current iteration position to the nearest code point
+ * boundary at or preceding the specified index.
+ * The index is in the native units of the original input text.
+ * If the index is out of range, it will be pinned to be within
+ * the range of the input text.
+ * <p>
+ * It will usually be more efficient to begin an iteration
+ * using the functions utext_next32From() or utext_previous32From()
+ * rather than setIndex().
+ * <p>
+ * Moving the index position to an adjacent character is best done
+ * with utext_next32(), utext_previous32() or utext_moveIndex32().
+ * Attempting to do direct arithmetic on the index position is
+ * complicated by the fact that the size (in native units) of a
+ * character depends on the underlying representation of the character
+ * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not
+ * easily knowable.
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex the native unit index of the new iteration position.
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+utext_setNativeIndex(UText *ut, int64_t nativeIndex);
+
+/**
+ * Move the iterator postion by delta code points.  The number of code points
+ * is a signed number; a negative delta will move the iterator backwards,
+ * towards the start of the text.
+ * <p>
+ * The index is moved by <code>delta</code> code points
+ * forward or backward, but no further backward than to 0 and
+ * no further forward than to utext_nativeLength().
+ * The resulting index value will be in between 0 and length, inclusive.
+ *
+ * @param ut the text to be accessed.
+ * @param delta the signed number of code points to move the iteration position.
+ * @return TRUE if the position could be moved the requested number of positions while
+ *              staying within the range [0 - text length].
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+utext_moveIndex32(UText *ut, int32_t delta);
+
+/**
+ * Get the native index of the character preceeding the current position.
+ * If the iteration position is already at the start of the text, zero
+ * is returned.
+ * The value returned is the same as that obtained from the following sequence,
+ * but without the side effect of changing the iteration position.
+ *   
+ * \code
+ *    UText  *ut = whatever;
+ *      ...
+ *    utext_previous(ut)
+ *    utext_getNativeIndex(ut);
+ * \endcode
+ *
+ * This function is most useful during forwards iteration, where it will get the
+ *   native index of the character most recently returned from utext_next().
+ *
+ * @param ut the text to be accessed
+ * @return the native index of the character preceeding the current index position,
+ *         or zero if the current position is at the start of the text.
+ * @stable ICU 3.6
+ */
+U_STABLE int64_t U_EXPORT2
+utext_getPreviousNativeIndex(UText *ut); 
+
+
+/**
+ *
+ * Extract text from a UText into a UChar buffer.  The range of text to be extracted
+ * is specified in the native indices of the UText provider.  These may not necessarily
+ * be UTF-16 indices.
+ * <p>
+ * The size (number of 16 bit UChars) of the data to be extracted is returned.  The
+ * full number of UChars is returned, even when the extracted text is truncated
+ * because the specified buffer size is too small.
+ * <p>
+ * The extracted string will (if you are a user) / must (if you are a text provider)
+ * be NUL-terminated if there is sufficient space in the destination buffer.  This
+ * terminating NUL is not included in the returned length.
+ * <p>
+ * The iteration index is left at the position following the last extracted character.
+ *
+ * @param  ut    the UText from which to extract data.
+ * @param  nativeStart the native index of the first character to extract.\
+ *               If the specified index is out of range,
+ *               it will be pinned to to be within 0 <= index <= textLength
+ * @param  nativeLimit the native string index of the position following the last
+ *               character to extract.  If the specified index is out of range,
+ *               it will be pinned to to be within 0 <= index <= textLength.
+ *               nativeLimit must be >= nativeStart.
+ * @param  dest  the UChar (UTF-16) buffer into which the extracted text is placed
+ * @param  destCapacity  The size, in UChars, of the destination buffer.  May be zero
+ *               for precomputing the required size.
+ * @param  status receives any error status.
+ *         U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the 
+ *         buffer was too small.  Returns number of UChars for preflighting.
+ * @return Number of UChars in the data to be extracted.  Does not include a trailing NUL.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+utext_extract(UText *ut,
+             int64_t nativeStart, int64_t nativeLimit,
+             UChar *dest, int32_t destCapacity,
+             UErrorCode *status);
+
+
+/************************************************************************************
+ *
+ *  #define inline versions of selected performance-critical text access functions
+ *          Caution:  do not use auto increment++ or decrement-- expressions
+ *                    as parameters to these macros.
+ *
+ *          For most use, where there is no extreme performance constraint, the
+ *          normal, non-inline functions are a better choice.  The resulting code
+ *          will be smaller, and, if the need ever arises, easier to debug.
+ *
+ *          These are implemented as #defines rather than real functions
+ *          because there is no fully portable way to do inline functions in plain C.
+ *
+ ************************************************************************************/
+
+/**
+ * inline version of utext_next32(), for performance-critical situations.
+ *
+ * Get the code point at the current iteration position of the UText, and
+ * advance the position to the first index following the character.
+ * This is a post-increment operation.
+ * Returns U_SENTINEL (-1) if the position is at the end of the
+ * text.
+ *
+ * @stable ICU 3.4
+ */
+#define UTEXT_NEXT32(ut)  \
+    ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
+    ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
+
+/**
+ * inline version of utext_previous32(), for performance-critical situations.
+ *
+ *  Move the iterator position to the character (code point) whose
+ *  index precedes the current position, and return that character.
+ *  This is a pre-decrement operation.
+ *  Returns U_SENTINEL (-1) if the position is at the start of the  text.
+ *
+ * @stable ICU 3.4
+ */
+#define UTEXT_PREVIOUS32(ut)  \
+    ((ut)->chunkOffset > 0 && \
+     (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
+          (ut)->chunkContents[--((ut)->chunkOffset)]  :  utext_previous32(ut))
+
+/**
+  *  inline version of utext_getNativeIndex(), for performance-critical situations.
+  *
+  * Get the current iterator position, which can range from 0 to 
+  * the length of the text.
+  * The position is a native index into the input text, in whatever format it
+  * may have (possibly UTF-8 for example), and may not always be the same as
+  * the corresponding UChar (UTF-16) index.
+  * The returned position will always be aligned to a code point boundary. 
+  *
+  * @stable ICU 3.6
+  */
+#define UTEXT_GETNATIVEINDEX(ut)                       \
+    ((ut)->chunkOffset <= (ut)->nativeIndexingLimit?   \
+        (ut)->chunkNativeStart+(ut)->chunkOffset :     \
+        (ut)->pFuncs->mapOffsetToNative(ut))    
+
+/**
+  *  inline version of utext_setNativeIndex(), for performance-critical situations.
+  *
+  * Set the current iteration position to the nearest code point
+  * boundary at or preceding the specified index.
+  * The index is in the native units of the original input text.
+  * If the index is out of range, it will be pinned to be within
+  * the range of the input text.
+  *
+  * @stable ICU 4.0
+  */
+#define UTEXT_SETNATIVEINDEX(ut, ix)                       \
+    { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
+      if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \
+          (ut)->chunkOffset=(int32_t)__offset; \
+      } else { \
+          utext_setNativeIndex((ut), (ix)); } }
+
+
+
+/************************************************************************************
+ *
+ *   Functions related to writing or modifying the text.
+ *   These will work only with modifiable UTexts.  Attempting to
+ *   modify a read-only UText will return an error status.
+ *
+ ************************************************************************************/
+
+
+/**
+ *  Return TRUE if the text can be written (modified) with utext_replace() or
+ *  utext_copy().  For the text to be writable, the text provider must
+ *  be of a type that supports writing and the UText must not be frozen.
+ *
+ *  Attempting to modify text when utext_isWriteable() is FALSE will fail -
+ *  the text will not be modified, and an error will be returned from the function
+ *  that attempted the modification.
+ *
+ * @param  ut   the UText to be tested.
+ * @return TRUE if the text is modifiable.
+ *
+ * @see    utext_freeze()
+ * @see    utext_replace()
+ * @see    utext_copy()
+ * @stable ICU 3.4
+ *
+ */
+U_STABLE UBool U_EXPORT2
+utext_isWritable(const UText *ut);
+
+
+/**
+  * Test whether there is meta data associated with the text.
+  * @see Replaceable::hasMetaData()
+  *
+  * @param ut The UText to be tested
+  * @return TRUE if the underlying text includes meta data.
+  * @stable ICU 3.4
+  */
+U_STABLE UBool U_EXPORT2
+utext_hasMetaData(const UText *ut);
+
+
+/**
+ * Replace a range of the original text with a replacement text.
+ *
+ * Leaves the current iteration position at the position following the
+ *  newly inserted replacement text.
+ *
+ * This function is only available on UText types that support writing,
+ * that is, ones where utext_isWritable() returns TRUE.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string.  Behavior after a replace operation
+ * on a UText is undefined for any other additional UTexts that refer to the
+ * modified string.
+ *
+ * @param ut               the UText representing the text to be operated on.
+ * @param nativeStart      the native index of the start of the region to be replaced
+ * @param nativeLimit      the native index of the character following the region to be replaced.
+ * @param replacementText  pointer to the replacement text
+ * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated.
+ * @param status           receives any error status.  Possible errors include
+ *                         U_NO_WRITE_PERMISSION
+ *
+ * @return The signed number of (native) storage units by which
+ *         the length of the text expanded or contracted.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+utext_replace(UText *ut,
+             int64_t nativeStart, int64_t nativeLimit,
+             const UChar *replacementText, int32_t replacementLength,
+             UErrorCode *status);
+
+
+
+/**
+ *
+ * Copy or move a substring from one position to another within the text,
+ * while retaining any metadata associated with the text.
+ * This function is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * The text to be copied or moved is inserted at destIndex;
+ * it does not replace or overwrite any existing text.
+ *
+ * The iteration position is left following the newly inserted text
+ * at the destination position.
+ *
+ * This function is only available on UText types that support writing,
+ * that is, ones where utext_isWritable() returns TRUE.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string.  Behavior after a copy operation
+ * on a UText is undefined in any other additional UTexts that refer to the
+ * modified string.
+ *
+ * @param ut           The UText representing the text to be operated on.
+ * @param nativeStart  The native index of the start of the region to be copied or moved
+ * @param nativeLimit  The native index of the character position following the region
+ *                     to be copied.
+ * @param destIndex    The native destination index to which the source substring is
+ *                     copied or moved.
+ * @param move         If TRUE, then the substring is moved, not copied/duplicated.
+ * @param status       receives any error status.  Possible errors include U_NO_WRITE_PERMISSION
+ *                       
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+utext_copy(UText *ut,
+          int64_t nativeStart, int64_t nativeLimit,
+          int64_t destIndex,
+          UBool move,
+          UErrorCode *status);
+
+
+/**
+  *  <p>
+  *  Freeze a UText.  This prevents any modification to the underlying text itself
+  *  by means of functions operating on this UText.
+  *  </p>
+  *  <p>
+  *  Once frozen, a UText can not be unfrozen.  The intent is to ensure
+  *  that a the text underlying a frozen UText wrapper cannot be modified via that UText.
+  *  </p>
+  *  <p>
+  *  Caution:  freezing a UText will disable changes made via the specific
+  *   frozen UText wrapper only; it will not have any effect on the ability to
+  *   directly modify the text by bypassing the UText.  Any such backdoor modifications
+  *   are always an error while UText access is occuring because the underlying
+  *   text can get out of sync with UText's buffering.
+  *  </p>
+  *
+  *  @param ut  The UText to be frozen.
+  *  @see   utext_isWritable()
+  *  @stable ICU 3.6
+  */
+U_STABLE void U_EXPORT2
+utext_freeze(UText *ut);
+
+
+/**
+ * UText provider properties (bit field indexes).
+ *
+ * @see UText
+ * @stable ICU 3.4
+ */
+enum {
+    /**
+     * It is potentially time consuming for the provider to determine the length of the text.
+     * @stable ICU 3.4
+     */
+    UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
+    /**
+     * Text chunks remain valid and usable until the text object is modified or
+     * deleted, not just until the next time the access() function is called
+     * (which is the default).
+     * @stable ICU 3.4
+     */
+    UTEXT_PROVIDER_STABLE_CHUNKS = 2,
+    /**
+     * The provider supports modifying the text via the replace() and copy()
+     * functions.
+     * @see Replaceable
+     * @stable ICU 3.4
+     */
+    UTEXT_PROVIDER_WRITABLE = 3,
+    /**
+     * There is meta data associated with the text.
+     * @see Replaceable::hasMetaData()
+     * @stable ICU 3.4
+     */ 
+    UTEXT_PROVIDER_HAS_META_DATA = 4,
+    /**
+     * Text provider owns the text storage.
+     *  Generally occurs as the result of a deep clone of the UText.
+     *  When closing the UText, the associated text must
+     *  also be closed/deleted/freed/ whatever is appropriate.
+     * @stable ICU 3.6
+     */
+     UTEXT_PROVIDER_OWNS_TEXT = 5
+};
+
+/**
+  * Function type declaration for UText.clone().
+  *
+  *  clone a UText.  Much like opening a UText where the source text is itself
+  *  another UText.
+  *
+  *  A deep clone will copy both the UText data structures and the underlying text.
+  *  The original and cloned UText will operate completely independently; modifications
+  *  made to the text in one will not effect the other.  Text providers are not
+  *  required to support deep clones.  The user of clone() must check the status return
+  *  and be prepared to handle failures.
+  *
+  *  A shallow clone replicates only the UText data structures; it does not make
+  *  a copy of the underlying text.  Shallow clones can be used as an efficient way to 
+  *  have multiple iterators active in a single text string that is not being
+  *  modified.
+  *
+  *  A shallow clone operation must not fail except for truly exceptional conditions such
+  *  as memory allocation failures.
+  *
+  *  A UText and its clone may be safely concurrently accessed by separate threads.
+  *  This is true for both shallow and deep clones.
+  *  It is the responsibility of the Text Provider to ensure that this thread safety
+  *  constraint is met.
+
+  *
+  *  @param dest   A UText struct to be filled in with the result of the clone operation,
+  *                or NULL if the clone function should heap-allocate a new UText struct.
+  *  @param src    The UText to be cloned.
+  *  @param deep   TRUE to request a deep clone, FALSE for a shallow clone.
+  *  @param status Errors are returned here.  For deep clones, U_UNSUPPORTED_ERROR
+  *                should be returned if the text provider is unable to clone the
+  *                original text.
+  *  @return       The newly created clone, or NULL if the clone operation failed.
+  *
+  * @stable ICU 3.4
+  */
+typedef UText * U_CALLCONV
+UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
+
+
+/**
+ * Function type declaration for UText.nativeLength().
+ *
+ * @param ut the UText to get the length of.
+ * @return the length, in the native units of the original text string.
+ * @see UText
+ * @stable ICU 3.4
+ */
+typedef int64_t U_CALLCONV
+UTextNativeLength(UText *ut);
+
+/**
+ * Function type declaration for UText.access().  Get the description of the text chunk
+ *  containing the text at a requested native index.  The UText's iteration
+ *  position will be left at the requested index.  If the index is out
+ *  of bounds, the iteration position will be left at the start or end
+ *  of the string, as appropriate.
+ *
+ *  Chunks must begin and end on code point boundaries.  A single code point
+ *  comprised of multiple storage units must never span a chunk boundary.
+ *
+ *
+ * @param ut          the UText being accessed.
+ * @param nativeIndex Requested index of the text to be accessed.
+ * @param forward     If TRUE, then the returned chunk must contain text
+ *                    starting from the index, so that start<=index<limit.
+ *                    If FALSE, then the returned chunk must contain text
+ *                    before the index, so that start<index<=limit.
+ * @return            True if the requested index could be accessed.  The chunk
+ *                    will contain the requested text.
+ *                    False value if a chunk cannot be accessed
+ *                    (the requested index is out of bounds).
+ *
+ * @see UText
+ * @stable ICU 3.4
+ */
+typedef UBool U_CALLCONV
+UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
+
+/**
+ * Function type declaration for UText.extract().
+ *
+ * Extract text from a UText into a UChar buffer.  The range of text to be extracted
+ * is specified in the native indices of the UText provider.  These may not necessarily
+ * be UTF-16 indices.
+ * <p>
+ * The size (number of 16 bit UChars) in the data to be extracted is returned.  The
+ * full amount is returned, even when the specified buffer size is smaller.
+ * <p>
+ * The extracted string will (if you are a user) / must (if you are a text provider)
+ * be NUL-terminated if there is sufficient space in the destination buffer.
+ *
+ * @param  ut            the UText from which to extract data.
+ * @param  nativeStart   the native index of the first characer to extract.
+ * @param  nativeLimit   the native string index of the position following the last
+ *                       character to extract.
+ * @param  dest          the UChar (UTF-16) buffer into which the extracted text is placed
+ * @param  destCapacity  The size, in UChars, of the destination buffer.  May be zero
+ *                       for precomputing the required size.
+ * @param  status        receives any error status.
+ *                       If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
+ *                       preflighting.
+ * @return Number of UChars in the data.  Does not include a trailing NUL.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextExtract(UText *ut,
+             int64_t nativeStart, int64_t nativeLimit,
+             UChar *dest, int32_t destCapacity,
+             UErrorCode *status);
+
+/**
+ * Function type declaration for UText.replace().
+ *
+ * Replace a range of the original text with a replacement text.
+ *
+ * Leaves the current iteration position at the position following the
+ *  newly inserted replacement text.
+ *
+ * This function need only be implemented on UText types that support writing.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string.  The function is responsible for updating the
+ * text chunk within the UText to reflect the updated iteration position,
+ * taking into account any changes to the underlying string's structure caused
+ * by the replace operation.
+ *
+ * @param ut               the UText representing the text to be operated on.
+ * @param nativeStart      the index of the start of the region to be replaced
+ * @param nativeLimit      the index of the character following the region to be replaced.
+ * @param replacementText  pointer to the replacement text
+ * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
+ * @param status           receives any error status.  Possible errors include
+ *                         U_NO_WRITE_PERMISSION
+ *
+ * @return The signed number of (native) storage units by which
+ *         the length of the text expanded or contracted.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextReplace(UText *ut,
+             int64_t nativeStart, int64_t nativeLimit,
+             const UChar *replacementText, int32_t replacmentLength,
+             UErrorCode *status);
+
+/**
+ * Function type declaration for UText.copy().
+ *
+ * Copy or move a substring from one position to another within the text,
+ * while retaining any metadata associated with the text.
+ * This function is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * The text to be copied or moved is inserted at destIndex;
+ * it does not replace or overwrite any existing text.
+ *
+ * This function need only be implemented for UText types that support writing.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string.  The function is responsible for updating the
+ * text chunk within the UText to reflect the updated iteration position,
+ * taking into account any changes to the underlying string's structure caused
+ * by the replace operation.
+ *
+ * @param ut           The UText representing the text to be operated on.
+ * @param nativeStart  The index of the start of the region to be copied or moved
+ * @param nativeLimit  The index of the character following the region to be replaced.
+ * @param nativeDest   The destination index to which the source substring is copied or moved.
+ * @param move         If TRUE, then the substring is moved, not copied/duplicated.
+ * @param status       receives any error status.  Possible errors include U_NO_WRITE_PERMISSION
+ *
+ * @stable ICU 3.4
+ */
+typedef void U_CALLCONV
+UTextCopy(UText *ut,
+          int64_t nativeStart, int64_t nativeLimit,
+          int64_t nativeDest,
+          UBool move,
+          UErrorCode *status);
+
+/**
+ * Function type declaration for UText.mapOffsetToNative().
+ * Map from the current UChar offset within the current text chunk to
+ *  the corresponding native index in the original source text.
+ *
+ * This is required only for text providers that do not use native UTF-16 indexes.
+ *
+ * @param ut     the UText.
+ * @return Absolute (native) index corresponding to chunkOffset in the current chunk.
+ *         The returned native index should always be to a code point boundary.
+ *
+ * @stable ICU 3.4
+ */
+typedef int64_t U_CALLCONV
+UTextMapOffsetToNative(const UText *ut);
+
+/**
+ * Function type declaration for UText.mapIndexToUTF16().
+ * Map from a native index to a UChar offset within a text chunk.
+ * Behavior is undefined if the native index does not fall within the
+ *   current chunk.
+ *
+ * This function is required only for text providers that do not use native UTF-16 indexes.
+ *
+ * @param ut          The UText containing the text chunk.
+ * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit.
+ * @return            Chunk-relative UTF-16 offset corresponding to the specified native
+ *                    index.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
+
+
+/**
+ * Function type declaration for UText.utextClose().
+ *
+ * A Text Provider close function is only required for provider types that make
+ *  allocations in their open function (or other functions) that must be 
+ *  cleaned when the UText is closed.
+ *
+ * The allocation of the UText struct itself and any "extra" storage
+ * associated with the UText is handled by the common UText implementation
+ * and does not require provider specific cleanup in a close function.
+ *
+ * Most UText provider implementations do not need to implement this function.
+ *
+ * @param ut A UText object to be closed.
+ *
+ * @stable ICU 3.4
+ */
+typedef void U_CALLCONV
+UTextClose(UText *ut);
+
+
+/**
+  *   (public)  Function dispatch table for UText.
+  *             Conceptually very much like a C++ Virtual Function Table.
+  *             This struct defines the organization of the table.
+  *             Each text provider implementation must provide an
+  *              actual table that is initialized with the appropriate functions
+  *              for the type of text being handled.
+  *   @stable ICU 3.6
+  */
+struct UTextFuncs {
+    /**
+     *   (public)  Function table size, sizeof(UTextFuncs)
+     *             Intended for use should the table grow to accomodate added
+     *             functions in the future, to allow tests for older format
+     *             function tables that do not contain the extensions.
+     *
+     *             Fields are placed for optimal alignment on
+     *             32/64/128-bit-pointer machines, by normally grouping together
+     *             4 32-bit fields,
+     *             4 pointers,
+     *             2 64-bit fields
+     *             in sequence.
+     *   @stable ICU 3.6
+     */
+    int32_t       tableSize;
+
+    /**
+      *   (private)  Alignment padding.
+      *              Do not use, reserved for use by the UText framework only.
+      *   @internal
+      */
+    int32_t       reserved1, /** @internal */ reserved2, /** @internal */ reserved3;
+
+
+    /**
+     * (public) Function pointer for UTextClone
+     *
+     * @see UTextClone
+     * @stable ICU 3.6
+     */
+    UTextClone *clone;
+
+    /**
+     * (public) function pointer for UTextLength
+     * May be expensive to compute!
+     *
+     * @see UTextLength
+     * @stable ICU 3.6
+     */
+    UTextNativeLength *nativeLength;
+
+    /**
+     * (public) Function pointer for UTextAccess.
+     *
+     * @see UTextAccess
+     * @stable ICU 3.6
+     */
+    UTextAccess *access;
+
+    /**
+     * (public) Function pointer for UTextExtract.
+     *
+     * @see UTextExtract
+     * @stable ICU 3.6
+     */
+    UTextExtract *extract;
+
+    /**
+     * (public) Function pointer for UTextReplace.
+     *
+     * @see UTextReplace
+     * @stable ICU 3.6
+     */
+    UTextReplace *replace;
+
+    /**
+     * (public) Function pointer for UTextCopy.
+     *
+     * @see UTextCopy
+     * @stable ICU 3.6
+     */
+    UTextCopy *copy;
+
+    /**
+     * (public) Function pointer for UTextMapOffsetToNative.
+     *
+     * @see UTextMapOffsetToNative
+     * @stable ICU 3.6
+     */
+    UTextMapOffsetToNative *mapOffsetToNative;
+
+    /**
+     * (public) Function pointer for UTextMapNativeIndexToUTF16.
+     *
+     * @see UTextMapNativeIndexToUTF16
+     * @stable ICU 3.6
+     */
+    UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;
+
+    /**
+     * (public) Function pointer for UTextClose.
+      *
+      * @see UTextClose
+      * @stable ICU 3.6
+      */
+    UTextClose  *close;
+
+    /**
+      * (private)  Spare function pointer
+      * @internal
+      */
+
+    UTextClose  *spare1;
+    /**
+      * (private)  Spare function pointer
+      * @internal
+      */
+    UTextClose  *spare2;
+
+    /**
+      * (private)  Spare function pointer
+      * @internal
+      */
+    UTextClose  *spare3;
+
+};
+/**
+ * Function dispatch table for UText
+ * @see UTextFuncs
+ */
+typedef struct UTextFuncs UTextFuncs;
+
+ /**
+  *   UText struct.  Provides the interface between the generic UText access code
+  *                  and the UText provider code that works on specific kinds of
+  *                  text  (UTF-8, noncontiguous UTF-16, whatever.)
+  *
+  *                  Applications that are using predefined types of text providers
+  *                  to pass text data to ICU services will have no need to view the
+  *                  internals of the UText structs that they open.
+  *
+  * @stable ICU 3.6
+  */
+struct UText {
+    /**
+     *     (private)  Magic.  Used to help detect when UText functions are handed
+     *                        invalid or unitialized UText structs.
+     *                        utext_openXYZ() functions take an initialized,
+     *                        but not necessarily open, UText struct as an
+     *                        optional fill-in parameter.  This magic field
+     *                        is used to check for that initialization.
+     *                        Text provider close functions must NOT clear
+     *                        the magic field because that would prevent
+     *                        reuse of the UText struct.
+     * @internal
+     */
+    uint32_t       magic;
+
+
+    /**
+     *     (private)  Flags for managing the allocation and freeing of
+     *                memory associated with this UText.
+     * @internal
+     */
+    int32_t        flags;
+
+
+    /**
+      *  Text provider properties.  This set of flags is maintainted by the
+      *                             text provider implementation.
+      *  @stable ICU 3.4
+      */
+    int32_t         providerProperties;
+
+    /**
+     * (public) sizeOfStruct=sizeof(UText)
+     * Allows possible backward compatible extension.
+     *
+     * @stable ICU 3.4
+     */
+    int32_t         sizeOfStruct;
+    
+    /* ------ 16 byte alignment boundary -----------  */
+    
+
+    /**
+      *  (protected) Native index of the first character position following
+      *              the current chunk.
+      *  @stable ICU 3.6
+      */
+    int64_t         chunkNativeLimit;
+
+    /**
+     *   (protected)  Size in bytes of the extra space (pExtra).
+     *  @stable ICU 3.4
+     */
+    int32_t        extraSize;
+
+    /**
+      *    (protected) The highest chunk offset where native indexing and
+      *    chunk (UTF-16) indexing correspond.  For UTF-16 sources, value
+      *    will be equal to chunkLength.
+      *
+      *    @stable ICU 3.6
+      */
+    int32_t         nativeIndexingLimit;
+
+    /* ---- 16 byte alignment boundary------ */
+    
+    /**
+     *  (protected) Native index of the first character in the text chunk.
+     *  @stable ICU 3.6
+     */
+    int64_t         chunkNativeStart;
+
+    /**
+     *  (protected) Current iteration position within the text chunk (UTF-16 buffer).
+     *  This is the index to the character that will be returned by utext_next32().
+     *  @stable ICU 3.6
+     */
+    int32_t         chunkOffset;
+
+    /**
+     *  (protected) Length the text chunk (UTF-16 buffer), in UChars.
+     *  @stable ICU 3.6
+     */
+    int32_t         chunkLength;
+
+    /* ---- 16  byte alignment boundary-- */
+    
+
+    /**
+     *  (protected)  pointer to a chunk of text in UTF-16 format.
+     *  May refer either to original storage of the source of the text, or
+     *  if conversion was required, to a buffer owned by the UText.
+     *  @stable ICU 3.6
+     */
+    const UChar    *chunkContents;
+
+     /**
+      * (public)     Pointer to Dispatch table for accessing functions for this UText.
+      * @stable ICU 3.6
+      */
+    const UTextFuncs     *pFuncs;
+
+    /**
+     *  (protected)  Pointer to additional space requested by the
+     *               text provider during the utext_open operation.
+     * @stable ICU 3.4
+     */
+    void          *pExtra;
+
+    /**
+     * (protected) Pointer to string or text-containin object or similar.
+     * This is the source of the text that this UText is wrapping, in a format
+     *  that is known to the text provider functions.
+     * @stable ICU 3.4
+     */
+    const void   *context;
+
+    /* --- 16 byte alignment boundary--- */
+
+    /**
+     * (protected) Pointer fields available for use by the text provider.
+     * Not used by UText common code.
+     * @stable ICU 3.6
+     */
+    const void     *p; 
+    /**
+     * (protected) Pointer fields available for use by the text provider.
+     * Not used by UText common code.
+     * @stable ICU 3.6
+     */
+    const void     *q;
+     /**
+     * (protected) Pointer fields available for use by the text provider.
+     * Not used by UText common code.
+     * @stable ICU 3.6
+      */
+    const void     *r;
+
+    /**
+      *  Private field reserved for future use by the UText framework
+      *     itself.  This is not to be touched by the text providers.
+      * @internal ICU 3.4
+      */
+    void           *privP;
+
+
+    /* --- 16 byte alignment boundary--- */
+    
+
+    /**
+      * (protected) Integer field reserved for use by the text provider.
+      * Not used by the UText framework, or by the client (user) of the UText.
+      * @stable ICU 3.4
+      */
+    int64_t         a;
+
+    /**
+      * (protected) Integer field reserved for use by the text provider.
+      * Not used by the UText framework, or by the client (user) of the UText.
+      * @stable ICU 3.4
+      */
+    int32_t         b;
+
+    /**
+      * (protected) Integer field reserved for use by the text provider.
+      * Not used by the UText framework, or by the client (user) of the UText.
+      * @stable ICU 3.4
+      */
+    int32_t         c;
+
+    /*  ---- 16 byte alignment boundary---- */
+
+
+    /**
+      *  Private field reserved for future use by the UText framework
+      *     itself.  This is not to be touched by the text providers.
+      * @internal ICU 3.4
+      */
+    int64_t         privA;
+    /**
+      *  Private field reserved for future use by the UText framework
+      *     itself.  This is not to be touched by the text providers.
+      * @internal ICU 3.4
+      */
+    int32_t         privB;
+    /**
+      *  Private field reserved for future use by the UText framework
+      *     itself.  This is not to be touched by the text providers.
+      * @internal ICU 3.4
+      */
+    int32_t         privC;
+};
+
+
+/**
+ *  Common function for use by Text Provider implementations to allocate and/or initialize
+ *  a new UText struct.  To be called in the implementation of utext_open() functions.
+ *  If the supplied UText parameter is null, a new UText struct will be allocated on the heap.
+ *  If the supplied UText is already open, the provider's close function will be called
+ *  so that the struct can be reused by the open that is in progress.
+ *
+ * @param ut   pointer to a UText struct to be re-used, or null if a new UText
+ *             should be allocated.
+ * @param extraSpace The amount of additional space to be allocated as part
+ *             of this UText, for use by types of providers that require
+ *             additional storage.
+ * @param status Errors are returned here.
+ * @return pointer to the UText, allocated if necessary, with extra space set up if requested.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
+
+/**
+  * @internal
+  *  Value used to help identify correctly initialized UText structs.
+  *  Note:  must be publicly visible so that UTEXT_INITIALIZER can access it.
+  */
+enum {
+    UTEXT_MAGIC = 0x345ad82c
+};
+
+/**
+ * initializer to be used with local (stack) instances of a UText
+ *  struct.  UText structs must be initialized before passing
+ *  them to one of the utext_open functions.
+ *
+ * @stable ICU 3.6
+ */
+#define UTEXT_INITIALIZER {                                        \
+                  UTEXT_MAGIC,          /* magic                */ \
+                  0,                    /* flags                */ \
+                  0,                    /* providerProps        */ \
+                  sizeof(UText),        /* sizeOfStruct         */ \
+                  0,                    /* chunkNativeLimit     */ \
+                  0,                    /* extraSize            */ \
+                  0,                    /* nativeIndexingLimit  */ \
+                  0,                    /* chunkNativeStart     */ \
+                  0,                    /* chunkOffset          */ \
+                  0,                    /* chunkLength          */ \
+                  NULL,                 /* chunkContents        */ \
+                  NULL,                 /* pFuncs               */ \
+                  NULL,                 /* pExtra               */ \
+                  NULL,                 /* context              */ \
+                  NULL, NULL, NULL,     /* p, q, r              */ \
+                  NULL,                 /* privP                */ \
+                  0, 0, 0,              /* a, b, c              */ \
+                  0, 0, 0               /* privA,B,C,           */ \
+                  }
+
+
+U_CDECL_END
+
+
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/utf.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/utf.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/utf.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,227 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 1999-2007, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  utf.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 1999sep09
-*   created by: Markus W. Scherer
-*/
-
-/**
- * \file
- * \brief C API: Code point macros
- *
- * This file defines macros for checking whether a code point is
- * a surrogate or a non-character etc.
- *
- * The UChar and UChar32 data types for Unicode code units and code points
- * are defined in umachines.h because they can be machine-dependent.
- *
- * utf.h is included by utypes.h and itself includes utf8.h and utf16.h after some
- * common definitions. Those files define macros for efficiently getting code points
- * in and out of UTF-8/16 strings.
- * utf16.h macros have "U16_" prefixes.
- * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
- *
- * ICU processes 16-bit Unicode strings.
- * Most of the time, such strings are well-formed UTF-16.
- * Single, unpaired surrogates must be handled as well, and are treated in ICU
- * like regular code points where possible.
- * (Pairs of surrogate code points are indistinguishable from supplementary
- * code points encoded as pairs of supplementary code units.)
- *
- * In fact, almost all Unicode code points in normal text (>99%)
- * are on the BMP (<=U+ffff) and even <=U+d7ff.
- * ICU functions handle supplementary code points (U+10000..U+10ffff)
- * but are optimized for the much more frequently occurring BMP code points.
- *
- * utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
- * UChar is defined to be exactly wchar_t, otherwise uint16_t.
- *
- * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
- * Unicode code point (Unicode scalar value, 0..0x10ffff).
- * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
- * the definition of UChar. For details see the documentation for UChar32 itself.
- *
- * utf.h also defines a small number of C macros for single Unicode code points.
- * These are simple checks for surrogates and non-characters.
- * For actual Unicode character properties see uchar.h.
- *
- * By default, string operations must be done with error checking in case
- * a string is not well-formed UTF-16.
- * The macros will detect if a surrogate code unit is unpaired
- * (lead unit without trail unit or vice versa) and just return the unit itself
- * as the code point.
- * (It is an accidental property of Unicode and UTF-16 that all
- * malformed sequences can be expressed unambiguously with a distinct subrange
- * of Unicode code points.)
- *
- * The regular "safe" macros require that the initial, passed-in string index
- * is within bounds. They only check the index when they read more than one
- * code unit. This is usually done with code similar to the following loop:
- * <pre>while(i<length) {
- *   U16_NEXT(s, i, length, c);
- *   // use c
- * }</pre>
- *
- * When it is safe to assume that text is well-formed UTF-16
- * (does not contain single, unpaired surrogates), then one can use
- * U16_..._UNSAFE macros.
- * These do not check for proper code unit sequences or truncated text and may
- * yield wrong results or even cause a crash if they are used with "malformed"
- * text.
- * In practice, U16_..._UNSAFE macros will produce slightly less code but
- * should not be faster because the processing is only different when a
- * surrogate code unit is detected, which will be rare.
- *
- * Similarly for UTF-8, there are "safe" macros without a suffix,
- * and U8_..._UNSAFE versions.
- * The performance differences are much larger here because UTF-8 provides so
- * many opportunities for malformed sequences.
- * The unsafe UTF-8 macros are entirely implemented inside the macro definitions
- * and are fast, while the safe UTF-8 macros call functions for all but the
- * trivial (ASCII) cases.
- * (ICU 3.6 optimizes U8_NEXT() and U8_APPEND() to handle most other common
- * characters inline as well.)
- *
- * Unlike with UTF-16, malformed sequences cannot be expressed with distinct
- * code point values (0..U+10ffff). They are indicated with negative values instead.
- *
- * For more information see the ICU User Guide Strings chapter
- * (http://icu-project.org/userguide/strings.html).
- *
- * <em>Usage:</em>
- * ICU coding guidelines for if() statements should be followed when using these macros.
- * Compound statements (curly braces {}) must be used  for if-else-while... 
- * bodies and all macro statements should be terminated with semicolon.
- *
- * @stable ICU 2.4
- */
-
-#ifndef __UTF_H__
-#define __UTF_H__
-
-#include "unicode/utypes.h"
-/* include the utfXX.h after the following definitions */
-
-/* single-code point definitions -------------------------------------------- */
-
-/**
- * This value is intended for sentinel values for APIs that
- * (take or) return single code points (UChar32).
- * It is outside of the Unicode code point range 0..0x10ffff.
- * 
- * For example, a "done" or "error" value in a new API
- * could be indicated with U_SENTINEL.
- *
- * ICU APIs designed before ICU 2.4 usually define service-specific "done"
- * values, mostly 0xffff.
- * Those may need to be distinguished from
- * actual U+ffff text contents by calling functions like
- * CharacterIterator::hasNext() or UnicodeString::length().
- *
- * @return -1
- * @see UChar32
- * @stable ICU 2.4
- */
-#define U_SENTINEL (-1)
-
-/**
- * Is this code point a Unicode noncharacter?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U_IS_UNICODE_NONCHAR(c) \
-    ((c)>=0xfdd0 && \
-     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
-     (uint32_t)(c)<=0x10ffff)
-
-/**
- * Is c a Unicode code point value (0..U+10ffff)
- * that can be assigned a character?
- *
- * Code points that are not characters include:
- * - single surrogate code points (U+d800..U+dfff, 2048 code points)
- * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
- * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
- * - the highest Unicode code point value is U+10ffff
- *
- * This means that all code points below U+d800 are character code points,
- * and that boundary is tested first for performance.
- *
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U_IS_UNICODE_CHAR(c) \
-    ((uint32_t)(c)<0xd800 || \
-        ((uint32_t)(c)>0xdfff && \
-         (uint32_t)(c)<=0x10ffff && \
-         !U_IS_UNICODE_NONCHAR(c)))
-
-/**
- * Is this code point a BMP code point (U+0000..U+ffff)?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.8
- */
-#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff)
-
-/**
- * Is this code point a supplementary code point (U+10000..U+10ffff)?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.8
- */
-#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff)
- 
-/**
- * Is this code point a lead surrogate (U+d800..U+dbff)?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
-
-/**
- * Is this code point a trail surrogate (U+dc00..U+dfff)?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
-
-/**
- * Is this code point a surrogate (U+d800..U+dfff)?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
-
-/**
- * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
- * is it a lead surrogate?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
-
-/* include the utfXX.h ------------------------------------------------------ */
-
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-
-/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
-#include "unicode/utf_old.h"
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/utf.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/utf.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/utf.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/utf.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,227 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep09
+*   created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: Code point macros
+ *
+ * This file defines macros for checking whether a code point is
+ * a surrogate or a non-character etc.
+ *
+ * The UChar and UChar32 data types for Unicode code units and code points
+ * are defined in umachines.h because they can be machine-dependent.
+ *
+ * utf.h is included by utypes.h and itself includes utf8.h and utf16.h after some
+ * common definitions. Those files define macros for efficiently getting code points
+ * in and out of UTF-8/16 strings.
+ * utf16.h macros have "U16_" prefixes.
+ * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
+ *
+ * ICU processes 16-bit Unicode strings.
+ * Most of the time, such strings are well-formed UTF-16.
+ * Single, unpaired surrogates must be handled as well, and are treated in ICU
+ * like regular code points where possible.
+ * (Pairs of surrogate code points are indistinguishable from supplementary
+ * code points encoded as pairs of supplementary code units.)
+ *
+ * In fact, almost all Unicode code points in normal text (>99%)
+ * are on the BMP (<=U+ffff) and even <=U+d7ff.
+ * ICU functions handle supplementary code points (U+10000..U+10ffff)
+ * but are optimized for the much more frequently occurring BMP code points.
+ *
+ * utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
+ * UChar is defined to be exactly wchar_t, otherwise uint16_t.
+ *
+ * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself.
+ *
+ * utf.h also defines a small number of C macros for single Unicode code points.
+ * These are simple checks for surrogates and non-characters.
+ * For actual Unicode character properties see uchar.h.
+ *
+ * By default, string operations must be done with error checking in case
+ * a string is not well-formed UTF-16.
+ * The macros will detect if a surrogate code unit is unpaired
+ * (lead unit without trail unit or vice versa) and just return the unit itself
+ * as the code point.
+ * (It is an accidental property of Unicode and UTF-16 that all
+ * malformed sequences can be expressed unambiguously with a distinct subrange
+ * of Unicode code points.)
+ *
+ * The regular "safe" macros require that the initial, passed-in string index
+ * is within bounds. They only check the index when they read more than one
+ * code unit. This is usually done with code similar to the following loop:
+ * <pre>while(i<length) {
+ *   U16_NEXT(s, i, length, c);
+ *   // use c
+ * }</pre>
+ *
+ * When it is safe to assume that text is well-formed UTF-16
+ * (does not contain single, unpaired surrogates), then one can use
+ * U16_..._UNSAFE macros.
+ * These do not check for proper code unit sequences or truncated text and may
+ * yield wrong results or even cause a crash if they are used with "malformed"
+ * text.
+ * In practice, U16_..._UNSAFE macros will produce slightly less code but
+ * should not be faster because the processing is only different when a
+ * surrogate code unit is detected, which will be rare.
+ *
+ * Similarly for UTF-8, there are "safe" macros without a suffix,
+ * and U8_..._UNSAFE versions.
+ * The performance differences are much larger here because UTF-8 provides so
+ * many opportunities for malformed sequences.
+ * The unsafe UTF-8 macros are entirely implemented inside the macro definitions
+ * and are fast, while the safe UTF-8 macros call functions for all but the
+ * trivial (ASCII) cases.
+ * (ICU 3.6 optimizes U8_NEXT() and U8_APPEND() to handle most other common
+ * characters inline as well.)
+ *
+ * Unlike with UTF-16, malformed sequences cannot be expressed with distinct
+ * code point values (0..U+10ffff). They are indicated with negative values instead.
+ *
+ * For more information see the ICU User Guide Strings chapter
+ * (http://icu-project.org/userguide/strings.html).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used  for if-else-while... 
+ * bodies and all macro statements should be terminated with semicolon.
+ *
+ * @stable ICU 2.4
+ */
+
+#ifndef __UTF_H__
+#define __UTF_H__
+
+#include "unicode/utypes.h"
+/* include the utfXX.h after the following definitions */
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * This value is intended for sentinel values for APIs that
+ * (take or) return single code points (UChar32).
+ * It is outside of the Unicode code point range 0..0x10ffff.
+ * 
+ * For example, a "done" or "error" value in a new API
+ * could be indicated with U_SENTINEL.
+ *
+ * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+ * values, mostly 0xffff.
+ * Those may need to be distinguished from
+ * actual U+ffff text contents by calling functions like
+ * CharacterIterator::hasNext() or UnicodeString::length().
+ *
+ * @return -1
+ * @see UChar32
+ * @stable ICU 2.4
+ */
+#define U_SENTINEL (-1)
+
+/**
+ * Is this code point a Unicode noncharacter?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_UNICODE_NONCHAR(c) \
+    ((c)>=0xfdd0 && \
+     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+     (uint32_t)(c)<=0x10ffff)
+
+/**
+ * Is c a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_UNICODE_CHAR(c) \
+    ((uint32_t)(c)<0xd800 || \
+        ((uint32_t)(c)>0xdfff && \
+         (uint32_t)(c)<=0x10ffff && \
+         !U_IS_UNICODE_NONCHAR(c)))
+
+/**
+ * Is this code point a BMP code point (U+0000..U+ffff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.8
+ */
+#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff)
+
+/**
+ * Is this code point a supplementary code point (U+10000..U+10ffff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.8
+ */
+#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff)
+ 
+/**
+ * Is this code point a lead surrogate (U+d800..U+dbff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code point a trail surrogate (U+dc00..U+dfff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code point a surrogate (U+d800..U+dfff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+
+/**
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/* include the utfXX.h ------------------------------------------------------ */
+
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
+#include "unicode/utf_old.h"
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/utf16.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/utf16.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/utf16.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,605 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 1999-2007, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  utf16.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 1999sep09
-*   created by: Markus W. Scherer
-*/
-
-/**
- * \file
- * \brief C API: 16-bit Unicode handling macros
- * 
- * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
- * utf16.h is included by utf.h after unicode/umachine.h
- * and some common definitions.
- *
- * For more information see utf.h and the ICU User Guide Strings chapter
- * (http://icu-project.org/userguide/strings.html).
- *
- * <em>Usage:</em>
- * ICU coding guidelines for if() statements should be followed when using these macros.
- * Compound statements (curly braces {}) must be used  for if-else-while... 
- * bodies and all macro statements should be terminated with semicolon.
- */
-
-#ifndef __UTF16_H__
-#define __UTF16_H__
-
-/* utf.h must be included first. */
-#ifndef __UTF_H__
-#   include "unicode/utf.h"
-#endif
-
-/* single-code point definitions -------------------------------------------- */
-
-/**
- * Does this code unit alone encode a code point (BMP, not a surrogate)?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
-
-/**
- * Is this code unit a lead surrogate (U+d800..U+dbff)?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
-
-/**
- * Is this code unit a trail surrogate (U+dc00..U+dfff)?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
-
-/**
- * Is this code unit a surrogate (U+d800..U+dfff)?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
-
-/**
- * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
- * is it a lead surrogate?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
-
-/**
- * Helper constant for U16_GET_SUPPLEMENTARY.
- * @internal
- */
-#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
-
-/**
- * Get a supplementary code point value (U+10000..U+10ffff)
- * from its lead and trail surrogates.
- * The result is undefined if the input values are not
- * lead and trail surrogates.
- *
- * @param lead lead surrogate (U+d800..U+dbff)
- * @param trail trail surrogate (U+dc00..U+dfff)
- * @return supplementary code point (U+10000..U+10ffff)
- * @stable ICU 2.4
- */
-#define U16_GET_SUPPLEMENTARY(lead, trail) \
-    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
-
-
-/**
- * Get the lead surrogate (0xd800..0xdbff) for a
- * supplementary code point (0x10000..0x10ffff).
- * @param supplementary 32-bit code point (U+10000..U+10ffff)
- * @return lead surrogate (U+d800..U+dbff) for supplementary
- * @stable ICU 2.4
- */
-#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
-
-/**
- * Get the trail surrogate (0xdc00..0xdfff) for a
- * supplementary code point (0x10000..0x10ffff).
- * @param supplementary 32-bit code point (U+10000..U+10ffff)
- * @return trail surrogate (U+dc00..U+dfff) for supplementary
- * @stable ICU 2.4
- */
-#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
-
-/**
- * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
- * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
- * @param c 32-bit code point
- * @return 1 or 2
- * @stable ICU 2.4
- */
-#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
-
-/**
- * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
- * @return 2
- * @stable ICU 2.4
- */
-#define U16_MAX_LENGTH 2
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * The offset may point to either the lead or trail surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the adjacent matching surrogate as well.
- * The result is undefined if the offset points to a single, unpaired surrogate.
- * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U16_GET
- * @stable ICU 2.4
- */
-#define U16_GET_UNSAFE(s, i, c) { \
-    (c)=(s)[i]; \
-    if(U16_IS_SURROGATE(c)) { \
-        if(U16_IS_SURROGATE_LEAD(c)) { \
-            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
-        } else { \
-            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
-        } \
-    } \
-}
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The offset may point to either the lead or trail surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the adjacent matching surrogate as well.
- * If the offset points to a single, unpaired surrogate, then that itself
- * will be returned as the code point.
- * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<=i<length
- * @param length string length
- * @param c output UChar32 variable
- * @see U16_GET_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_GET(s, start, i, length, c) { \
-    (c)=(s)[i]; \
-    if(U16_IS_SURROGATE(c)) { \
-        uint16_t __c2; \
-        if(U16_IS_SURROGATE_LEAD(c)) { \
-            if((i)+1<(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
-                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
-            } \
-        } else { \
-            if((i)-1>=(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
-                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
-            } \
-        } \
-    } \
-}
-
-/* definitions with forward iteration --------------------------------------- */
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * The offset may point to the lead surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the following trail surrogate as well.
- * If the offset points to a trail surrogate, then that itself
- * will be returned as the code point.
- * The result is undefined if the offset points to a single, unpaired lead surrogate.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U16_NEXT
- * @stable ICU 2.4
- */
-#define U16_NEXT_UNSAFE(s, i, c) { \
-    (c)=(s)[(i)++]; \
-    if(U16_IS_LEAD(c)) { \
-        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
-    } \
-}
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The offset may point to the lead surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the following trail surrogate as well.
- * If the offset points to a trail surrogate or
- * to a single, unpaired lead surrogate, then that itself
- * will be returned as the code point.
- *
- * @param s const UChar * string
- * @param i string offset, must be i<length
- * @param length string length
- * @param c output UChar32 variable
- * @see U16_NEXT_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_NEXT(s, i, length, c) { \
-    (c)=(s)[(i)++]; \
-    if(U16_IS_LEAD(c)) { \
-        uint16_t __c2; \
-        if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
-            ++(i); \
-            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
-        } \
-    } \
-}
-
-/**
- * Append a code point to a string, overwriting 1 or 2 code units.
- * The offset points to the current end of the string contents
- * and is advanced (post-increment).
- * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
- * Otherwise, the result is undefined.
- *
- * @param s const UChar * string buffer
- * @param i string offset
- * @param c code point to append
- * @see U16_APPEND
- * @stable ICU 2.4
- */
-#define U16_APPEND_UNSAFE(s, i, c) { \
-    if((uint32_t)(c)<=0xffff) { \
-        (s)[(i)++]=(uint16_t)(c); \
-    } else { \
-        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
-        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
-    } \
-}
-
-/**
- * Append a code point to a string, overwriting 1 or 2 code units.
- * The offset points to the current end of the string contents
- * and is advanced (post-increment).
- * "Safe" macro, checks for a valid code point.
- * If a surrogate pair is written, checks for sufficient space in the string.
- * If the code point is not valid or a trail surrogate does not fit,
- * then isError is set to TRUE.
- *
- * @param s const UChar * string buffer
- * @param i string offset, must be i<capacity
- * @param capacity size of the string buffer
- * @param c code point to append
- * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
- * @see U16_APPEND_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_APPEND(s, i, capacity, c, isError) { \
-    if((uint32_t)(c)<=0xffff) { \
-        (s)[(i)++]=(uint16_t)(c); \
-    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
-        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
-        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
-    } else /* c>0x10ffff or not enough space */ { \
-        (isError)=TRUE; \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the next.
- * (Post-incrementing iteration.)
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @see U16_FWD_1
- * @stable ICU 2.4
- */
-#define U16_FWD_1_UNSAFE(s, i) { \
-    if(U16_IS_LEAD((s)[(i)++])) { \
-        ++(i); \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the next.
- * (Post-incrementing iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * @param s const UChar * string
- * @param i string offset, must be i<length
- * @param length string length
- * @see U16_FWD_1_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_FWD_1(s, i, length) { \
-    if(U16_IS_LEAD((s)[(i)++]) && (i)<(length) && U16_IS_TRAIL((s)[i])) { \
-        ++(i); \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the n-th next one,
- * i.e., move forward by n code points.
- * (Post-incrementing iteration.)
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param n number of code points to skip
- * @see U16_FWD_N
- * @stable ICU 2.4
- */
-#define U16_FWD_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \
-        U16_FWD_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the n-th next one,
- * i.e., move forward by n code points.
- * (Post-incrementing iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * @param s const UChar * string
- * @param i string offset, must be i<length
- * @param length string length
- * @param n number of code points to skip
- * @see U16_FWD_N_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_FWD_N(s, i, length, n) { \
-    int32_t __N=(n); \
-    while(__N>0 && (i)<(length)) { \
-        U16_FWD_1(s, i, length); \
-        --__N; \
-    } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary
- * at the start of a code point.
- * If the offset points to the trail surrogate of a surrogate pair,
- * then the offset is decremented.
- * Otherwise, it is not modified.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @see U16_SET_CP_START
- * @stable ICU 2.4
- */
-#define U16_SET_CP_START_UNSAFE(s, i) { \
-    if(U16_IS_TRAIL((s)[i])) { \
-        --(i); \
-    } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary
- * at the start of a code point.
- * If the offset points to the trail surrogate of a surrogate pair,
- * then the offset is decremented.
- * Otherwise, it is not modified.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<=i
- * @see U16_SET_CP_START_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_SET_CP_START(s, start, i) { \
-    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
-        --(i); \
-    } \
-}
-
-/* definitions with backward iteration -------------------------------------- */
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a trail surrogate unit
- * for a supplementary code point, then the macro will read
- * the preceding lead surrogate as well.
- * If the offset is behind a lead surrogate, then that itself
- * will be returned as the code point.
- * The result is undefined if the offset is behind a single, unpaired trail surrogate.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U16_PREV
- * @stable ICU 2.4
- */
-#define U16_PREV_UNSAFE(s, i, c) { \
-    (c)=(s)[--(i)]; \
-    if(U16_IS_TRAIL(c)) { \
-        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a trail surrogate unit
- * for a supplementary code point, then the macro will read
- * the preceding lead surrogate as well.
- * If the offset is behind a lead surrogate or behind a single, unpaired
- * trail surrogate, then that itself
- * will be returned as the code point.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<i
- * @param c output UChar32 variable
- * @see U16_PREV_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_PREV(s, start, i, c) { \
-    (c)=(s)[--(i)]; \
-    if(U16_IS_TRAIL(c)) { \
-        uint16_t __c2; \
-        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
-            --(i); \
-            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
-        } \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the previous one.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @see U16_BACK_1
- * @stable ICU 2.4
- */
-#define U16_BACK_1_UNSAFE(s, i) { \
-    if(U16_IS_TRAIL((s)[--(i)])) { \
-        --(i); \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the previous one.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<i
- * @see U16_BACK_1_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_BACK_1(s, start, i) { \
-    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
-        --(i); \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the n-th one before it,
- * i.e., move backward by n code points.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param n number of code points to skip
- * @see U16_BACK_N
- * @stable ICU 2.4
- */
-#define U16_BACK_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \
-        U16_BACK_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the n-th one before it,
- * i.e., move backward by n code points.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * @param s const UChar * string
- * @param start start of string
- * @param i string offset, must be start<i
- * @param n number of code points to skip
- * @see U16_BACK_N_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_BACK_N(s, start, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0 && (i)>(start)) { \
-        U16_BACK_1(s, start, i); \
-        --__N; \
-    } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary after a code point.
- * If the offset is behind the lead surrogate of a surrogate pair,
- * then the offset is incremented.
- * Otherwise, it is not modified.
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @see U16_SET_CP_LIMIT
- * @stable ICU 2.4
- */
-#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \
-    if(U16_IS_LEAD((s)[(i)-1])) { \
-        ++(i); \
-    } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary after a code point.
- * If the offset is behind the lead surrogate of a surrogate pair,
- * then the offset is incremented.
- * Otherwise, it is not modified.
- * The input offset may be the same as the string length.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, start<=i<=length
- * @param length string length
- * @see U16_SET_CP_LIMIT_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_SET_CP_LIMIT(s, start, i, length) { \
-    if((start)<(i) && (i)<(length) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
-        ++(i); \
-    } \
-}
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/utf16.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/utf16.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/utf16.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/utf16.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,605 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf16.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep09
+*   created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 16-bit Unicode handling macros
+ * 
+ * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
+ * utf16.h is included by utf.h after unicode/umachine.h
+ * and some common definitions.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (http://icu-project.org/userguide/strings.html).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used  for if-else-while... 
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF16_H__
+#define __UTF16_H__
+
+/* utf.h must be included first. */
+#ifndef __UTF_H__
+#   include "unicode/utf.h"
+#endif
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
+
+/**
+ * Is this code unit a lead surrogate (U+d800..U+dbff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code unit a trail surrogate (U+dc00..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code unit a surrogate (U+d800..U+dfff)?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
+
+/**
+ * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 16-bit code unit
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Helper constant for U16_GET_SUPPLEMENTARY.
+ * @internal
+ */
+#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/**
+ * Get a supplementary code point value (U+10000..U+10ffff)
+ * from its lead and trail surrogates.
+ * The result is undefined if the input values are not
+ * lead and trail surrogates.
+ *
+ * @param lead lead surrogate (U+d800..U+dbff)
+ * @param trail trail surrogate (U+dc00..U+dfff)
+ * @return supplementary code point (U+10000..U+10ffff)
+ * @stable ICU 2.4
+ */
+#define U16_GET_SUPPLEMENTARY(lead, trail) \
+    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
+
+
+/**
+ * Get the lead surrogate (0xd800..0xdbff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return lead surrogate (U+d800..U+dbff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/**
+ * Get the trail surrogate (0xdc00..0xdfff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return trail surrogate (U+dc00..U+dfff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/**
+ * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
+ * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
+ * @param c 32-bit code point
+ * @return 1 or 2
+ * @stable ICU 2.4
+ */
+#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
+
+/**
+ * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
+ * @return 2
+ * @stable ICU 2.4
+ */
+#define U16_MAX_LENGTH 2
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ * The result is undefined if the offset points to a single, unpaired surrogate.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_GET
+ * @stable ICU 2.4
+ */
+#define U16_GET_UNSAFE(s, i, c) { \
+    (c)=(s)[i]; \
+    if(U16_IS_SURROGATE(c)) { \
+        if(U16_IS_SURROGATE_LEAD(c)) { \
+            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
+        } else { \
+            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
+        } \
+    } \
+}
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ * If the offset points to a single, unpaired surrogate, then that itself
+ * will be returned as the code point.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_GET(s, start, i, length, c) { \
+    (c)=(s)[i]; \
+    if(U16_IS_SURROGATE(c)) { \
+        uint16_t __c2; \
+        if(U16_IS_SURROGATE_LEAD(c)) { \
+            if((i)+1<(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+            } \
+        } else { \
+            if((i)-1>=(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+            } \
+        } \
+    } \
+}
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset points to a single, unpaired lead surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_NEXT
+ * @stable ICU 2.4
+ */
+#define U16_NEXT_UNSAFE(s, i, c) { \
+    (c)=(s)[(i)++]; \
+    if(U16_IS_LEAD(c)) { \
+        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
+    } \
+}
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then that itself
+ * will be returned as the code point.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_NEXT(s, i, length, c) { \
+    (c)=(s)[(i)++]; \
+    if(U16_IS_LEAD(c)) { \
+        uint16_t __c2; \
+        if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
+            ++(i); \
+            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+        } \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const UChar * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see U16_APPEND
+ * @stable ICU 2.4
+ */
+#define U16_APPEND_UNSAFE(s, i, c) { \
+    if((uint32_t)(c)<=0xffff) { \
+        (s)[(i)++]=(uint16_t)(c); \
+    } else { \
+        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a surrogate pair is written, checks for sufficient space in the string.
+ * If the code point is not valid or a trail surrogate does not fit,
+ * then isError is set to TRUE.
+ *
+ * @param s const UChar * string buffer
+ * @param i string offset, must be i<capacity
+ * @param capacity size of the string buffer
+ * @param c code point to append
+ * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
+ * @see U16_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_APPEND(s, i, capacity, c, isError) { \
+    if((uint32_t)(c)<=0xffff) { \
+        (s)[(i)++]=(uint16_t)(c); \
+    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
+        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+    } else /* c>0x10ffff or not enough space */ { \
+        (isError)=TRUE; \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_FWD_1
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1_UNSAFE(s, i) { \
+    if(U16_IS_LEAD((s)[(i)++])) { \
+        ++(i); \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @see U16_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1(s, i, length) { \
+    if(U16_IS_LEAD((s)[(i)++]) && (i)<(length) && U16_IS_TRAIL((s)[i])) { \
+        ++(i); \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U16_FWD_N
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        U16_FWD_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param n number of code points to skip
+ * @see U16_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N(s, i, length, n) { \
+    int32_t __N=(n); \
+    while(__N>0 && (i)<(length)) { \
+        U16_FWD_1(s, i, length); \
+        --__N; \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START_UNSAFE(s, i) { \
+    if(U16_IS_TRAIL((s)[i])) { \
+        --(i); \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i
+ * @see U16_SET_CP_START_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START(s, start, i) { \
+    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+        --(i); \
+    } \
+}
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind a single, unpaired trail surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_PREV
+ * @stable ICU 2.4
+ */
+#define U16_PREV_UNSAFE(s, i, c) { \
+    (c)=(s)[--(i)]; \
+    if(U16_IS_TRAIL(c)) { \
+        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then that itself
+ * will be returned as the code point.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_PREV(s, start, i, c) { \
+    (c)=(s)[--(i)]; \
+    if(U16_IS_TRAIL(c)) { \
+        uint16_t __c2; \
+        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+            --(i); \
+            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+        } \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_BACK_1
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1_UNSAFE(s, i) { \
+    if(U16_IS_TRAIL((s)[--(i)])) { \
+        --(i); \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @see U16_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1(s, start, i) { \
+    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+        --(i); \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U16_BACK_N
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        U16_BACK_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start start of string
+ * @param i string offset, must be start<i
+ * @param n number of code points to skip
+ * @see U16_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N(s, start, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0 && (i)>(start)) { \
+        U16_BACK_1(s, start, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \
+    if(U16_IS_LEAD((s)[(i)-1])) { \
+        ++(i); \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, start<=i<=length
+ * @param length string length
+ * @see U16_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT(s, start, i, length) { \
+    if((start)<(i) && (i)<(length) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
+        ++(i); \
+    } \
+}
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/utf32.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/utf32.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/utf32.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,23 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 1999-2001, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  utf32.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 1999sep20
-*   created by: Markus W. Scherer
-*/
-/**
- * \file
- * \brief C API: UTF-32 macros
- *
- * This file is obsolete and its contents moved to utf_old.h.
- * See utf_old.h and Jitterbug 2150 and its discussion on the ICU mailing list
- * in September 2002.
- */

Copied: MacRuby/trunk/icu-1060/unicode/utf32.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/utf32.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/utf32.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/utf32.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,23 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf32.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep20
+*   created by: Markus W. Scherer
+*/
+/**
+ * \file
+ * \brief C API: UTF-32 macros
+ *
+ * This file is obsolete and its contents moved to utf_old.h.
+ * See utf_old.h and Jitterbug 2150 and its discussion on the ICU mailing list
+ * in September 2002.
+ */

Deleted: MacRuby/trunk/icu-1060/unicode/utf8.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/utf8.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/utf8.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,652 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 1999-2007, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  utf8.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 1999sep13
-*   created by: Markus W. Scherer
-*/
-
-/**
- * \file
- * \brief C API: 8-bit Unicode handling macros
- * 
- * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
- * utf8.h is included by utf.h after unicode/umachine.h
- * and some common definitions.
- *
- * For more information see utf.h and the ICU User Guide Strings chapter
- * (http://icu-project.org/userguide/strings.html).
- *
- * <em>Usage:</em>
- * ICU coding guidelines for if() statements should be followed when using these macros.
- * Compound statements (curly braces {}) must be used  for if-else-while... 
- * bodies and all macro statements should be terminated with semicolon.
- */
-
-#ifndef __UTF8_H__
-#define __UTF8_H__
-
-/* utf.h must be included first. */
-#ifndef __UTF_H__
-#   include "unicode/utf.h"
-#endif
-
-/* internal definitions ----------------------------------------------------- */
-
-/**
- * \var utf8_countTrailBytes
- * Internal array with numbers of trail bytes for any given byte used in
- * lead byte position.
- * @internal
- */
-#ifdef U_UTF8_IMPL
-U_EXPORT const uint8_t 
-#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION)
-U_CFUNC const uint8_t
-#else
-U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? */ /*U_IMPORT*/ 
-#endif
-utf8_countTrailBytes[256];
-
-/**
- * Count the trail bytes for a UTF-8 lead byte.
- * @internal
- */
-#define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])
-
-/**
- * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
- * @internal
- */
-#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
-
-/**
- * Function for handling "next code point" with error-checking.
- * @internal
- */
-U_INTERNAL UChar32 U_EXPORT2
-utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
-
-/**
- * Function for handling "append code point" with error-checking.
- * @internal
- */
-U_INTERNAL int32_t U_EXPORT2
-utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
-
-/**
- * Function for handling "previous code point" with error-checking.
- * @internal
- */
-U_INTERNAL UChar32 U_EXPORT2
-utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
-
-/**
- * Function for handling "skip backward one code point" with error-checking.
- * @internal
- */
-U_INTERNAL int32_t U_EXPORT2
-utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
-
-/* single-code point definitions -------------------------------------------- */
-
-/**
- * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
- * @param c 8-bit code unit (byte)
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U8_IS_SINGLE(c) (((c)&0x80)==0)
-
-/**
- * Is this code unit (byte) a UTF-8 lead byte?
- * @param c 8-bit code unit (byte)
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e)
-
-/**
- * Is this code unit (byte) a UTF-8 trail byte?
- * @param c 8-bit code unit (byte)
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)
-
-/**
- * How many code units (bytes) are used for the UTF-8 encoding
- * of this Unicode code point?
- * @param c 32-bit code point
- * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
- * @stable ICU 2.4
- */
-#define U8_LENGTH(c) \
-    ((uint32_t)(c)<=0x7f ? 1 : \
-        ((uint32_t)(c)<=0x7ff ? 2 : \
-            ((uint32_t)(c)<=0xd7ff ? 3 : \
-                ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
-                    ((uint32_t)(c)<=0xffff ? 3 : 4)\
-                ) \
-            ) \
-        ) \
-    )
-
-/**
- * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
- * @return 4
- * @stable ICU 2.4
- */
-#define U8_MAX_LENGTH 4
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * The offset may point to either the lead byte or one of the trail bytes
- * for a code point, in which case the macro will read all of the bytes
- * for the code point.
- * The result is undefined if the offset points to an illegal UTF-8
- * byte sequence.
- * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U8_GET
- * @stable ICU 2.4
- */
-#define U8_GET_UNSAFE(s, i, c) { \
-    int32_t _u8_get_unsafe_index=(int32_t)(i); \
-    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
-    U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
-}
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * The offset may point to either the lead byte or one of the trail bytes
- * for a code point, in which case the macro will read all of the bytes
- * for the code point.
- * If the offset points to an illegal UTF-8 byte sequence, then
- * c is set to a negative value.
- * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
- *
- * @param s const uint8_t * string
- * @param start starting string offset
- * @param i string offset, must be start<=i<length
- * @param length string length
- * @param c output UChar32 variable, set to <0 in case of an error
- * @see U8_GET_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_GET(s, start, i, length, c) { \
-    int32_t _u8_get_index=(int32_t)(i); \
-    U8_SET_CP_START(s, start, _u8_get_index); \
-    U8_NEXT(s, _u8_get_index, length, c); \
-}
-
-/* definitions with forward iteration --------------------------------------- */
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * The offset may point to the lead byte of a multi-byte sequence,
- * in which case the macro will read the whole sequence.
- * The result is undefined if the offset points to a trail byte
- * or an illegal UTF-8 sequence.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U8_NEXT
- * @stable ICU 2.4
- */
-#define U8_NEXT_UNSAFE(s, i, c) { \
-    (c)=(uint8_t)(s)[(i)++]; \
-    if((uint8_t)((c)-0xc0)<0x35) { \
-        uint8_t __count=U8_COUNT_TRAIL_BYTES(c); \
-        U8_MASK_LEAD_BYTE(c, __count); \
-        switch(__count) { \
-        /* each following branch falls through to the next one */ \
-        case 3: \
-            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
-        case 2: \
-            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
-        case 1: \
-            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
-        /* no other branches to optimize switch() */ \
-            break; \
-        } \
-    } \
-}
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The offset may point to the lead byte of a multi-byte sequence,
- * in which case the macro will read the whole sequence.
- * If the offset points to a trail byte or an illegal UTF-8 sequence, then
- * c is set to a negative value.
- *
- * @param s const uint8_t * string
- * @param i string offset, must be i<length
- * @param length string length
- * @param c output UChar32 variable, set to <0 in case of an error
- * @see U8_NEXT_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_NEXT(s, i, length, c) { \
-    (c)=(uint8_t)(s)[(i)++]; \
-    if((c)>=0x80) { \
-        uint8_t __t1, __t2; \
-        if( /* handle U+1000..U+CFFF inline */ \
-            (0xe0<(c) && (c)<=0xec) && \
-            (((i)+1)<(length)) && \
-            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
-            (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
-        ) { \
-            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
-            (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
-            (i)+=2; \
-        } else if( /* handle U+0080..U+07FF inline */ \
-            ((c)<0xe0 && (c)>=0xc2) && \
-            ((i)<(length)) && \
-            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
-        ) { \
-            (c)=(UChar)((((c)&0x1f)<<6)|__t1); \
-            ++(i); \
-        } else if(U8_IS_LEAD(c)) { \
-            /* function call for "complicated" and error cases */ \
-            (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (int32_t)(length), c, -1); \
-        } else { \
-            (c)=U_SENTINEL; \
-        } \
-    } \
-}
-
-/**
- * Append a code point to a string, overwriting 1 to 4 bytes.
- * The offset points to the current end of the string contents
- * and is advanced (post-increment).
- * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
- * Otherwise, the result is undefined.
- *
- * @param s const uint8_t * string buffer
- * @param i string offset
- * @param c code point to append
- * @see U8_APPEND
- * @stable ICU 2.4
- */
-#define U8_APPEND_UNSAFE(s, i, c) { \
-    if((uint32_t)(c)<=0x7f) { \
-        (s)[(i)++]=(uint8_t)(c); \
-    } else { \
-        if((uint32_t)(c)<=0x7ff) { \
-            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
-        } else { \
-            if((uint32_t)(c)<=0xffff) { \
-                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
-            } else { \
-                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
-                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
-            } \
-            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
-        } \
-        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
-    } \
-}
-
-/**
- * Append a code point to a string, overwriting 1 to 4 bytes.
- * The offset points to the current end of the string contents
- * and is advanced (post-increment).
- * "Safe" macro, checks for a valid code point.
- * If a non-ASCII code point is written, checks for sufficient space in the string.
- * If the code point is not valid or trail bytes do not fit,
- * then isError is set to TRUE.
- *
- * @param s const uint8_t * string buffer
- * @param i string offset, must be i<capacity
- * @param capacity size of the string buffer
- * @param c code point to append
- * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
- * @see U8_APPEND_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_APPEND(s, i, capacity, c, isError) { \
-    if((uint32_t)(c)<=0x7f) { \
-        (s)[(i)++]=(uint8_t)(c); \
-    } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
-        (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
-        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
-    } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
-        (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
-        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
-        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
-    } else { \
-        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(capacity), c, &(isError)); \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the next.
- * (Post-incrementing iteration.)
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @see U8_FWD_1
- * @stable ICU 2.4
- */
-#define U8_FWD_1_UNSAFE(s, i) { \
-    (i)+=1+U8_COUNT_TRAIL_BYTES((s)[i]); \
-}
-
-/**
- * Advance the string offset from one code point boundary to the next.
- * (Post-incrementing iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * @param s const uint8_t * string
- * @param i string offset, must be i<length
- * @param length string length
- * @see U8_FWD_1_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_FWD_1(s, i, length) { \
-    uint8_t __b=(uint8_t)(s)[(i)++]; \
-    if(U8_IS_LEAD(__b)) { \
-        uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
-        if((i)+__count>(length)) { \
-            __count=(uint8_t)((length)-(i)); \
-        } \
-        while(__count>0 && U8_IS_TRAIL((s)[i])) { \
-            ++(i); \
-            --__count; \
-        } \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the n-th next one,
- * i.e., move forward by n code points.
- * (Post-incrementing iteration.)
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param n number of code points to skip
- * @see U8_FWD_N
- * @stable ICU 2.4
- */
-#define U8_FWD_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \
-        U8_FWD_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the n-th next one,
- * i.e., move forward by n code points.
- * (Post-incrementing iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * @param s const uint8_t * string
- * @param i string offset, must be i<length
- * @param length string length
- * @param n number of code points to skip
- * @see U8_FWD_N_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_FWD_N(s, i, length, n) { \
-    int32_t __N=(n); \
-    while(__N>0 && (i)<(length)) { \
-        U8_FWD_1(s, i, length); \
-        --__N; \
-    } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary
- * at the start of a code point.
- * If the offset points to a UTF-8 trail byte,
- * then the offset is moved backward to the corresponding lead byte.
- * Otherwise, it is not modified.
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @see U8_SET_CP_START
- * @stable ICU 2.4
- */
-#define U8_SET_CP_START_UNSAFE(s, i) { \
-    while(U8_IS_TRAIL((s)[i])) { --(i); } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary
- * at the start of a code point.
- * If the offset points to a UTF-8 trail byte,
- * then the offset is moved backward to the corresponding lead byte.
- * Otherwise, it is not modified.
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * @param s const uint8_t * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<=i
- * @see U8_SET_CP_START_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_SET_CP_START(s, start, i) { \
-    if(U8_IS_TRAIL((s)[(i)])) { \
-        (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
-    } \
-}
-
-/* definitions with backward iteration -------------------------------------- */
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a multi-byte sequence, then the macro will read
- * the whole sequence.
- * If the offset is behind a lead byte, then that itself
- * will be returned as the code point.
- * The result is undefined if the offset is behind an illegal UTF-8 sequence.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U8_PREV
- * @stable ICU 2.4
- */
-#define U8_PREV_UNSAFE(s, i, c) { \
-    (c)=(uint8_t)(s)[--(i)]; \
-    if(U8_IS_TRAIL(c)) { \
-        uint8_t __b, __count=1, __shift=6; \
-\
-        /* c is a trail byte */ \
-        (c)&=0x3f; \
-        for(;;) { \
-            __b=(uint8_t)(s)[--(i)]; \
-            if(__b>=0xc0) { \
-                U8_MASK_LEAD_BYTE(__b, __count); \
-                (c)|=(UChar32)__b<<__shift; \
-                break; \
-            } else { \
-                (c)|=(UChar32)(__b&0x3f)<<__shift; \
-                ++__count; \
-                __shift+=6; \
-            } \
-        } \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a multi-byte sequence, then the macro will read
- * the whole sequence.
- * If the offset is behind a lead byte, then that itself
- * will be returned as the code point.
- * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
- *
- * @param s const uint8_t * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<i
- * @param c output UChar32 variable, set to <0 in case of an error
- * @see U8_PREV_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_PREV(s, start, i, c) { \
-    (c)=(uint8_t)(s)[--(i)]; \
-    if((c)>=0x80) { \
-        if((c)<=0xbf) { \
-            (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
-        } else { \
-            (c)=U_SENTINEL; \
-        } \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the previous one.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @see U8_BACK_1
- * @stable ICU 2.4
- */
-#define U8_BACK_1_UNSAFE(s, i) { \
-    while(U8_IS_TRAIL((s)[--(i)])) {} \
-}
-
-/**
- * Move the string offset from one code point boundary to the previous one.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * @param s const uint8_t * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<i
- * @see U8_BACK_1_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_BACK_1(s, start, i) { \
-    if(U8_IS_TRAIL((s)[--(i)])) { \
-        (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the n-th one before it,
- * i.e., move backward by n code points.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param n number of code points to skip
- * @see U8_BACK_N
- * @stable ICU 2.4
- */
-#define U8_BACK_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \
-        U8_BACK_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the n-th one before it,
- * i.e., move backward by n code points.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * @param s const uint8_t * string
- * @param start index of the start of the string
- * @param i string offset, must be start<i
- * @param n number of code points to skip
- * @see U8_BACK_N_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_BACK_N(s, start, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0 && (i)>(start)) { \
-        U8_BACK_1(s, start, i); \
-        --__N; \
-    } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary after a code point.
- * If the offset is behind a partial multi-byte sequence,
- * then the offset is incremented to behind the whole sequence.
- * Otherwise, it is not modified.
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @see U8_SET_CP_LIMIT
- * @stable ICU 2.4
- */
-#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \
-    U8_BACK_1_UNSAFE(s, i); \
-    U8_FWD_1_UNSAFE(s, i); \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary after a code point.
- * If the offset is behind a partial multi-byte sequence,
- * then the offset is incremented to behind the whole sequence.
- * Otherwise, it is not modified.
- * The input offset may be the same as the string length.
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * @param s const uint8_t * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<=i<=length
- * @param length string length
- * @see U8_SET_CP_LIMIT_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_SET_CP_LIMIT(s, start, i, length) { \
-    if((start)<(i) && (i)<(length)) { \
-        U8_BACK_1(s, start, i); \
-        U8_FWD_1(s, i, length); \
-    } \
-}
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/utf8.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/utf8.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/utf8.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/utf8.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,652 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2007, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf8.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999sep13
+*   created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 8-bit Unicode handling macros
+ * 
+ * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
+ * utf8.h is included by utf.h after unicode/umachine.h
+ * and some common definitions.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (http://icu-project.org/userguide/strings.html).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used  for if-else-while... 
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF8_H__
+#define __UTF8_H__
+
+/* utf.h must be included first. */
+#ifndef __UTF_H__
+#   include "unicode/utf.h"
+#endif
+
+/* internal definitions ----------------------------------------------------- */
+
+/**
+ * \var utf8_countTrailBytes
+ * Internal array with numbers of trail bytes for any given byte used in
+ * lead byte position.
+ * @internal
+ */
+#ifdef U_UTF8_IMPL
+U_EXPORT const uint8_t 
+#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION)
+U_CFUNC const uint8_t
+#else
+U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? */ /*U_IMPORT*/ 
+#endif
+utf8_countTrailBytes[256];
+
+/**
+ * Count the trail bytes for a UTF-8 lead byte.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ * @internal
+ */
+#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/**
+ * Function for handling "next code point" with error-checking.
+ * @internal
+ */
+U_INTERNAL UChar32 U_EXPORT2
+utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "append code point" with error-checking.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2
+utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
+
+/**
+ * Function for handling "previous code point" with error-checking.
+ * @internal
+ */
+U_INTERNAL UChar32 U_EXPORT2
+utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "skip backward one code point" with error-checking.
+ * @internal
+ */
+U_INTERNAL int32_t U_EXPORT2
+utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_SINGLE(c) (((c)&0x80)==0)
+
+/**
+ * Is this code unit (byte) a UTF-8 lead byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e)
+
+/**
+ * Is this code unit (byte) a UTF-8 trail byte?
+ * @param c 8-bit code unit (byte)
+ * @return TRUE or FALSE
+ * @stable ICU 2.4
+ */
+#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)
+
+/**
+ * How many code units (bytes) are used for the UTF-8 encoding
+ * of this Unicode code point?
+ * @param c 32-bit code point
+ * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
+ * @stable ICU 2.4
+ */
+#define U8_LENGTH(c) \
+    ((uint32_t)(c)<=0x7f ? 1 : \
+        ((uint32_t)(c)<=0x7ff ? 2 : \
+            ((uint32_t)(c)<=0xd7ff ? 3 : \
+                ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
+                    ((uint32_t)(c)<=0xffff ? 3 : 4)\
+                ) \
+            ) \
+        ) \
+    )
+
+/**
+ * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
+ * @return 4
+ * @stable ICU 2.4
+ */
+#define U8_MAX_LENGTH 4
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ * The result is undefined if the offset points to an illegal UTF-8
+ * byte sequence.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_GET
+ * @stable ICU 2.4
+ */
+#define U8_GET_UNSAFE(s, i, c) { \
+    int32_t _u8_get_unsafe_index=(int32_t)(i); \
+    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
+    U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
+}
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to a negative value.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset
+ * @param i string offset, must be start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_GET(s, start, i, length, c) { \
+    int32_t _u8_get_index=(int32_t)(i); \
+    U8_SET_CP_START(s, start, _u8_get_index); \
+    U8_NEXT(s, _u8_get_index, length, c); \
+}
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * The result is undefined if the offset points to a trail byte
+ * or an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_NEXT
+ * @stable ICU 2.4
+ */
+#define U8_NEXT_UNSAFE(s, i, c) { \
+    (c)=(uint8_t)(s)[(i)++]; \
+    if((uint8_t)((c)-0xc0)<0x35) { \
+        uint8_t __count=U8_COUNT_TRAIL_BYTES(c); \
+        U8_MASK_LEAD_BYTE(c, __count); \
+        switch(__count) { \
+        /* each following branch falls through to the next one */ \
+        case 3: \
+            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+        case 2: \
+            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+        case 1: \
+            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+        /* no other branches to optimize switch() */ \
+            break; \
+        } \
+    } \
+}
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_NEXT(s, i, length, c) { \
+    (c)=(uint8_t)(s)[(i)++]; \
+    if((c)>=0x80) { \
+        uint8_t __t1, __t2; \
+        if( /* handle U+1000..U+CFFF inline */ \
+            (0xe0<(c) && (c)<=0xec) && \
+            (((i)+1)<(length)) && \
+            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
+            (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
+        ) { \
+            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
+            (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
+            (i)+=2; \
+        } else if( /* handle U+0080..U+07FF inline */ \
+            ((c)<0xe0 && (c)>=0xc2) && \
+            ((i)<(length)) && \
+            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
+        ) { \
+            (c)=(UChar)((((c)&0x1f)<<6)|__t1); \
+            ++(i); \
+        } else if(U8_IS_LEAD(c)) { \
+            /* function call for "complicated" and error cases */ \
+            (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (int32_t)(length), c, -1); \
+        } else { \
+            (c)=U_SENTINEL; \
+        } \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const uint8_t * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see U8_APPEND
+ * @stable ICU 2.4
+ */
+#define U8_APPEND_UNSAFE(s, i, c) { \
+    if((uint32_t)(c)<=0x7f) { \
+        (s)[(i)++]=(uint8_t)(c); \
+    } else { \
+        if((uint32_t)(c)<=0x7ff) { \
+            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+        } else { \
+            if((uint32_t)(c)<=0xffff) { \
+                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+            } else { \
+                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
+                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
+            } \
+            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+        } \
+        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+    } \
+}
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a non-ASCII code point is written, checks for sufficient space in the string.
+ * If the code point is not valid or trail bytes do not fit,
+ * then isError is set to TRUE.
+ *
+ * @param s const uint8_t * string buffer
+ * @param i string offset, must be i<capacity
+ * @param capacity size of the string buffer
+ * @param c code point to append
+ * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
+ * @see U8_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_APPEND(s, i, capacity, c, isError) { \
+    if((uint32_t)(c)<=0x7f) { \
+        (s)[(i)++]=(uint8_t)(c); \
+    } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
+        (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+    } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
+        (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+    } else { \
+        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(capacity), c, &(isError)); \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_FWD_1
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1_UNSAFE(s, i) { \
+    (i)+=1+U8_COUNT_TRAIL_BYTES((s)[i]); \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @see U8_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1(s, i, length) { \
+    uint8_t __b=(uint8_t)(s)[(i)++]; \
+    if(U8_IS_LEAD(__b)) { \
+        uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
+        if((i)+__count>(length)) { \
+            __count=(uint8_t)((length)-(i)); \
+        } \
+        while(__count>0 && U8_IS_TRAIL((s)[i])) { \
+            ++(i); \
+            --__count; \
+        } \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_FWD_N
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        U8_FWD_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param n number of code points to skip
+ * @see U8_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N(s, i, length, n) { \
+    int32_t __N=(n); \
+    while(__N>0 && (i)<(length)) { \
+        U8_FWD_1(s, i, length); \
+        --__N; \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START_UNSAFE(s, i) { \
+    while(U8_IS_TRAIL((s)[i])) { --(i); } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i
+ * @see U8_SET_CP_START_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START(s, start, i) { \
+    if(U8_IS_TRAIL((s)[(i)])) { \
+        (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
+    } \
+}
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_PREV
+ * @stable ICU 2.4
+ */
+#define U8_PREV_UNSAFE(s, i, c) { \
+    (c)=(uint8_t)(s)[--(i)]; \
+    if(U8_IS_TRAIL(c)) { \
+        uint8_t __b, __count=1, __shift=6; \
+\
+        /* c is a trail byte */ \
+        (c)&=0x3f; \
+        for(;;) { \
+            __b=(uint8_t)(s)[--(i)]; \
+            if(__b>=0xc0) { \
+                U8_MASK_LEAD_BYTE(__b, __count); \
+                (c)|=(UChar32)__b<<__shift; \
+                break; \
+            } else { \
+                (c)|=(UChar32)(__b&0x3f)<<__shift; \
+                ++__count; \
+                __shift+=6; \
+            } \
+        } \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_PREV(s, start, i, c) { \
+    (c)=(uint8_t)(s)[--(i)]; \
+    if((c)>=0x80) { \
+        if((c)<=0xbf) { \
+            (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
+        } else { \
+            (c)=U_SENTINEL; \
+        } \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_BACK_1
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1_UNSAFE(s, i) { \
+    while(U8_IS_TRAIL((s)[--(i)])) {} \
+}
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @see U8_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1(s, start, i) { \
+    if(U8_IS_TRAIL((s)[--(i)])) { \
+        (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_BACK_N
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        U8_BACK_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start index of the start of the string
+ * @param i string offset, must be start<i
+ * @param n number of code points to skip
+ * @see U8_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N(s, start, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0 && (i)>(start)) { \
+        U8_BACK_1(s, start, i); \
+        --__N; \
+    } \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \
+    U8_BACK_1_UNSAFE(s, i); \
+    U8_FWD_1_UNSAFE(s, i); \
+}
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<=length
+ * @param length string length
+ * @see U8_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT(s, start, i, length) { \
+    if((start)<(i) && (i)<(length)) { \
+        U8_BACK_1(s, start, i); \
+        U8_FWD_1(s, i, length); \
+    } \
+}
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/utf_old.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/utf_old.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/utf_old.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,1171 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 2002-2005, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  utf.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2002sep21
-*   created by: Markus W. Scherer
-*/
-
-/**
- * \file 
- * \brief C API: Deprecated macros for Unicode string handling
- */
-
-/**
- * 
- * The macros in utf_old.h are all deprecated and their use discouraged.
- * Some of the design principles behind the set of UTF macros
- * have changed or proved impractical.
- * Almost all of the old "UTF macros" are at least renamed.
- * If you are looking for a new equivalent to an old macro, please see the
- * comment at the old one.
- *
- * utf_old.h is included by utf.h after unicode/umachine.h
- * and some common definitions, to not break old code.
- *
- * Brief summary of reasons for deprecation:
- * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing)
- *   was impractical.
- * - Switch on UTF_SAFE etc. (selection of unsafe/safe/strict default string processing)
- *   was of little use and impractical.
- * - Whole classes of macros became obsolete outside of the UTF_SIZE/UTF_SAFE
- *   selection framework: UTF32_ macros (all trivial)
- *   and UTF_ default and intermediate macros (all aliases).
- * - The selection framework also caused many macro aliases.
- * - Change in Unicode standard: "irregular" sequences (3.0) became illegal (3.2).
- * - Change of language in Unicode standard:
- *   Growing distinction between internal x-bit Unicode strings and external UTF-x
- *   forms, with the former more lenient.
- *   Suggests renaming of UTF16_ macros to U16_.
- * - The prefix "UTF_" without a width number confused some users.
- * - "Safe" append macros needed the addition of an error indicator output.
- * - "Safe" UTF-8 macros used legitimate (if rarely used) code point values
- *   to indicate error conditions.
- * - The use of the "_CHAR" infix for code point operations confused some users.
- *
- * More details:
- *
- * Until ICU 2.2, utf.h theoretically allowed to choose among UTF-8/16/32
- * for string processing, and among unsafe/safe/strict default macros for that.
- *
- * It proved nearly impossible to write non-trivial, high-performance code
- * that is UTF-generic.
- * Unsafe default macros would be dangerous for default string processing,
- * and the main reason for the "strict" versions disappeared:
- * Between Unicode 3.0 and 3.2 all "irregular" UTF-8 sequences became illegal.
- * The only other conditions that "strict" checked for were non-characters,
- * which are valid during processing. Only during text input/output should they
- * be checked, and at that time other well-formedness checks may be
- * necessary or useful as well.
- * This can still be done by using U16_NEXT and U_IS_UNICODE_NONCHAR
- * or U_IS_UNICODE_CHAR.
- *
- * The old UTF8_..._SAFE macros also used some normal Unicode code points
- * to indicate malformed sequences.
- * The new UTF8_ macros without suffix use negative values instead.
- *
- * The entire contents of utf32.h was moved here without replacement
- * because all those macros were trivial and
- * were meaningful only in the framework of choosing the UTF size.
- *
- * See Jitterbug 2150 and its discussion on the ICU mailing list
- * in September 2002.
- *
- * <hr>
- *
- * <em>Obsolete part</em> of pre-ICU 2.4 utf.h file documentation:
- *
- * <p>The original concept for these files was for ICU to allow
- * in principle to set which UTF (UTF-8/16/32) is used internally
- * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type
- * accordingly. UTF-16 was the default.</p>
- *
- * <p>This concept has been abandoned.
- * A lot of the ICU source code assumes UChar strings are in UTF-16.
- * This is especially true for low-level code like
- * conversion, normalization, and collation.
- * The utf.h header enforces the default of UTF-16.
- * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.</p>
- *
- * <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
- * UChar is defined to be exactly wchar_t, otherwise uint16_t.</p>
- *
- * <p>UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
- * Unicode code point (Unicode scalar value, 0..0x10ffff).
- * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
- * the definition of UChar. For details see the documentation for UChar32 itself.</p>
- *
- * <p>utf.h also defines a number of C macros for handling single Unicode code points and
- * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual
- * implementations of those macros and then aliases one set of them (for UTF-16) for general use.
- * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while
- * the general alias macros always begin with UTF_...</p>
- *
- * <p>Many string operations can be done with or without error checking.
- * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe"
- * ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause
- * program failures if the strings are not well-formed. The safe macros have an additional, boolean
- * parameter "strict". If strict is FALSE, then only illegal sequences are detected.
- * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates).
- * Safe macros return special error code points for illegal/irregular sequences:
- * Typically, U+ffff, or values that would result in a code unit sequence of the same length
- * as the erroneous input sequence.<br>
- * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and
- * they do not have start/length parameters for boundary checking.</p>
- *
- * <p>Here, the macros are aliased in two steps:
- * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are
- * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures.
- * Then, in a second step, the default, general alias macros are set to use either the unsafe or
- * the safe/not strict (default) or the safe/strict macro;
- * these general macros do not have a strictness parameter.</p>
- *
- * <p>It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict.
- * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for
- * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.</p>
- *
- * <p>For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix.
- * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias.
- * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds,
- * then the _UNSAFE version may be used.
- * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.</p>
- *
- * <hr>
- *
- * @deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead.
- */
-
-#ifndef __UTF_OLD_H__
-#define __UTF_OLD_H__
-
-#ifndef U_HIDE_DEPRECATED_API
-
-/* utf.h must be included first. */
-#ifndef __UTF_H__
-#   include "unicode/utf.h"
-#endif
-
-/* Formerly utf.h, part 1 --------------------------------------------------- */
-
-#ifdef U_USE_UTF_DEPRECATES
-/**
- * Unicode string and array offset and index type.
- * ICU always counts Unicode code units (UChars) for
- * string offsets, indexes, and lengths, not Unicode code points.
- *
- * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release.
- */
-typedef int32_t UTextOffset;
-#endif
-
-/** Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF_SIZE 16
-
-/**
- * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations
- * with strict=FALSE.
- *
- * @deprecated ICU 2.4. Obsolete, see utf_old.h.
- */
-#define UTF_SAFE
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#undef UTF_UNSAFE
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#undef UTF_STRICT
-
-/**
- * <p>UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8,
- * which need 1 or 2 bytes in UTF-8:<br>
- * U+0015 = NAK = Negative Acknowledge, C0 control character<br>
- * U+009f = highest C1 control character</p>
- *
- * <p>These are used by UTF8_..._SAFE macros so that they can return an error value
- * that needs the same number of code units (bytes) as were seen by
- * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().</p>
- *
- * @deprecated ICU 2.4. Obsolete, see utf_old.h.
- */
-#define UTF8_ERROR_VALUE_1 0x15
-
-/**
- * See documentation on UTF8_ERROR_VALUE_1 for details.
- *
- * @deprecated ICU 2.4. Obsolete, see utf_old.h.
- */
-#define UTF8_ERROR_VALUE_2 0x9f
-
-/**
- * Error value for all UTFs. This code point value will be set by macros with error
- * checking if an error is detected.
- *
- * @deprecated ICU 2.4. Obsolete, see utf_old.h.
- */
-#define UTF_ERROR_VALUE 0xffff
-
-/**
- * Is a given 32-bit code an error value
- * as returned by one of the macros for any UTF?
- *
- * @deprecated ICU 2.4. Obsolete, see utf_old.h.
- */
-#define UTF_IS_ERROR(c) \
-    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
-
-/**
- * This is a combined macro: Is c a valid Unicode value _and_ not an error code?
- *
- * @deprecated ICU 2.4. Obsolete, see utf_old.h.
- */
-#define UTF_IS_VALID(c) \
-    (UTF_IS_UNICODE_CHAR(c) && \
-     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
-
-/**
- * Is this code unit or code point a surrogate (U+d800..U+dfff)?
- * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h.
- */
-#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
-
-/**
- * Is a given 32-bit code point a Unicode noncharacter?
- *
- * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h.
- */
-#define UTF_IS_UNICODE_NONCHAR(c) \
-    ((c)>=0xfdd0 && \
-     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
-     (uint32_t)(c)<=0x10ffff)
-
-/**
- * Is a given 32-bit value a Unicode code point value (0..U+10ffff)
- * that can be assigned a character?
- *
- * Code points that are not characters include:
- * - single surrogate code points (U+d800..U+dfff, 2048 code points)
- * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
- * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
- * - the highest Unicode code point value is U+10ffff
- *
- * This means that all code points below U+d800 are character code points,
- * and that boundary is tested first for performance.
- *
- * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h.
- */
-#define UTF_IS_UNICODE_CHAR(c) \
-    ((uint32_t)(c)<0xd800 || \
-        ((uint32_t)(c)>0xdfff && \
-         (uint32_t)(c)<=0x10ffff && \
-         !UTF_IS_UNICODE_NONCHAR(c)))
-
-/* Formerly utf8.h ---------------------------------------------------------- */
-
-/**
- * Count the trail bytes for a UTF-8 lead byte.
- * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h.
- */
-#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])
-
-/**
- * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
- * @deprecated ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h.
- */
-#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
-
-/** Is this this code point a single code unit (byte)? @deprecated ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h. */
-#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
-/** Is this this code unit the lead code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h. */
-#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
-/** Is this this code unit a trailing code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h. */
-#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
-
-/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h. */
-#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
-
-/**
- * Given the lead character, how many bytes are taken by this code point.
- * ICU does not deal with code points >0x10ffff
- * unless necessary for advancing in the byte stream.
- *
- * These length macros take into account that for values >0x10ffff
- * the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff
- * with 3 bytes.
- * Code point comparisons need to be in uint32_t because UChar32
- * may be a signed type, and negative values must be recognized.
- *
- * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf_old.h.
- */
-#if 1
-#   define UTF8_CHAR_LENGTH(c) \
-        ((uint32_t)(c)<=0x7f ? 1 : \
-            ((uint32_t)(c)<=0x7ff ? 2 : \
-                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
-            ) \
-        )
-#else
-#   define UTF8_CHAR_LENGTH(c) \
-        ((uint32_t)(c)<=0x7f ? 1 : \
-            ((uint32_t)(c)<=0x7ff ? 2 : \
-                ((uint32_t)(c)<=0xffff ? 3 : \
-                    ((uint32_t)(c)<=0x10ffff ? 4 : \
-                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
-                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
-                        ) \
-                    ) \
-                ) \
-            ) \
-        )
-#endif
-
-/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h. */
-#define UTF8_MAX_CHAR_LENGTH 4
-
-/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
-
-/** @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h. */
-#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
-    int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
-    UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
-    UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
-}
-
-/** @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h. */
-#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
-    int32_t _utf8_get_char_safe_index=(int32_t)(i); \
-    UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
-    UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
-}
-
-/** @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h. */
-#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
-    (c)=(s)[(i)++]; \
-    if((uint8_t)((c)-0xc0)<0x35) { \
-        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
-        UTF8_MASK_LEAD_BYTE(c, __count); \
-        switch(__count) { \
-        /* each following branch falls through to the next one */ \
-        case 3: \
-            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
-        case 2: \
-            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
-        case 1: \
-            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
-        /* no other branches to optimize switch() */ \
-            break; \
-        } \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h. */
-#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
-    if((uint32_t)(c)<=0x7f) { \
-        (s)[(i)++]=(uint8_t)(c); \
-    } else { \
-        if((uint32_t)(c)<=0x7ff) { \
-            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
-        } else { \
-            if((uint32_t)(c)<=0xffff) { \
-                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
-            } else { \
-                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
-                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
-            } \
-            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
-        } \
-        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h. */
-#define UTF8_FWD_1_UNSAFE(s, i) { \
-    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
-}
-
-/** @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h. */
-#define UTF8_FWD_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \
-        UTF8_FWD_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h. */
-#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
-    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
-}
-
-/** @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h. */
-#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
-    (c)=(s)[(i)++]; \
-    if((c)>=0x80) { \
-        if(UTF8_IS_LEAD(c)) { \
-            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
-        } else { \
-            (c)=UTF8_ERROR_VALUE_1; \
-        } \
-    } \
-}
-
-/** @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h. */
-#define UTF8_APPEND_CHAR_SAFE(s, i, length, c)  { \
-    if((uint32_t)(c)<=0x7f) { \
-        (s)[(i)++]=(uint8_t)(c); \
-    } else { \
-        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */
-#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)
-
-/** @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */
-#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)
-
-/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. */
-#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
-
-/** @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h. */
-#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
-    (c)=(s)[--(i)]; \
-    if(UTF8_IS_TRAIL(c)) { \
-        uint8_t __b, __count=1, __shift=6; \
-\
-        /* c is a trail byte */ \
-        (c)&=0x3f; \
-        for(;;) { \
-            __b=(s)[--(i)]; \
-            if(__b>=0xc0) { \
-                UTF8_MASK_LEAD_BYTE(__b, __count); \
-                (c)|=(UChar32)__b<<__shift; \
-                break; \
-            } else { \
-                (c)|=(UChar32)(__b&0x3f)<<__shift; \
-                ++__count; \
-                __shift+=6; \
-            } \
-        } \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h. */
-#define UTF8_BACK_1_UNSAFE(s, i) { \
-    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
-}
-
-/** @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h. */
-#define UTF8_BACK_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \
-        UTF8_BACK_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
-#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
-    UTF8_BACK_1_UNSAFE(s, i); \
-    UTF8_FWD_1_UNSAFE(s, i); \
-}
-
-/** @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h. */
-#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
-    (c)=(s)[--(i)]; \
-    if((c)>=0x80) { \
-        if((c)<=0xbf) { \
-            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
-        } else { \
-            (c)=UTF8_ERROR_VALUE_1; \
-        } \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */
-#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)
-
-/** @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */
-#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)
-
-/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */
-#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
-
-/* Formerly utf16.h --------------------------------------------------------- */
-
-/** Is uchar a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. */
-#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
-
-/** Is uchar a second/trail surrogate? @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. */
-#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
-
-/** Assuming c is a surrogate, is it a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h. */
-#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
-
-/** Helper constant for UTF16_GET_PAIR_VALUE. @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h. */
-#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
-
-/** Get the UTF-32 value from the surrogate code units. @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h. */
-#define UTF16_GET_PAIR_VALUE(first, second) \
-    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
-
-/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
-#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
-
-/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
-#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
-
-/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
-#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
-
-/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
-#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
-
-/** @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h. */
-#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar)
-
-/** @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */
-#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
-
-/** @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */
-#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
-
-/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h. */
-#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)
-
-/** @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. */
-#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
-
-/** @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */
-#define UTF16_MAX_CHAR_LENGTH 2
-
-/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF16_ARRAY_SIZE(size) (size)
-
-/**
- * Get a single code point from an offset that points to any
- * of the code units that belong to that code point.
- * Assume 0<=i<length.
- *
- * This could be used for iteration together with
- * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(),
- * but the use of UTF16_NEXT_CHAR[_UNSAFE]() and
- * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that.
- * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h.
- */
-#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
-    (c)=(s)[i]; \
-    if(UTF_IS_SURROGATE(c)) { \
-        if(UTF_IS_SURROGATE_FIRST(c)) { \
-            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
-        } else { \
-            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
-        } \
-    } \
-}
-
-/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
-#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
-    (c)=(s)[i]; \
-    if(UTF_IS_SURROGATE(c)) { \
-        uint16_t __c2; \
-        if(UTF_IS_SURROGATE_FIRST(c)) { \
-            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
-                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
-                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
-            } else if(strict) {\
-                /* unmatched first surrogate */ \
-                (c)=UTF_ERROR_VALUE; \
-            } \
-        } else { \
-            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
-                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
-                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
-            } else if(strict) {\
-                /* unmatched second surrogate */ \
-                (c)=UTF_ERROR_VALUE; \
-            } \
-        } \
-    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
-        (c)=UTF_ERROR_VALUE; \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
-#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
-    (c)=(s)[(i)++]; \
-    if(UTF_IS_FIRST_SURROGATE(c)) { \
-        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
-#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
-    if((uint32_t)(c)<=0xffff) { \
-        (s)[(i)++]=(uint16_t)(c); \
-    } else { \
-        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
-        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
-#define UTF16_FWD_1_UNSAFE(s, i) { \
-    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
-        ++(i); \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
-#define UTF16_FWD_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \
-        UTF16_FWD_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
-#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
-    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
-        --(i); \
-    } \
-}
-
-/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
-#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
-    (c)=(s)[(i)++]; \
-    if(UTF_IS_FIRST_SURROGATE(c)) { \
-        uint16_t __c2; \
-        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
-            ++(i); \
-            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
-            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
-        } else if(strict) {\
-            /* unmatched first surrogate */ \
-            (c)=UTF_ERROR_VALUE; \
-        } \
-    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
-        /* unmatched second surrogate or other non-character */ \
-        (c)=UTF_ERROR_VALUE; \
-    } \
-}
-
-/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
-#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
-    if((uint32_t)(c)<=0xffff) { \
-        (s)[(i)++]=(uint16_t)(c); \
-    } else if((uint32_t)(c)<=0x10ffff) { \
-        if((i)+1<(length)) { \
-            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
-            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
-        } else /* not enough space */ { \
-            (s)[(i)++]=UTF_ERROR_VALUE; \
-        } \
-    } else /* c>0x10ffff, write error value */ { \
-        (s)[(i)++]=UTF_ERROR_VALUE; \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
-#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)
-
-/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
-#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)
-
-/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
-#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
-
-/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
-#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
-    (c)=(s)[--(i)]; \
-    if(UTF_IS_SECOND_SURROGATE(c)) { \
-        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
-#define UTF16_BACK_1_UNSAFE(s, i) { \
-    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
-        --(i); \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
-#define UTF16_BACK_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \
-        UTF16_BACK_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
-#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
-    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
-        ++(i); \
-    } \
-}
-
-/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
-#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
-    (c)=(s)[--(i)]; \
-    if(UTF_IS_SECOND_SURROGATE(c)) { \
-        uint16_t __c2; \
-        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
-            --(i); \
-            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
-            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
-        } else if(strict) {\
-            /* unmatched second surrogate */ \
-            (c)=UTF_ERROR_VALUE; \
-        } \
-    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
-        /* unmatched first surrogate or other non-character */ \
-        (c)=UTF_ERROR_VALUE; \
-    } \
-}
-
-/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
-#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)
-
-/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
-#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)
-
-/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
-#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
-
-/* Formerly utf32.h --------------------------------------------------------- */
-
-/*
-* Old documentation:
-*
-*   This file defines macros to deal with UTF-32 code units and code points.
-*   Signatures and semantics are the same as for the similarly named macros
-*   in utf16.h.
-*   utf32.h is included by utf.h after unicode/umachine.h</p>
-*   and some common definitions.
-*   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
-*                  Compound statements (curly braces {}) must be used  for if-else-while...
-*                  bodies and all macro statements should be terminated with semicolon.</p>
-*/
-
-/* internal definitions ----------------------------------------------------- */
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_IS_SAFE(c, strict) \
-    (!(strict) ? \
-        (uint32_t)(c)<=0x10ffff : \
-        UTF_IS_UNICODE_CHAR(c))
-
-/*
- * For the semantics of all of these macros, see utf16.h.
- * The UTF-32 versions are trivial because any code point is
- * encoded using exactly one code unit.
- */
-
-/* single-code point definitions -------------------------------------------- */
-
-/* classes of code unit values */
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_IS_SINGLE(uchar) 1
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_IS_LEAD(uchar) 0
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_IS_TRAIL(uchar) 0
-
-/* number of code units per code point */
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_CHAR_LENGTH(c) 1
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_MAX_CHAR_LENGTH 1
-
-/* average number of code units compared to UTF-16 */
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_ARRAY_SIZE(size) (size)
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
-    (c)=(s)[i]; \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
-    (c)=(s)[i]; \
-    if(!UTF32_IS_SAFE(c, strict)) { \
-        (c)=UTF_ERROR_VALUE; \
-    } \
-}
-
-/* definitions with forward iteration --------------------------------------- */
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
-    (c)=(s)[(i)++]; \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
-    (s)[(i)++]=(c); \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_FWD_1_UNSAFE(s, i) { \
-    ++(i); \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_FWD_N_UNSAFE(s, i, n) { \
-    (i)+=(n); \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
-    (c)=(s)[(i)++]; \
-    if(!UTF32_IS_SAFE(c, strict)) { \
-        (c)=UTF_ERROR_VALUE; \
-    } \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
-    if((uint32_t)(c)<=0x10ffff) { \
-        (s)[(i)++]=(c); \
-    } else /* c>0x10ffff, write 0xfffd */ { \
-        (s)[(i)++]=0xfffd; \
-    } \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_FWD_1_SAFE(s, i, length) { \
-    ++(i); \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_FWD_N_SAFE(s, i, length, n) { \
-    if(((i)+=(n))>(length)) { \
-        (i)=(length); \
-    } \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
-}
-
-/* definitions with backward iteration -------------------------------------- */
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
-    (c)=(s)[--(i)]; \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_BACK_1_UNSAFE(s, i) { \
-    --(i); \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_BACK_N_UNSAFE(s, i, n) { \
-    (i)-=(n); \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
-    (c)=(s)[--(i)]; \
-    if(!UTF32_IS_SAFE(c, strict)) { \
-        (c)=UTF_ERROR_VALUE; \
-    } \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_BACK_1_SAFE(s, start, i) { \
-    --(i); \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_BACK_N_SAFE(s, start, i, n) { \
-    (i)-=(n); \
-    if((i)<(start)) { \
-        (i)=(start); \
-    } \
-}
-
-/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
-#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
-}
-
-/* Formerly utf.h, part 2 --------------------------------------------------- */
-
-/**
- * Estimate the number of code units for a string based on the number of UTF-16 code units.
- *
- * @deprecated ICU 2.4. Obsolete, see utf_old.h.
- */
-#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
-
-/** @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */
-#define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)
-
-/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
-#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
-
-
-/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
-#define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)
-
-/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
-#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
-
-
-/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
-#define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
-
-/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
-#define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
-
-
-/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
-#define UTF_FWD_1_UNSAFE(s, i)                       UTF16_FWD_1_UNSAFE(s, i)
-
-/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
-#define UTF_FWD_1_SAFE(s, i, length)                 UTF16_FWD_1_SAFE(s, i, length)
-
-
-/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
-#define UTF_FWD_N_UNSAFE(s, i, n)                    UTF16_FWD_N_UNSAFE(s, i, n)
-
-/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
-#define UTF_FWD_N_SAFE(s, i, length, n)              UTF16_FWD_N_SAFE(s, i, length, n)
-
-
-/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
-#define UTF_SET_CHAR_START_UNSAFE(s, i)              UTF16_SET_CHAR_START_UNSAFE(s, i)
-
-/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
-#define UTF_SET_CHAR_START_SAFE(s, start, i)         UTF16_SET_CHAR_START_SAFE(s, start, i)
-
-
-/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
-#define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
-
-/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
-#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
-
-
-/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
-#define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
-
-/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
-#define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
-
-
-/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
-#define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
-
-/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
-#define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)
-
-
-/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
-#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)              UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
-
-/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
-#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
-
-/* Define default macros (UTF-16 "safe") ------------------------------------ */
-
-/**
- * Does this code unit alone encode a code point (BMP, not a surrogate)?
- * Same as UTF16_IS_SINGLE.
- * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h.
- */
-#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
-
-/**
- * Is this code unit the first one of several (a lead surrogate)?
- * Same as UTF16_IS_LEAD.
- * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h.
- */
-#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
-
-/**
- * Is this code unit one of several but not the first one (a trail surrogate)?
- * Same as UTF16_IS_TRAIL.
- * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h.
- */
-#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)
-
-/**
- * Does this code point require multiple code units (is it a supplementary code point)?
- * Same as UTF16_NEED_MULTIPLE_UCHAR.
- * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead.
- */
-#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
-
-/**
- * How many code units are used to encode this code point (1 or 2)?
- * Same as UTF16_CHAR_LENGTH.
- * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h.
- */
-#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
-
-/**
- * How many code units are used at most for any Unicode code point (2)?
- * Same as UTF16_MAX_CHAR_LENGTH.
- * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h.
- */
-#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH
-
-/**
- * Set c to the code point that contains the code unit i.
- * i could point to the lead or the trail surrogate for the code point.
- * i is not modified.
- * Same as UTF16_GET_CHAR.
- * \pre 0<=i<length
- *
- * @deprecated ICU 2.4. Renamed to U16_GET, see utf_old.h.
- */
-#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
-
-/**
- * Set c to the code point that starts at code unit i
- * and advance i to beyond the code units of this code point (post-increment).
- * i must point to the first code unit of a code point.
- * Otherwise c is set to the trail unit (surrogate) itself.
- * Same as UTF16_NEXT_CHAR.
- * \pre 0<=i<length
- * \post 0<i<=length
- *
- * @deprecated ICU 2.4. Renamed to U16_NEXT, see utf_old.h.
- */
-#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
-
-/**
- * Append the code units of code point c to the string at index i
- * and advance i to beyond the new code units (post-increment).
- * The code units beginning at index i will be overwritten.
- * Same as UTF16_APPEND_CHAR.
- * \pre 0<=c<=0x10ffff
- * \pre 0<=i<length
- * \post 0<i<=length
- *
- * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h.
- */
-#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
-
-/**
- * Advance i to beyond the code units of the code point that begins at i.
- * I.e., advance i by one code point.
- * Same as UTF16_FWD_1.
- * \pre 0<=i<length
- * \post 0<i<=length
- *
- * @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h.
- */
-#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
-
-/**
- * Advance i to beyond the code units of the n code points where the first one begins at i.
- * I.e., advance i by n code points.
- * Same as UT16_FWD_N.
- * \pre 0<=i<length
- * \post 0<i<=length
- *
- * @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h.
- */
-#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
-
-/**
- * Take the random-access index i and adjust it so that it points to the beginning
- * of a code point.
- * The input index points to any code unit of a code point and is moved to point to
- * the first code unit of the same code point. i is never incremented.
- * In other words, if i points to a trail surrogate that is preceded by a matching
- * lead surrogate, then i is decremented. Otherwise it is not modified.
- * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index.
- * Same as UTF16_SET_CHAR_START.
- * \pre start<=i<length
- * \post start<=i<length
- *
- * @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h.
- */
-#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)
-
-/**
- * Set c to the code point that has code units before i
- * and move i backward (towards the beginning of the string)
- * to the first code unit of this code point (pre-increment).
- * i must point to the first code unit after the last unit of a code point (i==length is allowed).
- * Same as UTF16_PREV_CHAR.
- * \pre start<i<=length
- * \post start<=i<length
- *
- * @deprecated ICU 2.4. Renamed to U16_PREV, see utf_old.h.
- */
-#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
-
-/**
- * Move i backward (towards the beginning of the string)
- * to the first code unit of the code point that has code units before i.
- * I.e., move i backward by one code point.
- * i must point to the first code unit after the last unit of a code point (i==length is allowed).
- * Same as UTF16_BACK_1.
- * \pre start<i<=length
- * \post start<=i<length
- *
- * @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h.
- */
-#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
-
-/**
- * Move i backward (towards the beginning of the string)
- * to the first code unit of the n code points that have code units before i.
- * I.e., move i backward by n code points.
- * i must point to the first code unit after the last unit of a code point (i==length is allowed).
- * Same as UTF16_BACK_N.
- * \pre start<i<=length
- * \post start<=i<length
- *
- * @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h.
- */
-#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
-
-/**
- * Take the random-access index i and adjust it so that it points beyond
- * a code point. The input index points beyond any code unit
- * of a code point and is moved to point beyond the last code unit of the same
- * code point. i is never decremented.
- * In other words, if i points to a trail surrogate that is preceded by a matching
- * lead surrogate, then i is incremented. Otherwise it is not modified.
- * This can be used to start an iteration with UTF_PREV_CHAR() from a random index.
- * Same as UTF16_SET_CHAR_LIMIT.
- * \pre start<i<=length
- * \post start<i<=length
- *
- * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h.
- */
-#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
-
-#endif /* U_HIDE_DEPRECATED_API */
-
-#endif
-

Copied: MacRuby/trunk/icu-1060/unicode/utf_old.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/utf_old.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/utf_old.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/utf_old.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1171 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2002-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utf.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002sep21
+*   created by: Markus W. Scherer
+*/
+
+/**
+ * \file 
+ * \brief C API: Deprecated macros for Unicode string handling
+ */
+
+/**
+ * 
+ * The macros in utf_old.h are all deprecated and their use discouraged.
+ * Some of the design principles behind the set of UTF macros
+ * have changed or proved impractical.
+ * Almost all of the old "UTF macros" are at least renamed.
+ * If you are looking for a new equivalent to an old macro, please see the
+ * comment at the old one.
+ *
+ * utf_old.h is included by utf.h after unicode/umachine.h
+ * and some common definitions, to not break old code.
+ *
+ * Brief summary of reasons for deprecation:
+ * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing)
+ *   was impractical.
+ * - Switch on UTF_SAFE etc. (selection of unsafe/safe/strict default string processing)
+ *   was of little use and impractical.
+ * - Whole classes of macros became obsolete outside of the UTF_SIZE/UTF_SAFE
+ *   selection framework: UTF32_ macros (all trivial)
+ *   and UTF_ default and intermediate macros (all aliases).
+ * - The selection framework also caused many macro aliases.
+ * - Change in Unicode standard: "irregular" sequences (3.0) became illegal (3.2).
+ * - Change of language in Unicode standard:
+ *   Growing distinction between internal x-bit Unicode strings and external UTF-x
+ *   forms, with the former more lenient.
+ *   Suggests renaming of UTF16_ macros to U16_.
+ * - The prefix "UTF_" without a width number confused some users.
+ * - "Safe" append macros needed the addition of an error indicator output.
+ * - "Safe" UTF-8 macros used legitimate (if rarely used) code point values
+ *   to indicate error conditions.
+ * - The use of the "_CHAR" infix for code point operations confused some users.
+ *
+ * More details:
+ *
+ * Until ICU 2.2, utf.h theoretically allowed to choose among UTF-8/16/32
+ * for string processing, and among unsafe/safe/strict default macros for that.
+ *
+ * It proved nearly impossible to write non-trivial, high-performance code
+ * that is UTF-generic.
+ * Unsafe default macros would be dangerous for default string processing,
+ * and the main reason for the "strict" versions disappeared:
+ * Between Unicode 3.0 and 3.2 all "irregular" UTF-8 sequences became illegal.
+ * The only other conditions that "strict" checked for were non-characters,
+ * which are valid during processing. Only during text input/output should they
+ * be checked, and at that time other well-formedness checks may be
+ * necessary or useful as well.
+ * This can still be done by using U16_NEXT and U_IS_UNICODE_NONCHAR
+ * or U_IS_UNICODE_CHAR.
+ *
+ * The old UTF8_..._SAFE macros also used some normal Unicode code points
+ * to indicate malformed sequences.
+ * The new UTF8_ macros without suffix use negative values instead.
+ *
+ * The entire contents of utf32.h was moved here without replacement
+ * because all those macros were trivial and
+ * were meaningful only in the framework of choosing the UTF size.
+ *
+ * See Jitterbug 2150 and its discussion on the ICU mailing list
+ * in September 2002.
+ *
+ * <hr>
+ *
+ * <em>Obsolete part</em> of pre-ICU 2.4 utf.h file documentation:
+ *
+ * <p>The original concept for these files was for ICU to allow
+ * in principle to set which UTF (UTF-8/16/32) is used internally
+ * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type
+ * accordingly. UTF-16 was the default.</p>
+ *
+ * <p>This concept has been abandoned.
+ * A lot of the ICU source code assumes UChar strings are in UTF-16.
+ * This is especially true for low-level code like
+ * conversion, normalization, and collation.
+ * The utf.h header enforces the default of UTF-16.
+ * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.</p>
+ *
+ * <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
+ * UChar is defined to be exactly wchar_t, otherwise uint16_t.</p>
+ *
+ * <p>UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself.</p>
+ *
+ * <p>utf.h also defines a number of C macros for handling single Unicode code points and
+ * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual
+ * implementations of those macros and then aliases one set of them (for UTF-16) for general use.
+ * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while
+ * the general alias macros always begin with UTF_...</p>
+ *
+ * <p>Many string operations can be done with or without error checking.
+ * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe"
+ * ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause
+ * program failures if the strings are not well-formed. The safe macros have an additional, boolean
+ * parameter "strict". If strict is FALSE, then only illegal sequences are detected.
+ * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates).
+ * Safe macros return special error code points for illegal/irregular sequences:
+ * Typically, U+ffff, or values that would result in a code unit sequence of the same length
+ * as the erroneous input sequence.<br>
+ * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and
+ * they do not have start/length parameters for boundary checking.</p>
+ *
+ * <p>Here, the macros are aliased in two steps:
+ * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are
+ * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures.
+ * Then, in a second step, the default, general alias macros are set to use either the unsafe or
+ * the safe/not strict (default) or the safe/strict macro;
+ * these general macros do not have a strictness parameter.</p>
+ *
+ * <p>It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict.
+ * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for
+ * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.</p>
+ *
+ * <p>For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix.
+ * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias.
+ * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds,
+ * then the _UNSAFE version may be used.
+ * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.</p>
+ *
+ * <hr>
+ *
+ * @deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead.
+ */
+
+#ifndef __UTF_OLD_H__
+#define __UTF_OLD_H__
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/* utf.h must be included first. */
+#ifndef __UTF_H__
+#   include "unicode/utf.h"
+#endif
+
+/* Formerly utf.h, part 1 --------------------------------------------------- */
+
+#ifdef U_USE_UTF_DEPRECATES
+/**
+ * Unicode string and array offset and index type.
+ * ICU always counts Unicode code units (UChars) for
+ * string offsets, indexes, and lengths, not Unicode code points.
+ *
+ * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release.
+ */
+typedef int32_t UTextOffset;
+#endif
+
+/** Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF_SIZE 16
+
+/**
+ * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations
+ * with strict=FALSE.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_SAFE
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#undef UTF_UNSAFE
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#undef UTF_STRICT
+
+/**
+ * <p>UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8,
+ * which need 1 or 2 bytes in UTF-8:<br>
+ * U+0015 = NAK = Negative Acknowledge, C0 control character<br>
+ * U+009f = highest C1 control character</p>
+ *
+ * <p>These are used by UTF8_..._SAFE macros so that they can return an error value
+ * that needs the same number of code units (bytes) as were seen by
+ * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().</p>
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ERROR_VALUE_1 0x15
+
+/**
+ * See documentation on UTF8_ERROR_VALUE_1 for details.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ERROR_VALUE_2 0x9f
+
+/**
+ * Error value for all UTFs. This code point value will be set by macros with error
+ * checking if an error is detected.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_ERROR_VALUE 0xffff
+
+/**
+ * Is a given 32-bit code an error value
+ * as returned by one of the macros for any UTF?
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_IS_ERROR(c) \
+    (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
+
+/**
+ * This is a combined macro: Is c a valid Unicode value _and_ not an error code?
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_IS_VALID(c) \
+    (UTF_IS_UNICODE_CHAR(c) && \
+     (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
+
+/**
+ * Is this code unit or code point a surrogate (U+d800..U+dfff)?
+ * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h.
+ */
+#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
+
+/**
+ * Is a given 32-bit code point a Unicode noncharacter?
+ *
+ * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h.
+ */
+#define UTF_IS_UNICODE_NONCHAR(c) \
+    ((c)>=0xfdd0 && \
+     ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+     (uint32_t)(c)<=0x10ffff)
+
+/**
+ * Is a given 32-bit value a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h.
+ */
+#define UTF_IS_UNICODE_CHAR(c) \
+    ((uint32_t)(c)<0xd800 || \
+        ((uint32_t)(c)>0xdfff && \
+         (uint32_t)(c)<=0x10ffff && \
+         !UTF_IS_UNICODE_NONCHAR(c)))
+
+/* Formerly utf8.h ---------------------------------------------------------- */
+
+/**
+ * Count the trail bytes for a UTF-8 lead byte.
+ * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h.
+ */
+#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ * @deprecated ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h.
+ */
+#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/** Is this this code point a single code unit (byte)? @deprecated ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h. */
+#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
+/** Is this this code unit the lead code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h. */
+#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
+/** Is this this code unit a trailing code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h. */
+#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
+
+/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h. */
+#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
+
+/**
+ * Given the lead character, how many bytes are taken by this code point.
+ * ICU does not deal with code points >0x10ffff
+ * unless necessary for advancing in the byte stream.
+ *
+ * These length macros take into account that for values >0x10ffff
+ * the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff
+ * with 3 bytes.
+ * Code point comparisons need to be in uint32_t because UChar32
+ * may be a signed type, and negative values must be recognized.
+ *
+ * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf_old.h.
+ */
+#if 1
+#   define UTF8_CHAR_LENGTH(c) \
+        ((uint32_t)(c)<=0x7f ? 1 : \
+            ((uint32_t)(c)<=0x7ff ? 2 : \
+                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
+            ) \
+        )
+#else
+#   define UTF8_CHAR_LENGTH(c) \
+        ((uint32_t)(c)<=0x7f ? 1 : \
+            ((uint32_t)(c)<=0x7ff ? 2 : \
+                ((uint32_t)(c)<=0xffff ? 3 : \
+                    ((uint32_t)(c)<=0x10ffff ? 4 : \
+                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
+                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
+                        ) \
+                    ) \
+                ) \
+            ) \
+        )
+#endif
+
+/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h. */
+#define UTF8_MAX_CHAR_LENGTH 4
+
+/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
+
+/** @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h. */
+#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
+    int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
+    UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
+    UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
+}
+
+/** @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h. */
+#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+    int32_t _utf8_get_char_safe_index=(int32_t)(i); \
+    UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
+    UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h. */
+#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[(i)++]; \
+    if((uint8_t)((c)-0xc0)<0x35) { \
+        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
+        UTF8_MASK_LEAD_BYTE(c, __count); \
+        switch(__count) { \
+        /* each following branch falls through to the next one */ \
+        case 3: \
+            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+        case 2: \
+            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+        case 1: \
+            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+        /* no other branches to optimize switch() */ \
+            break; \
+        } \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h. */
+#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
+    if((uint32_t)(c)<=0x7f) { \
+        (s)[(i)++]=(uint8_t)(c); \
+    } else { \
+        if((uint32_t)(c)<=0x7ff) { \
+            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+        } else { \
+            if((uint32_t)(c)<=0xffff) { \
+                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+            } else { \
+                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
+                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
+            } \
+            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+        } \
+        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF8_FWD_1_UNSAFE(s, i) { \
+    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF8_FWD_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        UTF8_FWD_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
+    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
+}
+
+/** @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h. */
+#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
+    (c)=(s)[(i)++]; \
+    if((c)>=0x80) { \
+        if(UTF8_IS_LEAD(c)) { \
+            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
+        } else { \
+            (c)=UTF8_ERROR_VALUE_1; \
+        } \
+    } \
+}
+
+/** @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h. */
+#define UTF8_APPEND_CHAR_SAFE(s, i, length, c)  { \
+    if((uint32_t)(c)<=0x7f) { \
+        (s)[(i)++]=(uint8_t)(c); \
+    } else { \
+        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */
+#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */
+#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. */
+#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h. */
+#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[--(i)]; \
+    if(UTF8_IS_TRAIL(c)) { \
+        uint8_t __b, __count=1, __shift=6; \
+\
+        /* c is a trail byte */ \
+        (c)&=0x3f; \
+        for(;;) { \
+            __b=(s)[--(i)]; \
+            if(__b>=0xc0) { \
+                UTF8_MASK_LEAD_BYTE(__b, __count); \
+                (c)|=(UChar32)__b<<__shift; \
+                break; \
+            } else { \
+                (c)|=(UChar32)(__b&0x3f)<<__shift; \
+                ++__count; \
+                __shift+=6; \
+            } \
+        } \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF8_BACK_1_UNSAFE(s, i) { \
+    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF8_BACK_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        UTF8_BACK_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
+    UTF8_BACK_1_UNSAFE(s, i); \
+    UTF8_FWD_1_UNSAFE(s, i); \
+}
+
+/** @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h. */
+#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+    (c)=(s)[--(i)]; \
+    if((c)>=0x80) { \
+        if((c)<=0xbf) { \
+            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
+        } else { \
+            (c)=UTF8_ERROR_VALUE_1; \
+        } \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */
+#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */
+#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */
+#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
+
+/* Formerly utf16.h --------------------------------------------------------- */
+
+/** Is uchar a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. */
+#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
+
+/** Is uchar a second/trail surrogate? @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. */
+#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
+
+/** Assuming c is a surrogate, is it a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h. */
+#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
+
+/** Helper constant for UTF16_GET_PAIR_VALUE. @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h. */
+#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/** Get the UTF-32 value from the surrogate code units. @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h. */
+#define UTF16_GET_PAIR_VALUE(first, second) \
+    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
+
+/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
+#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
+#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
+#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
+
+/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
+#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h. */
+#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */
+#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */
+#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
+
+/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h. */
+#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)
+
+/** @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. */
+#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
+
+/** @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */
+#define UTF16_MAX_CHAR_LENGTH 2
+
+/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF16_ARRAY_SIZE(size) (size)
+
+/**
+ * Get a single code point from an offset that points to any
+ * of the code units that belong to that code point.
+ * Assume 0<=i<length.
+ *
+ * This could be used for iteration together with
+ * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(),
+ * but the use of UTF16_NEXT_CHAR[_UNSAFE]() and
+ * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that.
+ * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h.
+ */
+#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[i]; \
+    if(UTF_IS_SURROGATE(c)) { \
+        if(UTF_IS_SURROGATE_FIRST(c)) { \
+            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
+        } else { \
+            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
+        } \
+    } \
+}
+
+/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
+#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+    (c)=(s)[i]; \
+    if(UTF_IS_SURROGATE(c)) { \
+        uint16_t __c2; \
+        if(UTF_IS_SURROGATE_FIRST(c)) { \
+            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
+                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
+                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+            } else if(strict) {\
+                /* unmatched first surrogate */ \
+                (c)=UTF_ERROR_VALUE; \
+            } \
+        } else { \
+            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
+                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
+                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+            } else if(strict) {\
+                /* unmatched second surrogate */ \
+                (c)=UTF_ERROR_VALUE; \
+            } \
+        } \
+    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+        (c)=UTF_ERROR_VALUE; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
+#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[(i)++]; \
+    if(UTF_IS_FIRST_SURROGATE(c)) { \
+        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
+#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
+    if((uint32_t)(c)<=0xffff) { \
+        (s)[(i)++]=(uint16_t)(c); \
+    } else { \
+        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF16_FWD_1_UNSAFE(s, i) { \
+    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
+        ++(i); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF16_FWD_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        UTF16_FWD_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
+    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
+        --(i); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
+#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
+    (c)=(s)[(i)++]; \
+    if(UTF_IS_FIRST_SURROGATE(c)) { \
+        uint16_t __c2; \
+        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
+            ++(i); \
+            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
+            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+        } else if(strict) {\
+            /* unmatched first surrogate */ \
+            (c)=UTF_ERROR_VALUE; \
+        } \
+    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+        /* unmatched second surrogate or other non-character */ \
+        (c)=UTF_ERROR_VALUE; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
+#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
+    if((uint32_t)(c)<=0xffff) { \
+        (s)[(i)++]=(uint16_t)(c); \
+    } else if((uint32_t)(c)<=0x10ffff) { \
+        if((i)+1<(length)) { \
+            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+        } else /* not enough space */ { \
+            (s)[(i)++]=UTF_ERROR_VALUE; \
+        } \
+    } else /* c>0x10ffff, write error value */ { \
+        (s)[(i)++]=UTF_ERROR_VALUE; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
+#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
+#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
+#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
+#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[--(i)]; \
+    if(UTF_IS_SECOND_SURROGATE(c)) { \
+        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF16_BACK_1_UNSAFE(s, i) { \
+    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
+        --(i); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF16_BACK_N_UNSAFE(s, i, n) { \
+    int32_t __N=(n); \
+    while(__N>0) { \
+        UTF16_BACK_1_UNSAFE(s, i); \
+        --__N; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
+    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
+        ++(i); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
+#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+    (c)=(s)[--(i)]; \
+    if(UTF_IS_SECOND_SURROGATE(c)) { \
+        uint16_t __c2; \
+        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
+            --(i); \
+            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
+            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+        } else if(strict) {\
+            /* unmatched second surrogate */ \
+            (c)=UTF_ERROR_VALUE; \
+        } \
+    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+        /* unmatched first surrogate or other non-character */ \
+        (c)=UTF_ERROR_VALUE; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
+#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
+#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
+#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
+
+/* Formerly utf32.h --------------------------------------------------------- */
+
+/*
+* Old documentation:
+*
+*   This file defines macros to deal with UTF-32 code units and code points.
+*   Signatures and semantics are the same as for the similarly named macros
+*   in utf16.h.
+*   utf32.h is included by utf.h after unicode/umachine.h</p>
+*   and some common definitions.
+*   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
+*                  Compound statements (curly braces {}) must be used  for if-else-while...
+*                  bodies and all macro statements should be terminated with semicolon.</p>
+*/
+
+/* internal definitions ----------------------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_SAFE(c, strict) \
+    (!(strict) ? \
+        (uint32_t)(c)<=0x10ffff : \
+        UTF_IS_UNICODE_CHAR(c))
+
+/*
+ * For the semantics of all of these macros, see utf16.h.
+ * The UTF-32 versions are trivial because any code point is
+ * encoded using exactly one code unit.
+ */
+
+/* single-code point definitions -------------------------------------------- */
+
+/* classes of code unit values */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_SINGLE(uchar) 1
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_LEAD(uchar) 0
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_TRAIL(uchar) 0
+
+/* number of code units per code point */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_CHAR_LENGTH(c) 1
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_MAX_CHAR_LENGTH 1
+
+/* average number of code units compared to UTF-16 */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_ARRAY_SIZE(size) (size)
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[i]; \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+    (c)=(s)[i]; \
+    if(!UTF32_IS_SAFE(c, strict)) { \
+        (c)=UTF_ERROR_VALUE; \
+    } \
+}
+
+/* definitions with forward iteration --------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[(i)++]; \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
+    (s)[(i)++]=(c); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_1_UNSAFE(s, i) { \
+    ++(i); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_N_UNSAFE(s, i, n) { \
+    (i)+=(n); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
+    (c)=(s)[(i)++]; \
+    if(!UTF32_IS_SAFE(c, strict)) { \
+        (c)=UTF_ERROR_VALUE; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
+    if((uint32_t)(c)<=0x10ffff) { \
+        (s)[(i)++]=(c); \
+    } else /* c>0x10ffff, write 0xfffd */ { \
+        (s)[(i)++]=0xfffd; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_1_SAFE(s, i, length) { \
+    ++(i); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_N_SAFE(s, i, length, n) { \
+    if(((i)+=(n))>(length)) { \
+        (i)=(length); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
+}
+
+/* definitions with backward iteration -------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
+    (c)=(s)[--(i)]; \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_1_UNSAFE(s, i) { \
+    --(i); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_N_UNSAFE(s, i, n) { \
+    (i)-=(n); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+    (c)=(s)[--(i)]; \
+    if(!UTF32_IS_SAFE(c, strict)) { \
+        (c)=UTF_ERROR_VALUE; \
+    } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_1_SAFE(s, start, i) { \
+    --(i); \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_N_SAFE(s, start, i, n) { \
+    (i)-=(n); \
+    if((i)<(start)) { \
+        (i)=(start); \
+    } \
+}
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
+}
+
+/* Formerly utf.h, part 2 --------------------------------------------------- */
+
+/**
+ * Estimate the number of code units for a string based on the number of UTF-16 code units.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
+
+/** @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */
+#define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
+#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
+#define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
+#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
+#define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
+#define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF_FWD_1_UNSAFE(s, i)                       UTF16_FWD_1_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
+#define UTF_FWD_1_SAFE(s, i, length)                 UTF16_FWD_1_SAFE(s, i, length)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF_FWD_N_UNSAFE(s, i, n)                    UTF16_FWD_N_UNSAFE(s, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
+#define UTF_FWD_N_SAFE(s, i, length, n)              UTF16_FWD_N_SAFE(s, i, length, n)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF_SET_CHAR_START_UNSAFE(s, i)              UTF16_SET_CHAR_START_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
+#define UTF_SET_CHAR_START_SAFE(s, start, i)         UTF16_SET_CHAR_START_SAFE(s, start, i)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
+#define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
+#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
+#define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
+#define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)              UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
+#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
+
+/* Define default macros (UTF-16 "safe") ------------------------------------ */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * Same as UTF16_IS_SINGLE.
+ * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h.
+ */
+#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
+
+/**
+ * Is this code unit the first one of several (a lead surrogate)?
+ * Same as UTF16_IS_LEAD.
+ * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h.
+ */
+#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
+
+/**
+ * Is this code unit one of several but not the first one (a trail surrogate)?
+ * Same as UTF16_IS_TRAIL.
+ * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h.
+ */
+#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)
+
+/**
+ * Does this code point require multiple code units (is it a supplementary code point)?
+ * Same as UTF16_NEED_MULTIPLE_UCHAR.
+ * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead.
+ */
+#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
+
+/**
+ * How many code units are used to encode this code point (1 or 2)?
+ * Same as UTF16_CHAR_LENGTH.
+ * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h.
+ */
+#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
+
+/**
+ * How many code units are used at most for any Unicode code point (2)?
+ * Same as UTF16_MAX_CHAR_LENGTH.
+ * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h.
+ */
+#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH
+
+/**
+ * Set c to the code point that contains the code unit i.
+ * i could point to the lead or the trail surrogate for the code point.
+ * i is not modified.
+ * Same as UTF16_GET_CHAR.
+ * \pre 0<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_GET, see utf_old.h.
+ */
+#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
+
+/**
+ * Set c to the code point that starts at code unit i
+ * and advance i to beyond the code units of this code point (post-increment).
+ * i must point to the first code unit of a code point.
+ * Otherwise c is set to the trail unit (surrogate) itself.
+ * Same as UTF16_NEXT_CHAR.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_NEXT, see utf_old.h.
+ */
+#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
+
+/**
+ * Append the code units of code point c to the string at index i
+ * and advance i to beyond the new code units (post-increment).
+ * The code units beginning at index i will be overwritten.
+ * Same as UTF16_APPEND_CHAR.
+ * \pre 0<=c<=0x10ffff
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h.
+ */
+#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
+
+/**
+ * Advance i to beyond the code units of the code point that begins at i.
+ * I.e., advance i by one code point.
+ * Same as UTF16_FWD_1.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h.
+ */
+#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
+
+/**
+ * Advance i to beyond the code units of the n code points where the first one begins at i.
+ * I.e., advance i by n code points.
+ * Same as UT16_FWD_N.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h.
+ */
+#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
+
+/**
+ * Take the random-access index i and adjust it so that it points to the beginning
+ * of a code point.
+ * The input index points to any code unit of a code point and is moved to point to
+ * the first code unit of the same code point. i is never incremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching
+ * lead surrogate, then i is decremented. Otherwise it is not modified.
+ * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index.
+ * Same as UTF16_SET_CHAR_START.
+ * \pre start<=i<length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h.
+ */
+#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)
+
+/**
+ * Set c to the code point that has code units before i
+ * and move i backward (towards the beginning of the string)
+ * to the first code unit of this code point (pre-increment).
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_PREV_CHAR.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_PREV, see utf_old.h.
+ */
+#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
+
+/**
+ * Move i backward (towards the beginning of the string)
+ * to the first code unit of the code point that has code units before i.
+ * I.e., move i backward by one code point.
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_BACK_1.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h.
+ */
+#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
+
+/**
+ * Move i backward (towards the beginning of the string)
+ * to the first code unit of the n code points that have code units before i.
+ * I.e., move i backward by n code points.
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_BACK_N.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h.
+ */
+#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
+
+/**
+ * Take the random-access index i and adjust it so that it points beyond
+ * a code point. The input index points beyond any code unit
+ * of a code point and is moved to point beyond the last code unit of the same
+ * code point. i is never decremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching
+ * lead surrogate, then i is incremented. Otherwise it is not modified.
+ * This can be used to start an iteration with UTF_PREV_CHAR() from a random index.
+ * Same as UTF16_SET_CHAR_LIMIT.
+ * \pre start<i<=length
+ * \post start<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h.
+ */
+#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+#endif
+

Deleted: MacRuby/trunk/icu-1060/unicode/utmscale.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/utmscale.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/utmscale.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,481 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 2004 - 2008, International Business Machines Corporation and
-* others. All Rights Reserved.
-*******************************************************************************
-*/
-
-#ifndef UTMSCALE_H
-#define UTMSCALE_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-/** 
- * \file
- * \brief C API: Universal Time Scale
- *
- * There are quite a few different conventions for binary datetime, depending on different
- * platforms and protocols. Some of these have severe drawbacks. For example, people using
- * Unix time (seconds since Jan 1, 1970) think that they are safe until near the year 2038.
- * But cases can and do arise where arithmetic manipulations causes serious problems. Consider
- * the computation of the average of two datetimes, for example: if one calculates them with
- * <code>averageTime = (time1 + time2)/2</code>, there will be overflow even with dates
- * around the present. Moreover, even if these problems don't occur, there is the issue of
- * conversion back and forth between different systems.
- *
- * <p>
- * Binary datetimes differ in a number of ways: the datatype, the unit,
- * and the epoch (origin). We'll refer to these as time scales. For example:
- *
- * <table border="1" cellspacing="0" cellpadding="4">
- *  <caption>Table 1: Binary Time Scales</caption>
- *  <tr>
- *    <th align="left">Source</th>
- *    <th align="left">Datatype</th>
- *    <th align="left">Unit</th>
- *    <th align="left">Epoch</th>
- *  </tr>
- *
- *  <tr>
- *    <td>UDTS_JAVA_TIME</td>
- *    <td>int64_t</td>
- *    <td>milliseconds</td>
- *    <td>Jan 1, 1970</td>
- *  </tr>
- *  <tr>
- *
- *    <td>UDTS_UNIX_TIME</td>
- *    <td>int32_t or int64_t</td>
- *    <td>seconds</td>
- *    <td>Jan 1, 1970</td>
- *  </tr>
- *  <tr>
- *    <td>UDTS_ICU4C_TIME</td>
- *
- *    <td>double</td>
- *    <td>milliseconds</td>
- *    <td>Jan 1, 1970</td>
- *  </tr>
- *  <tr>
- *    <td>UDTS_WINDOWS_FILE_TIME</td>
- *    <td>int64_t</td>
- *
- *    <td>ticks (100 nanoseconds)</td>
- *    <td>Jan 1, 1601</td>
- *  </tr>
- *  <tr>
- *    <td>UDTS_DOTNET_DATE_TIME</td>
- *    <td>int64_t</td>
- *    <td>ticks (100 nanoseconds)</td>
- *
- *    <td>Jan 1, 0001</td>
- *  </tr>
- *  <tr>
- *    <td>UDTS_MAC_OLD_TIME</td>
- *    <td>int32_t or int64_t</td>
- *    <td>seconds</td>
- *    <td>Jan 1, 1904</td>
- *
- *  </tr>
- *  <tr>
- *    <td>UDTS_MAC_TIME</td>
- *    <td>double</td>
- *    <td>seconds</td>
- *    <td>Jan 1, 2001</td>
- *  </tr>
- *
- *  <tr>
- *    <td>UDTS_EXCEL_TIME</td>
- *    <td>?</td>
- *    <td>days</td>
- *    <td>Dec 31, 1899</td>
- *  </tr>
- *  <tr>
- *
- *    <td>UDTS_DB2_TIME</td>
- *    <td>?</td>
- *    <td>days</td>
- *    <td>Dec 31, 1899</td>
- *  </tr>
- *
- *  <tr>
- *    <td>UDTS_UNIX_MICROSECONDS_TIME</td>
- *    <td>int64_t</td>
- *    <td>microseconds</td>
- *    <td>Jan 1, 1970</td>
- *  </tr>
- * </table>
- *
- * <p>
- * All of the epochs start at 00:00 am (the earliest possible time on the day in question),
- * and are assumed to be UTC.
- *
- * <p>
- * The ranges for different datatypes are given in the following table (all values in years).
- * The range of years includes the entire range expressible with positive and negative
- * values of the datatype. The range of years for double is the range that would be allowed
- * without losing precision to the corresponding unit.
- *
- * <table border="1" cellspacing="0" cellpadding="4">
- *  <tr>
- *    <th align="left">Units</th>
- *    <th align="left">int64_t</th>
- *    <th align="left">double</th>
- *    <th align="left">int32_t</th>
- *  </tr>
- *
- *  <tr>
- *    <td>1 sec</td>
- *    <td align="right">5.84542x10<sup>11</sup></td>
- *    <td align="right">285,420,920.94</td>
- *    <td align="right">136.10</td>
- *  </tr>
- *  <tr>
- *
- *    <td>1 millisecond</td>
- *    <td align="right">584,542,046.09</td>
- *    <td align="right">285,420.92</td>
- *    <td align="right">0.14</td>
- *  </tr>
- *  <tr>
- *    <td>1 microsecond</td>
- *
- *    <td align="right">584,542.05</td>
- *    <td align="right">285.42</td>
- *    <td align="right">0.00</td>
- *  </tr>
- *  <tr>
- *    <td>100 nanoseconds (tick)</td>
- *    <td align="right">58,454.20</td>
- *    <td align="right">28.54</td>
- *    <td align="right">0.00</td>
- *  </tr>
- *  <tr>
- *    <td>1 nanosecond</td>
- *    <td align="right">584.5420461</td>
- *    <td align="right">0.2854</td>
- *    <td align="right">0.00</td>
- *  </tr>
- * </table>
- *
- * <p>
- * These functions implement a universal time scale which can be used as a 'pivot',
- * and provide conversion functions to and from all other major time scales.
- * This datetimes to be converted to the pivot time, safely manipulated,
- * and converted back to any other datetime time scale.
- *
- *<p>
- * So what to use for this pivot? Java time has plenty of range, but cannot represent
- * .NET <code>System.DateTime</code> values without severe loss of precision. ICU4C time addresses this by using a
- * <code>double</code> that is otherwise equivalent to the Java time. However, there are disadvantages
- * with <code>doubles</code>. They provide for much more graceful degradation in arithmetic operations.
- * But they only have 53 bits of accuracy, which means that they will lose precision when
- * converting back and forth to ticks. What would really be nice would be a
- * <code>long double</code> (80 bits -- 64 bit mantissa), but that is not supported on most systems.
- *
- *<p>
- * The Unix extended time uses a structure with two components: time in seconds and a
- * fractional field (microseconds). However, this is clumsy, slow, and
- * prone to error (you always have to keep track of overflow and underflow in the
- * fractional field). <code>BigDecimal</code> would allow for arbitrary precision and arbitrary range,
- * but we do not want to use this as the normal type, because it is slow and does not
- * have a fixed size.
- *
- *<p>
- * Because of these issues, we ended up concluding that the .NET framework's
- * <code>System.DateTime</code> would be the best pivot. However, we use the full range
- * allowed by the datatype, allowing for datetimes back to 29,000 BC and up to 29,000 AD.
- * This time scale is very fine grained, does not lose precision, and covers a range that
- * will meet almost all requirements. It will not handle the range that Java times do,
- * but frankly, being able to handle dates before 29,000 BC or after 29,000 AD is of very limited interest.
- *
- */
-
-/**
- * <code>UDateTimeScale</code> values are used to specify the time scale used for
- * conversion into or out if the universal time scale.
- *
- * @stable ICU 3.2
- */
-typedef enum UDateTimeScale {
-    /**
-     * Used in the JDK. Data is a Java <code>long</code> (<code>int64_t</code>). Value
-     * is milliseconds since January 1, 1970.
-     *
-     * @stable ICU 3.2
-     */
-    UDTS_JAVA_TIME = 0,
-
-    /**
-     * Used on Unix systems. Data is <code>int32_t</code> or <code>int64_t</code>. Value
-     * is seconds since January 1, 1970.
-     *
-     * @stable ICU 3.2
-     */
-    UDTS_UNIX_TIME,
-    
-    /**
-     * Used in IUC4C. Data is a <code>double</code>. Value
-     * is milliseconds since January 1, 1970.
-     *
-     * @stable ICU 3.2
-     */
-    UDTS_ICU4C_TIME,
-    
-    /**
-     * Used in Windows for file times. Data is an <code>int64_t</code>. Value
-     * is ticks (1 tick == 100 nanoseconds) since January 1, 1601.
-     *
-     * @stable ICU 3.2
-     */
-    UDTS_WINDOWS_FILE_TIME,
-    
-    /**
-     * Used in the .NET framework's <code>System.DateTime</code> structure. Data is an <code>int64_t</code>. Value
-     * is ticks (1 tick == 100 nanoseconds) since January 1, 0001.
-     *
-     * @stable ICU 3.2
-     */
-    UDTS_DOTNET_DATE_TIME,
-    
-    /**
-     * Used in older Macintosh systems. Data is <code>int32_t</code> or <code>int64_t</code>. Value
-     * is seconds since January 1, 1904.
-     *
-     * @stable ICU 3.2
-     */
-    UDTS_MAC_OLD_TIME,
-    
-    /**
-     * Used in newer Macintosh systems. Data is a <code>double</code>. Value
-     * is seconds since January 1, 2001.
-     *
-     * @stable ICU 3.2
-     */
-    UDTS_MAC_TIME,
-    
-    /**
-     * Used in Excel. Data is an <code>?unknown?</code>. Value
-     * is days since December 31, 1899.
-     *
-     * @stable ICU 3.2
-     */
-    UDTS_EXCEL_TIME,
-    
-    /**
-     * Used in DB2. Data is an <code>?unknown?</code>. Value
-     * is days since December 31, 1899.
-     *
-     * @stable ICU 3.2
-     */
-    UDTS_DB2_TIME,
-
-    /**
-     * Data is a <code>long</code>. Value is microseconds since January 1, 1970.
-     * Similar to Unix time (linear value from 1970) and struct timeval
-     * (microseconds resolution).
-     *
-     * @stable ICU 4.0
-     */
-    UDTS_UNIX_MICROSECONDS_TIME,
-
-    /**
-     * The first unused time scale value. The limit of this enum
-     */
-    UDTS_MAX_SCALE
-} UDateTimeScale;
-
-/**
- * <code>UTimeScaleValue</code> values are used to specify the time scale values
- * to <code>utmscale_getTimeScaleValue</code>.
- *
- * @see utmscale_getTimeScaleValue
- *
- * @stable ICU 3.2
- */
-typedef enum UTimeScaleValue {
-    /**
-     * The constant used to select the units vale
-     * for a time scale.
-     * 
-     * @see utmscale_getTimeScaleValue
-     *
-     * @stable ICU 3.2
-     */
-    UTSV_UNITS_VALUE = 0,
-
-    /**
-     * The constant used to select the epoch offset value
-     * for a time scale.
-     * 
-     * @see utmscale_getTimeScaleValue
-     *
-     * @stable ICU 3.2
-     */
-    UTSV_EPOCH_OFFSET_VALUE=1,
-
-    /**
-     * The constant used to select the minimum from value
-     * for a time scale.
-     * 
-     * @see utmscale_getTimeScaleValue
-     *
-     * @stable ICU 3.2
-     */
-    UTSV_FROM_MIN_VALUE=2,
-
-    /**
-     * The constant used to select the maximum from value
-     * for a time scale.
-     * 
-     * @see utmscale_getTimeScaleValue
-     *
-     * @stable ICU 3.2
-     */
-    UTSV_FROM_MAX_VALUE=3,
-
-    /**
-     * The constant used to select the minimum to value
-     * for a time scale.
-     * 
-     * @see utmscale_getTimeScaleValue
-     *
-     * @stable ICU 3.2
-     */
-    UTSV_TO_MIN_VALUE=4,
-
-    /**
-     * The constant used to select the maximum to value
-     * for a time scale.
-     * 
-     * @see utmscale_getTimeScaleValue
-     *
-     * @stable ICU 3.2
-     */
-    UTSV_TO_MAX_VALUE=5,
-
-#ifndef U_HIDE_INTERNAL_API
-    /**
-     * The constant used to select the epoch plus one value
-     * for a time scale.
-     * 
-     * NOTE: This is an internal value. DO NOT USE IT. May not
-     * actually be equal to the epoch offset value plus one.
-     * 
-     * @see utmscale_getTimeScaleValue
-     *
-     * @internal ICU 3.2
-     */
-    UTSV_EPOCH_OFFSET_PLUS_1_VALUE=6,
-
-    /**
-     * The constant used to select the epoch plus one value
-     * for a time scale.
-     * 
-     * NOTE: This is an internal value. DO NOT USE IT. May not
-     * actually be equal to the epoch offset value plus one.
-     * 
-     * @see utmscale_getTimeScaleValue
-     *
-     * @internal ICU 3.2
-     */
-    UTSV_EPOCH_OFFSET_MINUS_1_VALUE=7,
-
-    /**
-     * The constant used to select the units round value
-     * for a time scale.
-     * 
-     * NOTE: This is an internal value. DO NOT USE IT.
-     * 
-     * @see utmscale_getTimeScaleValue
-     *
-     * @internal ICU 3.2
-     */
-    UTSV_UNITS_ROUND_VALUE=8,
-
-    /**
-     * The constant used to select the minimum safe rounding value
-     * for a time scale.
-     * 
-     * NOTE: This is an internal value. DO NOT USE IT.
-     * 
-     * @see utmscale_getTimeScaleValue
-     *
-     * @internal ICU 3.2
-     */
-    UTSV_MIN_ROUND_VALUE=9,
-
-    /**
-     * The constant used to select the maximum safe rounding value
-     * for a time scale.
-     * 
-     * NOTE: This is an internal value. DO NOT USE IT.
-     * 
-     * @see utmscale_getTimeScaleValue
-     *
-     * @internal ICU 3.2
-     */
-    UTSV_MAX_ROUND_VALUE=10,
-
-#endif /* U_HIDE_INTERNAL_API */
-
-    /**
-     * The number of time scale values, in other words limit of this enum.
-     * 
-     * @see utmscale_getTimeScaleValue
-     */
-    UTSV_MAX_SCALE_VALUE=11
-
-} UTimeScaleValue;
-
-/**
- * Get a value associated with a particular time scale.
- * 
- * @param timeScale The time scale
- * @param value A constant representing the value to get
- * @param status The status code. Set to <code>U_ILLEGAL_ARGUMENT_ERROR</code> if arguments are invalid.
- * @return - the value.
- * 
- * @stable ICU 3.2
- */
-U_STABLE int64_t U_EXPORT2
-    utmscale_getTimeScaleValue(UDateTimeScale timeScale, UTimeScaleValue value, UErrorCode *status);
-
-/* Conversion to 'universal time scale' */
-
-/**
- * Convert a <code>int64_t</code> datetime from the given time scale to the universal time scale.
- *
- * @param otherTime The <code>int64_t</code> datetime
- * @param timeScale The time scale to convert from
- * @param status The status code. Set to <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the conversion is out of range.
- * 
- * @return The datetime converted to the universal time scale
- *
- * @stable ICU 3.2
- */
-U_STABLE int64_t U_EXPORT2
-    utmscale_fromInt64(int64_t otherTime, UDateTimeScale timeScale, UErrorCode *status);
-
-/* Conversion from 'universal time scale' */
-
-/**
- * Convert a datetime from the universal time scale to a <code>int64_t</code> in the given time scale.
- *
- * @param universalTime The datetime in the universal time scale
- * @param timeScale The time scale to convert to
- * @param status The status code. Set to <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the conversion is out of range.
- * 
- * @return The datetime converted to the given time scale
- *
- * @stable ICU 3.2
- */
-U_STABLE int64_t U_EXPORT2
-    utmscale_toInt64(int64_t universalTime, UDateTimeScale timeScale, UErrorCode *status);
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif
-

Copied: MacRuby/trunk/icu-1060/unicode/utmscale.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/utmscale.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/utmscale.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/utmscale.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,481 @@
+/*
+*******************************************************************************
+* Copyright (C) 2004 - 2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*/
+
+#ifndef UTMSCALE_H
+#define UTMSCALE_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+/** 
+ * \file
+ * \brief C API: Universal Time Scale
+ *
+ * There are quite a few different conventions for binary datetime, depending on different
+ * platforms and protocols. Some of these have severe drawbacks. For example, people using
+ * Unix time (seconds since Jan 1, 1970) think that they are safe until near the year 2038.
+ * But cases can and do arise where arithmetic manipulations causes serious problems. Consider
+ * the computation of the average of two datetimes, for example: if one calculates them with
+ * <code>averageTime = (time1 + time2)/2</code>, there will be overflow even with dates
+ * around the present. Moreover, even if these problems don't occur, there is the issue of
+ * conversion back and forth between different systems.
+ *
+ * <p>
+ * Binary datetimes differ in a number of ways: the datatype, the unit,
+ * and the epoch (origin). We'll refer to these as time scales. For example:
+ *
+ * <table border="1" cellspacing="0" cellpadding="4">
+ *  <caption>Table 1: Binary Time Scales</caption>
+ *  <tr>
+ *    <th align="left">Source</th>
+ *    <th align="left">Datatype</th>
+ *    <th align="left">Unit</th>
+ *    <th align="left">Epoch</th>
+ *  </tr>
+ *
+ *  <tr>
+ *    <td>UDTS_JAVA_TIME</td>
+ *    <td>int64_t</td>
+ *    <td>milliseconds</td>
+ *    <td>Jan 1, 1970</td>
+ *  </tr>
+ *  <tr>
+ *
+ *    <td>UDTS_UNIX_TIME</td>
+ *    <td>int32_t or int64_t</td>
+ *    <td>seconds</td>
+ *    <td>Jan 1, 1970</td>
+ *  </tr>
+ *  <tr>
+ *    <td>UDTS_ICU4C_TIME</td>
+ *
+ *    <td>double</td>
+ *    <td>milliseconds</td>
+ *    <td>Jan 1, 1970</td>
+ *  </tr>
+ *  <tr>
+ *    <td>UDTS_WINDOWS_FILE_TIME</td>
+ *    <td>int64_t</td>
+ *
+ *    <td>ticks (100 nanoseconds)</td>
+ *    <td>Jan 1, 1601</td>
+ *  </tr>
+ *  <tr>
+ *    <td>UDTS_DOTNET_DATE_TIME</td>
+ *    <td>int64_t</td>
+ *    <td>ticks (100 nanoseconds)</td>
+ *
+ *    <td>Jan 1, 0001</td>
+ *  </tr>
+ *  <tr>
+ *    <td>UDTS_MAC_OLD_TIME</td>
+ *    <td>int32_t or int64_t</td>
+ *    <td>seconds</td>
+ *    <td>Jan 1, 1904</td>
+ *
+ *  </tr>
+ *  <tr>
+ *    <td>UDTS_MAC_TIME</td>
+ *    <td>double</td>
+ *    <td>seconds</td>
+ *    <td>Jan 1, 2001</td>
+ *  </tr>
+ *
+ *  <tr>
+ *    <td>UDTS_EXCEL_TIME</td>
+ *    <td>?</td>
+ *    <td>days</td>
+ *    <td>Dec 31, 1899</td>
+ *  </tr>
+ *  <tr>
+ *
+ *    <td>UDTS_DB2_TIME</td>
+ *    <td>?</td>
+ *    <td>days</td>
+ *    <td>Dec 31, 1899</td>
+ *  </tr>
+ *
+ *  <tr>
+ *    <td>UDTS_UNIX_MICROSECONDS_TIME</td>
+ *    <td>int64_t</td>
+ *    <td>microseconds</td>
+ *    <td>Jan 1, 1970</td>
+ *  </tr>
+ * </table>
+ *
+ * <p>
+ * All of the epochs start at 00:00 am (the earliest possible time on the day in question),
+ * and are assumed to be UTC.
+ *
+ * <p>
+ * The ranges for different datatypes are given in the following table (all values in years).
+ * The range of years includes the entire range expressible with positive and negative
+ * values of the datatype. The range of years for double is the range that would be allowed
+ * without losing precision to the corresponding unit.
+ *
+ * <table border="1" cellspacing="0" cellpadding="4">
+ *  <tr>
+ *    <th align="left">Units</th>
+ *    <th align="left">int64_t</th>
+ *    <th align="left">double</th>
+ *    <th align="left">int32_t</th>
+ *  </tr>
+ *
+ *  <tr>
+ *    <td>1 sec</td>
+ *    <td align="right">5.84542x10<sup>11</sup></td>
+ *    <td align="right">285,420,920.94</td>
+ *    <td align="right">136.10</td>
+ *  </tr>
+ *  <tr>
+ *
+ *    <td>1 millisecond</td>
+ *    <td align="right">584,542,046.09</td>
+ *    <td align="right">285,420.92</td>
+ *    <td align="right">0.14</td>
+ *  </tr>
+ *  <tr>
+ *    <td>1 microsecond</td>
+ *
+ *    <td align="right">584,542.05</td>
+ *    <td align="right">285.42</td>
+ *    <td align="right">0.00</td>
+ *  </tr>
+ *  <tr>
+ *    <td>100 nanoseconds (tick)</td>
+ *    <td align="right">58,454.20</td>
+ *    <td align="right">28.54</td>
+ *    <td align="right">0.00</td>
+ *  </tr>
+ *  <tr>
+ *    <td>1 nanosecond</td>
+ *    <td align="right">584.5420461</td>
+ *    <td align="right">0.2854</td>
+ *    <td align="right">0.00</td>
+ *  </tr>
+ * </table>
+ *
+ * <p>
+ * These functions implement a universal time scale which can be used as a 'pivot',
+ * and provide conversion functions to and from all other major time scales.
+ * This datetimes to be converted to the pivot time, safely manipulated,
+ * and converted back to any other datetime time scale.
+ *
+ *<p>
+ * So what to use for this pivot? Java time has plenty of range, but cannot represent
+ * .NET <code>System.DateTime</code> values without severe loss of precision. ICU4C time addresses this by using a
+ * <code>double</code> that is otherwise equivalent to the Java time. However, there are disadvantages
+ * with <code>doubles</code>. They provide for much more graceful degradation in arithmetic operations.
+ * But they only have 53 bits of accuracy, which means that they will lose precision when
+ * converting back and forth to ticks. What would really be nice would be a
+ * <code>long double</code> (80 bits -- 64 bit mantissa), but that is not supported on most systems.
+ *
+ *<p>
+ * The Unix extended time uses a structure with two components: time in seconds and a
+ * fractional field (microseconds). However, this is clumsy, slow, and
+ * prone to error (you always have to keep track of overflow and underflow in the
+ * fractional field). <code>BigDecimal</code> would allow for arbitrary precision and arbitrary range,
+ * but we do not want to use this as the normal type, because it is slow and does not
+ * have a fixed size.
+ *
+ *<p>
+ * Because of these issues, we ended up concluding that the .NET framework's
+ * <code>System.DateTime</code> would be the best pivot. However, we use the full range
+ * allowed by the datatype, allowing for datetimes back to 29,000 BC and up to 29,000 AD.
+ * This time scale is very fine grained, does not lose precision, and covers a range that
+ * will meet almost all requirements. It will not handle the range that Java times do,
+ * but frankly, being able to handle dates before 29,000 BC or after 29,000 AD is of very limited interest.
+ *
+ */
+
+/**
+ * <code>UDateTimeScale</code> values are used to specify the time scale used for
+ * conversion into or out if the universal time scale.
+ *
+ * @stable ICU 3.2
+ */
+typedef enum UDateTimeScale {
+    /**
+     * Used in the JDK. Data is a Java <code>long</code> (<code>int64_t</code>). Value
+     * is milliseconds since January 1, 1970.
+     *
+     * @stable ICU 3.2
+     */
+    UDTS_JAVA_TIME = 0,
+
+    /**
+     * Used on Unix systems. Data is <code>int32_t</code> or <code>int64_t</code>. Value
+     * is seconds since January 1, 1970.
+     *
+     * @stable ICU 3.2
+     */
+    UDTS_UNIX_TIME,
+    
+    /**
+     * Used in IUC4C. Data is a <code>double</code>. Value
+     * is milliseconds since January 1, 1970.
+     *
+     * @stable ICU 3.2
+     */
+    UDTS_ICU4C_TIME,
+    
+    /**
+     * Used in Windows for file times. Data is an <code>int64_t</code>. Value
+     * is ticks (1 tick == 100 nanoseconds) since January 1, 1601.
+     *
+     * @stable ICU 3.2
+     */
+    UDTS_WINDOWS_FILE_TIME,
+    
+    /**
+     * Used in the .NET framework's <code>System.DateTime</code> structure. Data is an <code>int64_t</code>. Value
+     * is ticks (1 tick == 100 nanoseconds) since January 1, 0001.
+     *
+     * @stable ICU 3.2
+     */
+    UDTS_DOTNET_DATE_TIME,
+    
+    /**
+     * Used in older Macintosh systems. Data is <code>int32_t</code> or <code>int64_t</code>. Value
+     * is seconds since January 1, 1904.
+     *
+     * @stable ICU 3.2
+     */
+    UDTS_MAC_OLD_TIME,
+    
+    /**
+     * Used in newer Macintosh systems. Data is a <code>double</code>. Value
+     * is seconds since January 1, 2001.
+     *
+     * @stable ICU 3.2
+     */
+    UDTS_MAC_TIME,
+    
+    /**
+     * Used in Excel. Data is an <code>?unknown?</code>. Value
+     * is days since December 31, 1899.
+     *
+     * @stable ICU 3.2
+     */
+    UDTS_EXCEL_TIME,
+    
+    /**
+     * Used in DB2. Data is an <code>?unknown?</code>. Value
+     * is days since December 31, 1899.
+     *
+     * @stable ICU 3.2
+     */
+    UDTS_DB2_TIME,
+
+    /**
+     * Data is a <code>long</code>. Value is microseconds since January 1, 1970.
+     * Similar to Unix time (linear value from 1970) and struct timeval
+     * (microseconds resolution).
+     *
+     * @stable ICU 4.0
+     */
+    UDTS_UNIX_MICROSECONDS_TIME,
+
+    /**
+     * The first unused time scale value. The limit of this enum
+     */
+    UDTS_MAX_SCALE
+} UDateTimeScale;
+
+/**
+ * <code>UTimeScaleValue</code> values are used to specify the time scale values
+ * to <code>utmscale_getTimeScaleValue</code>.
+ *
+ * @see utmscale_getTimeScaleValue
+ *
+ * @stable ICU 3.2
+ */
+typedef enum UTimeScaleValue {
+    /**
+     * The constant used to select the units vale
+     * for a time scale.
+     * 
+     * @see utmscale_getTimeScaleValue
+     *
+     * @stable ICU 3.2
+     */
+    UTSV_UNITS_VALUE = 0,
+
+    /**
+     * The constant used to select the epoch offset value
+     * for a time scale.
+     * 
+     * @see utmscale_getTimeScaleValue
+     *
+     * @stable ICU 3.2
+     */
+    UTSV_EPOCH_OFFSET_VALUE=1,
+
+    /**
+     * The constant used to select the minimum from value
+     * for a time scale.
+     * 
+     * @see utmscale_getTimeScaleValue
+     *
+     * @stable ICU 3.2
+     */
+    UTSV_FROM_MIN_VALUE=2,
+
+    /**
+     * The constant used to select the maximum from value
+     * for a time scale.
+     * 
+     * @see utmscale_getTimeScaleValue
+     *
+     * @stable ICU 3.2
+     */
+    UTSV_FROM_MAX_VALUE=3,
+
+    /**
+     * The constant used to select the minimum to value
+     * for a time scale.
+     * 
+     * @see utmscale_getTimeScaleValue
+     *
+     * @stable ICU 3.2
+     */
+    UTSV_TO_MIN_VALUE=4,
+
+    /**
+     * The constant used to select the maximum to value
+     * for a time scale.
+     * 
+     * @see utmscale_getTimeScaleValue
+     *
+     * @stable ICU 3.2
+     */
+    UTSV_TO_MAX_VALUE=5,
+
+#ifndef U_HIDE_INTERNAL_API
+    /**
+     * The constant used to select the epoch plus one value
+     * for a time scale.
+     * 
+     * NOTE: This is an internal value. DO NOT USE IT. May not
+     * actually be equal to the epoch offset value plus one.
+     * 
+     * @see utmscale_getTimeScaleValue
+     *
+     * @internal ICU 3.2
+     */
+    UTSV_EPOCH_OFFSET_PLUS_1_VALUE=6,
+
+    /**
+     * The constant used to select the epoch plus one value
+     * for a time scale.
+     * 
+     * NOTE: This is an internal value. DO NOT USE IT. May not
+     * actually be equal to the epoch offset value plus one.
+     * 
+     * @see utmscale_getTimeScaleValue
+     *
+     * @internal ICU 3.2
+     */
+    UTSV_EPOCH_OFFSET_MINUS_1_VALUE=7,
+
+    /**
+     * The constant used to select the units round value
+     * for a time scale.
+     * 
+     * NOTE: This is an internal value. DO NOT USE IT.
+     * 
+     * @see utmscale_getTimeScaleValue
+     *
+     * @internal ICU 3.2
+     */
+    UTSV_UNITS_ROUND_VALUE=8,
+
+    /**
+     * The constant used to select the minimum safe rounding value
+     * for a time scale.
+     * 
+     * NOTE: This is an internal value. DO NOT USE IT.
+     * 
+     * @see utmscale_getTimeScaleValue
+     *
+     * @internal ICU 3.2
+     */
+    UTSV_MIN_ROUND_VALUE=9,
+
+    /**
+     * The constant used to select the maximum safe rounding value
+     * for a time scale.
+     * 
+     * NOTE: This is an internal value. DO NOT USE IT.
+     * 
+     * @see utmscale_getTimeScaleValue
+     *
+     * @internal ICU 3.2
+     */
+    UTSV_MAX_ROUND_VALUE=10,
+
+#endif /* U_HIDE_INTERNAL_API */
+
+    /**
+     * The number of time scale values, in other words limit of this enum.
+     * 
+     * @see utmscale_getTimeScaleValue
+     */
+    UTSV_MAX_SCALE_VALUE=11
+
+} UTimeScaleValue;
+
+/**
+ * Get a value associated with a particular time scale.
+ * 
+ * @param timeScale The time scale
+ * @param value A constant representing the value to get
+ * @param status The status code. Set to <code>U_ILLEGAL_ARGUMENT_ERROR</code> if arguments are invalid.
+ * @return - the value.
+ * 
+ * @stable ICU 3.2
+ */
+U_STABLE int64_t U_EXPORT2
+    utmscale_getTimeScaleValue(UDateTimeScale timeScale, UTimeScaleValue value, UErrorCode *status);
+
+/* Conversion to 'universal time scale' */
+
+/**
+ * Convert a <code>int64_t</code> datetime from the given time scale to the universal time scale.
+ *
+ * @param otherTime The <code>int64_t</code> datetime
+ * @param timeScale The time scale to convert from
+ * @param status The status code. Set to <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the conversion is out of range.
+ * 
+ * @return The datetime converted to the universal time scale
+ *
+ * @stable ICU 3.2
+ */
+U_STABLE int64_t U_EXPORT2
+    utmscale_fromInt64(int64_t otherTime, UDateTimeScale timeScale, UErrorCode *status);
+
+/* Conversion from 'universal time scale' */
+
+/**
+ * Convert a datetime from the universal time scale to a <code>int64_t</code> in the given time scale.
+ *
+ * @param universalTime The datetime in the universal time scale
+ * @param timeScale The time scale to convert to
+ * @param status The status code. Set to <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the conversion is out of range.
+ * 
+ * @return The datetime converted to the given time scale
+ *
+ * @stable ICU 3.2
+ */
+U_STABLE int64_t U_EXPORT2
+    utmscale_toInt64(int64_t universalTime, UDateTimeScale timeScale, UErrorCode *status);
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
+

Deleted: MacRuby/trunk/icu-1060/unicode/utrace.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/utrace.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/utrace.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,358 +0,0 @@
-/*
-*******************************************************************************
-*
-*   Copyright (C) 2003-2006, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  utrace.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2003aug06
-*   created by: Markus W. Scherer
-*
-*   Definitions for ICU tracing/logging.
-*
-*/
-
-#ifndef __UTRACE_H__
-#define __UTRACE_H__
-
-#include <stdarg.h>
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief C API:  Definitions for ICU tracing/logging. 
- *
- * This provides API for debugging the internals of ICU without the use of
- * a traditional debugger.
- *
- * By default, tracing is disabled in ICU. If you need to debug ICU with 
- * tracing, please compile ICU with the --enable-tracing configure option.
- */
- 
-U_CDECL_BEGIN
-
-/**
- * Trace severity levels.  Higher levels increase the verbosity of the trace output.
- * @see utrace_setLevel
- * @stable ICU 2.8
- */
-typedef enum UTraceLevel {
-    /** Disable all tracing  @stable ICU 2.8*/
-    UTRACE_OFF=-1,
-    /** Trace error conditions only  @stable ICU 2.8*/
-    UTRACE_ERROR=0,
-    /** Trace errors and warnings  @stable ICU 2.8*/
-    UTRACE_WARNING=3,
-    /** Trace opens and closes of ICU services  @stable ICU 2.8*/
-    UTRACE_OPEN_CLOSE=5,
-    /** Trace an intermediate number of ICU operations  @stable ICU 2.8*/
-    UTRACE_INFO=7,
-    /** Trace the maximum number of ICU operations  @stable ICU 2.8*/
-    UTRACE_VERBOSE=9
-} UTraceLevel;
-
-/**
- *  These are the ICU functions that will be traced when tracing is enabled.
- *  @stable ICU 2.8
- */
-typedef enum UTraceFunctionNumber {
-    UTRACE_FUNCTION_START=0,
-    UTRACE_U_INIT=UTRACE_FUNCTION_START,
-    UTRACE_U_CLEANUP,
-    UTRACE_FUNCTION_LIMIT,
-
-    UTRACE_CONVERSION_START=0x1000,
-    UTRACE_UCNV_OPEN=UTRACE_CONVERSION_START,
-    UTRACE_UCNV_OPEN_PACKAGE,
-    UTRACE_UCNV_OPEN_ALGORITHMIC,
-    UTRACE_UCNV_CLONE,
-    UTRACE_UCNV_CLOSE,
-    UTRACE_UCNV_FLUSH_CACHE,
-    UTRACE_UCNV_LOAD,
-    UTRACE_UCNV_UNLOAD,
-    UTRACE_CONVERSION_LIMIT,
-
-    UTRACE_COLLATION_START=0x2000,
-    UTRACE_UCOL_OPEN=UTRACE_COLLATION_START,
-    UTRACE_UCOL_CLOSE,
-    UTRACE_UCOL_STRCOLL,
-    UTRACE_UCOL_GET_SORTKEY,
-    UTRACE_UCOL_GETLOCALE,
-    UTRACE_UCOL_NEXTSORTKEYPART,
-    UTRACE_UCOL_STRCOLLITER,
-    UTRACE_UCOL_OPEN_FROM_SHORT_STRING,
-    UTRACE_COLLATION_LIMIT
-} UTraceFunctionNumber;
-
-/**
- * Setter for the trace level.
- * @param traceLevel A UTraceLevel value.
- * @stable ICU 2.8
- */
-U_STABLE void U_EXPORT2
-utrace_setLevel(int32_t traceLevel);
-
-/**
- * Getter for the trace level.
- * @return The UTraceLevel value being used by ICU.
- * @stable ICU 2.8
- */
-U_STABLE int32_t U_EXPORT2
-utrace_getLevel(void);
-
-/* Trace function pointers types  ----------------------------- */
-
-/**
-  *  Type signature for the trace function to be called when entering a function.
-  *  @param context value supplied at the time the trace functions are set.
-  *  @param fnNumber Enum value indicating the ICU function being entered.
-  *  @stable ICU 2.8
-  */
-typedef void U_CALLCONV
-UTraceEntry(const void *context, int32_t fnNumber);
-
-/**
-  *  Type signature for the trace function to be called when exiting from a function.
-  *  @param context value supplied at the time the trace functions are set.
-  *  @param fnNumber Enum value indicating the ICU function being exited.
-  *  @param fmt     A formatting string that describes the number and types
-  *                 of arguments included with the variable args.  The fmt
-  *                 string has the same form as the utrace_vformat format
-  *                 string.
-  *  @param args    A variable arguments list.  Contents are described by
-  *                 the fmt parameter.
-  *  @see   utrace_vformat
-  *  @stable ICU 2.8
-  */
-typedef void U_CALLCONV
-UTraceExit(const void *context, int32_t fnNumber, 
-           const char *fmt, va_list args);
-
-/**
-  *  Type signature for the trace function to be called from within an ICU function
-  *  to display data or messages.
-  *  @param context  value supplied at the time the trace functions are set.
-  *  @param fnNumber Enum value indicating the ICU function being exited.
-  *  @param level    The current tracing level
-  *  @param fmt      A format string describing the tracing data that is supplied
-  *                  as variable args
-  *  @param args     The data being traced, passed as variable args.
-  *  @stable ICU 2.8
-  */
-typedef void U_CALLCONV
-UTraceData(const void *context, int32_t fnNumber, int32_t level,
-           const char *fmt, va_list args);
-
-/**
-  *  Set ICU Tracing functions.  Installs application-provided tracing
-  *  functions into ICU.  After doing this, subsequent ICU operations
-  *  will call back to the installed functions, providing a trace
-  *  of the use of ICU.  Passing a NULL pointer for a tracing function
-  *  is allowed, and inhibits tracing action at points where that function
-  *  would be called.
-  *  <p>
-  *  Tracing and Threads:  Tracing functions are global to a process, and
-  *  will be called in response to ICU operations performed by any
-  *  thread.  If tracing of an individual thread is desired, the
-  *  tracing functions must themselves filter by checking that the
-  *  current thread is the desired thread.
-  *
-  *  @param context an uninterpretted pointer.  Whatever is passed in
-  *                 here will in turn be passed to each of the tracing
-  *                 functions UTraceEntry, UTraceExit and UTraceData.
-  *                 ICU does not use or alter this pointer.
-  *  @param e       Callback function to be called on entry to a 
-  *                 a traced ICU function.
-  *  @param x       Callback function to be called on exit from a
-  *                 traced ICU function.
-  *  @param d       Callback function to be called from within a 
-  *                 traced ICU function, for the purpose of providing
-  *                 data to the trace.
-  *
-  *  @stable ICU 2.8
-  */
-U_STABLE void U_EXPORT2
-utrace_setFunctions(const void *context,
-                    UTraceEntry *e, UTraceExit *x, UTraceData *d);
-
-/**
-  * Get the currently installed ICU tracing functions.   Note that a null function
-  *   pointer will be returned if no trace function has been set.
-  *
-  * @param context  The currently installed tracing context.
-  * @param e        The currently installed UTraceEntry function.
-  * @param x        The currently installed UTraceExit function.
-  * @param d        The currently installed UTraceData function.
-  * @stable ICU 2.8
-  */
-U_STABLE void U_EXPORT2
-utrace_getFunctions(const void **context,
-                    UTraceEntry **e, UTraceExit **x, UTraceData **d);
-
-
-
-/*
- *
- * ICU trace format string syntax
- *
- * Format Strings are passed to UTraceData functions, and define the
- * number and types of the trace data being passed on each call.
- *
- * The UTraceData function, which is supplied by the application,
- * not by ICU, can either forward the trace data (passed via
- * varargs) and the format string back to ICU for formatting into
- * a displayable string, or it can interpret the format itself,
- * and do as it wishes with the trace data.
- *
- *
- * Goals for the format string
- * - basic data output
- * - easy to use for trace programmer
- * - sufficient provision for data types for trace output readability
- * - well-defined types and binary portable APIs
- *
- * Non-goals
- * - printf compatibility
- * - fancy formatting
- * - argument reordering and other internationalization features
- *
- * ICU trace format strings contain plain text with argument inserts,
- * much like standard printf format strings.
- * Each insert begins with a '%', then optionally contains a 'v',
- * then exactly one type character.
- * Two '%' in a row represent a '%' instead of an insert.
- * The trace format strings need not have \n at the end.
- *
- *
- * Types
- * -----
- *
- * Type characters:
- * - c A char character in the default codepage.
- * - s A NUL-terminated char * string in the default codepage.
- * - S A UChar * string.  Requires two params, (ptr, length).  Length=-1 for nul term.
- * - b A byte (8-bit integer).
- * - h A 16-bit integer.  Also a 16 bit Unicode code unit.
- * - d A 32-bit integer.  Also a 20 bit Unicode code point value. 
- * - l A 64-bit integer.
- * - p A data pointer.
- *
- * Vectors
- * -------
- *
- * If the 'v' is not specified, then one item of the specified type
- * is passed in.
- * If the 'v' (for "vector") is specified, then a vector of items of the
- * specified type is passed in, via a pointer to the first item
- * and an int32_t value for the length of the vector.
- * Length==-1 means zero or NUL termination.  Works for vectors of all types.
- *
- * Note:  %vS is a vector of (UChar *) strings.  The strings must
- *        be nul terminated as there is no way to provide a
- *        separate length parameter for each string.  The length
- *        parameter (required for all vectors) is the number of
- *        strings, not the length of the strings.
- *
- * Examples
- * --------
- *
- * These examples show the parameters that will be passed to an application's
- *   UTraceData() function for various formats.
- *
- * - the precise formatting is up to the application!
- * - the examples use type casts for arguments only to _show_ the types of
- *   arguments without needing variable declarations in the examples;
- *   the type casts will not be necessary in actual code
- *
- * UTraceDataFunc(context, fnNumber, level,
- *              "There is a character %c in the string %s.",   // Format String 
- *              (char)c, (const char *)s);                     // varargs parameters
- * ->   There is a character 0x42 'B' in the string "Bravo".
- *
- * UTraceDataFunc(context, fnNumber, level,
- *              "Vector of bytes %vb vector of chars %vc",
- *              (const uint8_t *)bytes, (int32_t)bytesLength,
- *              (const char *)chars, (int32_t)charsLength);
- * ->  Vector of bytes
- *      42 63 64 3f [4]
- *     vector of chars
- *      "Bcd?"[4]
- *
- * UTraceDataFunc(context, fnNumber, level,
- *              "An int32_t %d and a whole bunch of them %vd",
- *              (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength);
- * ->   An int32_t 0xfffffffb and a whole bunch of them
- *      fffffffb 00000005 0000010a [3]
- *
- */
-
-
-
-/**
-  *  Trace output Formatter.  An application's UTraceData tracing functions may call
-  *                 back to this function to format the trace output in a
-  *                 human readable form.  Note that a UTraceData function may choose
-  *                 to not format the data;  it could, for example, save it in
-  *                 in the raw form it was received (more compact), leaving
-  *                 formatting for a later trace analyis tool.
-  *  @param outBuf  pointer to a buffer to receive the formatted output.  Output
-  *                 will be nul terminated if there is space in the buffer -
-  *                 if the length of the requested output < the output buffer size.
-  *  @param capacity  Length of the output buffer.
-  *  @param indent  Number of spaces to indent the output.  Intended to allow
-  *                 data displayed from nested functions to be indented for readability.
-  *  @param fmt     Format specification for the data to output
-  *  @param args    Data to be formatted.
-  *  @return        Length of formatted output, including the terminating NUL.
-  *                 If buffer capacity is insufficient, the required capacity is returned. 
-  *  @stable ICU 2.8
-  */
-U_STABLE int32_t U_EXPORT2
-utrace_vformat(char *outBuf, int32_t capacity,
-              int32_t indent, const char *fmt,  va_list args);
-
-/**
-  *  Trace output Formatter.  An application's UTraceData tracing functions may call
-  *                 this function to format any additional trace data, beyond that
-  *                 provided by default, in human readable form with the same
-  *                 formatting conventions used by utrace_vformat().
-  *  @param outBuf  pointer to a buffer to receive the formatted output.  Output
-  *                 will be nul terminated if there is space in the buffer -
-  *                 if the length of the requested output < the output buffer size.
-  *  @param capacity  Length of the output buffer.
-  *  @param indent  Number of spaces to indent the output.  Intended to allow
-  *                 data displayed from nested functions to be indented for readability.
-  *  @param fmt     Format specification for the data to output
-  *  @param ...     Data to be formatted.
-  *  @return        Length of formatted output, including the terminating NUL.
-  *                 If buffer capacity is insufficient, the required capacity is returned. 
-  *  @stable ICU 2.8
-  */
-U_STABLE int32_t U_EXPORT2
-utrace_format(char *outBuf, int32_t capacity,
-              int32_t indent, const char *fmt,  ...);
-
-
-
-/* Trace function numbers --------------------------------------------------- */
-
-/**
- * Get the name of a function from its trace function number.
- *
- * @param fnNumber The trace number for an ICU function.
- * @return The name string for the function.
- *
- * @see UTraceFunctionNumber
- * @stable ICU 2.8
- */
-U_STABLE const char * U_EXPORT2
-utrace_functionName(int32_t fnNumber);
-
-U_CDECL_END
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/utrace.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/utrace.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/utrace.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/utrace.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,358 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2003-2006, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  utrace.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003aug06
+*   created by: Markus W. Scherer
+*
+*   Definitions for ICU tracing/logging.
+*
+*/
+
+#ifndef __UTRACE_H__
+#define __UTRACE_H__
+
+#include <stdarg.h>
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API:  Definitions for ICU tracing/logging. 
+ *
+ * This provides API for debugging the internals of ICU without the use of
+ * a traditional debugger.
+ *
+ * By default, tracing is disabled in ICU. If you need to debug ICU with 
+ * tracing, please compile ICU with the --enable-tracing configure option.
+ */
+ 
+U_CDECL_BEGIN
+
+/**
+ * Trace severity levels.  Higher levels increase the verbosity of the trace output.
+ * @see utrace_setLevel
+ * @stable ICU 2.8
+ */
+typedef enum UTraceLevel {
+    /** Disable all tracing  @stable ICU 2.8*/
+    UTRACE_OFF=-1,
+    /** Trace error conditions only  @stable ICU 2.8*/
+    UTRACE_ERROR=0,
+    /** Trace errors and warnings  @stable ICU 2.8*/
+    UTRACE_WARNING=3,
+    /** Trace opens and closes of ICU services  @stable ICU 2.8*/
+    UTRACE_OPEN_CLOSE=5,
+    /** Trace an intermediate number of ICU operations  @stable ICU 2.8*/
+    UTRACE_INFO=7,
+    /** Trace the maximum number of ICU operations  @stable ICU 2.8*/
+    UTRACE_VERBOSE=9
+} UTraceLevel;
+
+/**
+ *  These are the ICU functions that will be traced when tracing is enabled.
+ *  @stable ICU 2.8
+ */
+typedef enum UTraceFunctionNumber {
+    UTRACE_FUNCTION_START=0,
+    UTRACE_U_INIT=UTRACE_FUNCTION_START,
+    UTRACE_U_CLEANUP,
+    UTRACE_FUNCTION_LIMIT,
+
+    UTRACE_CONVERSION_START=0x1000,
+    UTRACE_UCNV_OPEN=UTRACE_CONVERSION_START,
+    UTRACE_UCNV_OPEN_PACKAGE,
+    UTRACE_UCNV_OPEN_ALGORITHMIC,
+    UTRACE_UCNV_CLONE,
+    UTRACE_UCNV_CLOSE,
+    UTRACE_UCNV_FLUSH_CACHE,
+    UTRACE_UCNV_LOAD,
+    UTRACE_UCNV_UNLOAD,
+    UTRACE_CONVERSION_LIMIT,
+
+    UTRACE_COLLATION_START=0x2000,
+    UTRACE_UCOL_OPEN=UTRACE_COLLATION_START,
+    UTRACE_UCOL_CLOSE,
+    UTRACE_UCOL_STRCOLL,
+    UTRACE_UCOL_GET_SORTKEY,
+    UTRACE_UCOL_GETLOCALE,
+    UTRACE_UCOL_NEXTSORTKEYPART,
+    UTRACE_UCOL_STRCOLLITER,
+    UTRACE_UCOL_OPEN_FROM_SHORT_STRING,
+    UTRACE_COLLATION_LIMIT
+} UTraceFunctionNumber;
+
+/**
+ * Setter for the trace level.
+ * @param traceLevel A UTraceLevel value.
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+utrace_setLevel(int32_t traceLevel);
+
+/**
+ * Getter for the trace level.
+ * @return The UTraceLevel value being used by ICU.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+utrace_getLevel(void);
+
+/* Trace function pointers types  ----------------------------- */
+
+/**
+  *  Type signature for the trace function to be called when entering a function.
+  *  @param context value supplied at the time the trace functions are set.
+  *  @param fnNumber Enum value indicating the ICU function being entered.
+  *  @stable ICU 2.8
+  */
+typedef void U_CALLCONV
+UTraceEntry(const void *context, int32_t fnNumber);
+
+/**
+  *  Type signature for the trace function to be called when exiting from a function.
+  *  @param context value supplied at the time the trace functions are set.
+  *  @param fnNumber Enum value indicating the ICU function being exited.
+  *  @param fmt     A formatting string that describes the number and types
+  *                 of arguments included with the variable args.  The fmt
+  *                 string has the same form as the utrace_vformat format
+  *                 string.
+  *  @param args    A variable arguments list.  Contents are described by
+  *                 the fmt parameter.
+  *  @see   utrace_vformat
+  *  @stable ICU 2.8
+  */
+typedef void U_CALLCONV
+UTraceExit(const void *context, int32_t fnNumber, 
+           const char *fmt, va_list args);
+
+/**
+  *  Type signature for the trace function to be called from within an ICU function
+  *  to display data or messages.
+  *  @param context  value supplied at the time the trace functions are set.
+  *  @param fnNumber Enum value indicating the ICU function being exited.
+  *  @param level    The current tracing level
+  *  @param fmt      A format string describing the tracing data that is supplied
+  *                  as variable args
+  *  @param args     The data being traced, passed as variable args.
+  *  @stable ICU 2.8
+  */
+typedef void U_CALLCONV
+UTraceData(const void *context, int32_t fnNumber, int32_t level,
+           const char *fmt, va_list args);
+
+/**
+  *  Set ICU Tracing functions.  Installs application-provided tracing
+  *  functions into ICU.  After doing this, subsequent ICU operations
+  *  will call back to the installed functions, providing a trace
+  *  of the use of ICU.  Passing a NULL pointer for a tracing function
+  *  is allowed, and inhibits tracing action at points where that function
+  *  would be called.
+  *  <p>
+  *  Tracing and Threads:  Tracing functions are global to a process, and
+  *  will be called in response to ICU operations performed by any
+  *  thread.  If tracing of an individual thread is desired, the
+  *  tracing functions must themselves filter by checking that the
+  *  current thread is the desired thread.
+  *
+  *  @param context an uninterpretted pointer.  Whatever is passed in
+  *                 here will in turn be passed to each of the tracing
+  *                 functions UTraceEntry, UTraceExit and UTraceData.
+  *                 ICU does not use or alter this pointer.
+  *  @param e       Callback function to be called on entry to a 
+  *                 a traced ICU function.
+  *  @param x       Callback function to be called on exit from a
+  *                 traced ICU function.
+  *  @param d       Callback function to be called from within a 
+  *                 traced ICU function, for the purpose of providing
+  *                 data to the trace.
+  *
+  *  @stable ICU 2.8
+  */
+U_STABLE void U_EXPORT2
+utrace_setFunctions(const void *context,
+                    UTraceEntry *e, UTraceExit *x, UTraceData *d);
+
+/**
+  * Get the currently installed ICU tracing functions.   Note that a null function
+  *   pointer will be returned if no trace function has been set.
+  *
+  * @param context  The currently installed tracing context.
+  * @param e        The currently installed UTraceEntry function.
+  * @param x        The currently installed UTraceExit function.
+  * @param d        The currently installed UTraceData function.
+  * @stable ICU 2.8
+  */
+U_STABLE void U_EXPORT2
+utrace_getFunctions(const void **context,
+                    UTraceEntry **e, UTraceExit **x, UTraceData **d);
+
+
+
+/*
+ *
+ * ICU trace format string syntax
+ *
+ * Format Strings are passed to UTraceData functions, and define the
+ * number and types of the trace data being passed on each call.
+ *
+ * The UTraceData function, which is supplied by the application,
+ * not by ICU, can either forward the trace data (passed via
+ * varargs) and the format string back to ICU for formatting into
+ * a displayable string, or it can interpret the format itself,
+ * and do as it wishes with the trace data.
+ *
+ *
+ * Goals for the format string
+ * - basic data output
+ * - easy to use for trace programmer
+ * - sufficient provision for data types for trace output readability
+ * - well-defined types and binary portable APIs
+ *
+ * Non-goals
+ * - printf compatibility
+ * - fancy formatting
+ * - argument reordering and other internationalization features
+ *
+ * ICU trace format strings contain plain text with argument inserts,
+ * much like standard printf format strings.
+ * Each insert begins with a '%', then optionally contains a 'v',
+ * then exactly one type character.
+ * Two '%' in a row represent a '%' instead of an insert.
+ * The trace format strings need not have \n at the end.
+ *
+ *
+ * Types
+ * -----
+ *
+ * Type characters:
+ * - c A char character in the default codepage.
+ * - s A NUL-terminated char * string in the default codepage.
+ * - S A UChar * string.  Requires two params, (ptr, length).  Length=-1 for nul term.
+ * - b A byte (8-bit integer).
+ * - h A 16-bit integer.  Also a 16 bit Unicode code unit.
+ * - d A 32-bit integer.  Also a 20 bit Unicode code point value. 
+ * - l A 64-bit integer.
+ * - p A data pointer.
+ *
+ * Vectors
+ * -------
+ *
+ * If the 'v' is not specified, then one item of the specified type
+ * is passed in.
+ * If the 'v' (for "vector") is specified, then a vector of items of the
+ * specified type is passed in, via a pointer to the first item
+ * and an int32_t value for the length of the vector.
+ * Length==-1 means zero or NUL termination.  Works for vectors of all types.
+ *
+ * Note:  %vS is a vector of (UChar *) strings.  The strings must
+ *        be nul terminated as there is no way to provide a
+ *        separate length parameter for each string.  The length
+ *        parameter (required for all vectors) is the number of
+ *        strings, not the length of the strings.
+ *
+ * Examples
+ * --------
+ *
+ * These examples show the parameters that will be passed to an application's
+ *   UTraceData() function for various formats.
+ *
+ * - the precise formatting is up to the application!
+ * - the examples use type casts for arguments only to _show_ the types of
+ *   arguments without needing variable declarations in the examples;
+ *   the type casts will not be necessary in actual code
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ *              "There is a character %c in the string %s.",   // Format String 
+ *              (char)c, (const char *)s);                     // varargs parameters
+ * ->   There is a character 0x42 'B' in the string "Bravo".
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ *              "Vector of bytes %vb vector of chars %vc",
+ *              (const uint8_t *)bytes, (int32_t)bytesLength,
+ *              (const char *)chars, (int32_t)charsLength);
+ * ->  Vector of bytes
+ *      42 63 64 3f [4]
+ *     vector of chars
+ *      "Bcd?"[4]
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ *              "An int32_t %d and a whole bunch of them %vd",
+ *              (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength);
+ * ->   An int32_t 0xfffffffb and a whole bunch of them
+ *      fffffffb 00000005 0000010a [3]
+ *
+ */
+
+
+
+/**
+  *  Trace output Formatter.  An application's UTraceData tracing functions may call
+  *                 back to this function to format the trace output in a
+  *                 human readable form.  Note that a UTraceData function may choose
+  *                 to not format the data;  it could, for example, save it in
+  *                 in the raw form it was received (more compact), leaving
+  *                 formatting for a later trace analyis tool.
+  *  @param outBuf  pointer to a buffer to receive the formatted output.  Output
+  *                 will be nul terminated if there is space in the buffer -
+  *                 if the length of the requested output < the output buffer size.
+  *  @param capacity  Length of the output buffer.
+  *  @param indent  Number of spaces to indent the output.  Intended to allow
+  *                 data displayed from nested functions to be indented for readability.
+  *  @param fmt     Format specification for the data to output
+  *  @param args    Data to be formatted.
+  *  @return        Length of formatted output, including the terminating NUL.
+  *                 If buffer capacity is insufficient, the required capacity is returned. 
+  *  @stable ICU 2.8
+  */
+U_STABLE int32_t U_EXPORT2
+utrace_vformat(char *outBuf, int32_t capacity,
+              int32_t indent, const char *fmt,  va_list args);
+
+/**
+  *  Trace output Formatter.  An application's UTraceData tracing functions may call
+  *                 this function to format any additional trace data, beyond that
+  *                 provided by default, in human readable form with the same
+  *                 formatting conventions used by utrace_vformat().
+  *  @param outBuf  pointer to a buffer to receive the formatted output.  Output
+  *                 will be nul terminated if there is space in the buffer -
+  *                 if the length of the requested output < the output buffer size.
+  *  @param capacity  Length of the output buffer.
+  *  @param indent  Number of spaces to indent the output.  Intended to allow
+  *                 data displayed from nested functions to be indented for readability.
+  *  @param fmt     Format specification for the data to output
+  *  @param ...     Data to be formatted.
+  *  @return        Length of formatted output, including the terminating NUL.
+  *                 If buffer capacity is insufficient, the required capacity is returned. 
+  *  @stable ICU 2.8
+  */
+U_STABLE int32_t U_EXPORT2
+utrace_format(char *outBuf, int32_t capacity,
+              int32_t indent, const char *fmt,  ...);
+
+
+
+/* Trace function numbers --------------------------------------------------- */
+
+/**
+ * Get the name of a function from its trace function number.
+ *
+ * @param fnNumber The trace number for an ICU function.
+ * @return The name string for the function.
+ *
+ * @see UTraceFunctionNumber
+ * @stable ICU 2.8
+ */
+U_STABLE const char * U_EXPORT2
+utrace_functionName(int32_t fnNumber);
+
+U_CDECL_END
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/utrans.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/utrans.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/utrans.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,583 +0,0 @@
-/*
-*******************************************************************************
-*   Copyright (C) 1997-2005, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*******************************************************************************
-*   Date        Name        Description
-*   06/21/00    aliu        Creation.
-*******************************************************************************
-*/
-
-#ifndef UTRANS_H
-#define UTRANS_H
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_TRANSLITERATION
-
-#include "unicode/urep.h"
-#include "unicode/parseerr.h"
-#include "unicode/uenum.h"
-
-/********************************************************************
- * General Notes
- ********************************************************************
- */
-/**
- * \file
- * \brief C API: Transliterator
- *
- * <h2> Transliteration </h2>
- * The data structures and functions described in this header provide
- * transliteration services.  Transliteration services are implemented
- * as C++ classes.  The comments and documentation in this header
- * assume the reader is familiar with the C++ headers translit.h and
- * associated documentation.
- *
- * A significant but incomplete subset of the C++ transliteration
- * services are available to C code through this header.  In order to
- * access more complex transliteration services, refer to the C++
- * headers and documentation.
- *
- * There are two sets of functions for working with transliterator IDs:
- *
- * An old, deprecated set uses char * IDs, which works for true and pure
- * identifiers that these APIs were designed for,
- * for example "Cyrillic-Latin".
- * It does not work when the ID contains filters ("[:Script=Cyrl:]")
- * or even a complete set of rules because then the ID string contains more
- * than just "invariant" characters (see utypes.h).
- *
- * A new set of functions replaces the old ones and uses UChar * IDs,
- * paralleling the UnicodeString IDs in the C++ API. (New in ICU 2.8.)
- */
-
-/********************************************************************
- * Data Structures
- ********************************************************************/
-
-/**
- * An opaque transliterator for use in C.  Open with utrans_openxxx()
- * and close with utrans_close() when done.  Equivalent to the C++ class
- * Transliterator and its subclasses.
- * @see Transliterator
- * @stable ICU 2.0
- */
-typedef void* UTransliterator;
-
-/**
- * Direction constant indicating the direction in a transliterator,
- * e.g., the forward or reverse rules of a RuleBasedTransliterator.
- * Specified when a transliterator is opened.  An "A-B" transliterator
- * transliterates A to B when operating in the forward direction, and
- * B to A when operating in the reverse direction.
- * @stable ICU 2.0
- */
-typedef enum UTransDirection {
-    
-    /**
-     * UTRANS_FORWARD means from &lt;source&gt; to &lt;target&gt; for a
-     * transliterator with ID &lt;source&gt;-&lt;target&gt;.  For a transliterator
-     * opened using a rule, it means forward direction rules, e.g.,
-     * "A > B".
-     */
-    UTRANS_FORWARD,
-
-    /**
-     * UTRANS_REVERSE means from &lt;target&gt; to &lt;source&gt; for a
-     * transliterator with ID &lt;source&gt;-&lt;target&gt;.  For a transliterator
-     * opened using a rule, it means reverse direction rules, e.g.,
-     * "A < B".
-     */
-    UTRANS_REVERSE
-
-} UTransDirection;
-
-/**
- * Position structure for utrans_transIncremental() incremental
- * transliteration.  This structure defines two substrings of the text
- * being transliterated.  The first region, [contextStart,
- * contextLimit), defines what characters the transliterator will read
- * as context.  The second region, [start, limit), defines what
- * characters will actually be transliterated.  The second region
- * should be a subset of the first.
- *
- * <p>After a transliteration operation, some of the indices in this
- * structure will be modified.  See the field descriptions for
- * details.
- *
- * <p>contextStart <= start <= limit <= contextLimit
- *
- * <p>Note: All index values in this structure must be at code point
- * boundaries.  That is, none of them may occur between two code units
- * of a surrogate pair.  If any index does split a surrogate pair,
- * results are unspecified.
- *
- * @stable ICU 2.0
- */
-typedef struct UTransPosition {
-
-    /**
-     * Beginning index, inclusive, of the context to be considered for
-     * a transliteration operation.  The transliterator will ignore
-     * anything before this index.  INPUT/OUTPUT parameter: This parameter
-     * is updated by a transliteration operation to reflect the maximum
-     * amount of antecontext needed by a transliterator.
-     * @stable ICU 2.4
-     */
-    int32_t contextStart;
-    
-    /**
-     * Ending index, exclusive, of the context to be considered for a
-     * transliteration operation.  The transliterator will ignore
-     * anything at or after this index.  INPUT/OUTPUT parameter: This
-     * parameter is updated to reflect changes in the length of the
-     * text, but points to the same logical position in the text.
-     * @stable ICU 2.4
-     */
-    int32_t contextLimit;
-    
-    /**
-     * Beginning index, inclusive, of the text to be transliteratd.
-     * INPUT/OUTPUT parameter: This parameter is advanced past
-     * characters that have already been transliterated by a
-     * transliteration operation.
-     * @stable ICU 2.4
-     */
-    int32_t start;
-    
-    /**
-     * Ending index, exclusive, of the text to be transliteratd.
-     * INPUT/OUTPUT parameter: This parameter is updated to reflect
-     * changes in the length of the text, but points to the same
-     * logical position in the text.
-     * @stable ICU 2.4
-     */
-    int32_t limit;
-
-} UTransPosition;
-
-/********************************************************************
- * General API
- ********************************************************************/
-
-/**
- * Open a custom transliterator, given a custom rules string 
- * OR 
- * a system transliterator, given its ID.  
- * Any non-NULL result from this function should later be closed with
- * utrans_close().
- *
- * @param id a valid transliterator ID
- * @param idLength the length of the ID string, or -1 if NUL-terminated
- * @param dir the desired direction
- * @param rules the transliterator rules.  See the C++ header rbt.h for
- *              rules syntax. If NULL then a system transliterator matching
- *              the ID is returned.
- * @param rulesLength the length of the rules, or -1 if the rules
- *                    are NUL-terminated.
- * @param parseError a pointer to a UParseError struct to receive the details
- *                   of any parsing errors. This parameter may be NULL if no
- *                   parsing error details are desired.
- * @param pErrorCode a pointer to the UErrorCode
- * @return a transliterator pointer that may be passed to other
- *         utrans_xxx() functions, or NULL if the open call fails.
- * @stable ICU 2.8
- */
-U_STABLE UTransliterator* U_EXPORT2
-utrans_openU(const UChar *id,
-             int32_t idLength,
-             UTransDirection dir,
-             const UChar *rules,
-             int32_t rulesLength,
-             UParseError *parseError,
-             UErrorCode *pErrorCode);
-
-/**
- * Open an inverse of an existing transliterator.  For this to work,
- * the inverse must be registered with the system.  For example, if
- * the Transliterator "A-B" is opened, and then its inverse is opened,
- * the result is the Transliterator "B-A", if such a transliterator is
- * registered with the system.  Otherwise the result is NULL and a
- * failing UErrorCode is set.  Any non-NULL result from this function
- * should later be closed with utrans_close().
- *
- * @param trans the transliterator to open the inverse of.
- * @param status a pointer to the UErrorCode
- * @return a pointer to a newly-opened transliterator that is the
- * inverse of trans, or NULL if the open call fails.
- * @stable ICU 2.0
- */
-U_STABLE UTransliterator* U_EXPORT2 
-utrans_openInverse(const UTransliterator* trans,
-                   UErrorCode* status);
-
-/**
- * Create a copy of a transliterator.  Any non-NULL result from this
- * function should later be closed with utrans_close().
- *
- * @param trans the transliterator to be copied.
- * @param status a pointer to the UErrorCode
- * @return a transliterator pointer that may be passed to other
- * utrans_xxx() functions, or NULL if the clone call fails.
- * @stable ICU 2.0
- */
-U_STABLE UTransliterator* U_EXPORT2 
-utrans_clone(const UTransliterator* trans,
-             UErrorCode* status);
-
-/**
- * Close a transliterator.  Any non-NULL pointer returned by
- * utrans_openXxx() or utrans_clone() should eventually be closed.
- * @param trans the transliterator to be closed.
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-utrans_close(UTransliterator* trans);
-
-/**
- * Return the programmatic identifier for this transliterator.
- * If this identifier is passed to utrans_openU(), it will open
- * a transliterator equivalent to this one, if the ID has been
- * registered.
- *
- * @param trans the transliterator to return the ID of.
- * @param resultLength pointer to an output variable receiving the length
- *        of the ID string; can be NULL
- * @return the NUL-terminated ID string. This pointer remains
- * valid until utrans_close() is called on this transliterator.
- *
- * @stable ICU 2.8
- */
-U_STABLE const UChar * U_EXPORT2
-utrans_getUnicodeID(const UTransliterator *trans,
-                    int32_t *resultLength);
-
-/**
- * Register an open transliterator with the system.  When
- * utrans_open() is called with an ID string that is equal to that
- * returned by utrans_getID(adoptedTrans,...), then
- * utrans_clone(adoptedTrans,...) is returned.
- *
- * <p>NOTE: After this call the system owns the adoptedTrans and will
- * close it.  The user must not call utrans_close() on adoptedTrans.
- *
- * @param adoptedTrans a transliterator, typically the result of
- * utrans_openRules(), to be registered with the system.
- * @param status a pointer to the UErrorCode
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-utrans_register(UTransliterator* adoptedTrans,
-                UErrorCode* status);
-
-/**
- * Unregister a transliterator from the system.  After this call the
- * system will no longer recognize the given ID when passed to
- * utrans_open(). If the ID is invalid then nothing is done.
- *
- * @param id an ID to unregister
- * @param idLength the length of id, or -1 if id is zero-terminated
- * @stable ICU 2.8
- */
-U_STABLE void U_EXPORT2
-utrans_unregisterID(const UChar* id, int32_t idLength);
-
-/**
- * Set the filter used by a transliterator.  A filter can be used to
- * make the transliterator pass certain characters through untouched.
- * The filter is expressed using a UnicodeSet pattern.  If the
- * filterPattern is NULL or the empty string, then the transliterator
- * will be reset to use no filter.
- *
- * @param trans the transliterator
- * @param filterPattern a pattern string, in the form accepted by
- * UnicodeSet, specifying which characters to apply the
- * transliteration to.  May be NULL or the empty string to indicate no
- * filter.
- * @param filterPatternLen the length of filterPattern, or -1 if
- * filterPattern is zero-terminated
- * @param status a pointer to the UErrorCode
- * @see UnicodeSet
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-utrans_setFilter(UTransliterator* trans,
-                 const UChar* filterPattern,
-                 int32_t filterPatternLen,
-                 UErrorCode* status);
-
-/**
- * Return the number of system transliterators.
- * It is recommended to use utrans_openIDs() instead.
- *
- * @return the number of system transliterators.
- * @stable ICU 2.0
- */
-U_STABLE int32_t U_EXPORT2 
-utrans_countAvailableIDs(void);
-
-/**
- * Return a UEnumeration for the available transliterators.
- *
- * @param pErrorCode Pointer to the UErrorCode in/out parameter.
- * @return UEnumeration for the available transliterators.
- *         Close with uenum_close().
- *
- * @stable ICU 2.8
- */
-U_STABLE UEnumeration * U_EXPORT2
-utrans_openIDs(UErrorCode *pErrorCode);
-
-/********************************************************************
- * Transliteration API
- ********************************************************************/
-
-/**
- * Transliterate a segment of a UReplaceable string.  The string is
- * passed in as a UReplaceable pointer rep and a UReplaceableCallbacks
- * function pointer struct repFunc.  Functions in the repFunc struct
- * will be called in order to modify the rep string.
- *
- * @param trans the transliterator
- * @param rep a pointer to the string.  This will be passed to the
- * repFunc functions.
- * @param repFunc a set of function pointers that will be used to
- * modify the string pointed to by rep.
- * @param start the beginning index, inclusive; <code>0 <= start <=
- * limit</code>.
- * @param limit pointer to the ending index, exclusive; <code>start <=
- * limit <= repFunc->length(rep)</code>.  Upon return, *limit will
- * contain the new limit index.  The text previously occupying
- * <code>[start, limit)</code> has been transliterated, possibly to a
- * string of a different length, at <code>[start,
- * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em>
- * is the return value.
- * @param status a pointer to the UErrorCode
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-utrans_trans(const UTransliterator* trans,
-             UReplaceable* rep,
-             UReplaceableCallbacks* repFunc,
-             int32_t start,
-             int32_t* limit,
-             UErrorCode* status);
-
-/**
- * Transliterate the portion of the UReplaceable text buffer that can
- * be transliterated unambiguosly.  This method is typically called
- * after new text has been inserted, e.g. as a result of a keyboard
- * event.  The transliterator will try to transliterate characters of
- * <code>rep</code> between <code>index.cursor</code> and
- * <code>index.limit</code>.  Characters before
- * <code>index.cursor</code> will not be changed.
- *
- * <p>Upon return, values in <code>index</code> will be updated.
- * <code>index.start</code> will be advanced to the first
- * character that future calls to this method will read.
- * <code>index.cursor</code> and <code>index.limit</code> will
- * be adjusted to delimit the range of text that future calls to
- * this method may change.
- *
- * <p>Typical usage of this method begins with an initial call
- * with <code>index.start</code> and <code>index.limit</code>
- * set to indicate the portion of <code>text</code> to be
- * transliterated, and <code>index.cursor == index.start</code>.
- * Thereafter, <code>index</code> can be used without
- * modification in future calls, provided that all changes to
- * <code>text</code> are made via this method.
- *
- * <p>This method assumes that future calls may be made that will
- * insert new text into the buffer.  As a result, it only performs
- * unambiguous transliterations.  After the last call to this method,
- * there may be untransliterated text that is waiting for more input
- * to resolve an ambiguity.  In order to perform these pending
- * transliterations, clients should call utrans_trans() with a start
- * of index.start and a limit of index.end after the last call to this
- * method has been made.
- *
- * @param trans the transliterator
- * @param rep a pointer to the string.  This will be passed to the
- * repFunc functions.
- * @param repFunc a set of function pointers that will be used to
- * modify the string pointed to by rep.
- * @param pos a struct containing the start and limit indices of the
- * text to be read and the text to be transliterated
- * @param status a pointer to the UErrorCode
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-utrans_transIncremental(const UTransliterator* trans,
-                        UReplaceable* rep,
-                        UReplaceableCallbacks* repFunc,
-                        UTransPosition* pos,
-                        UErrorCode* status);
-
-/**
- * Transliterate a segment of a UChar* string.  The string is passed
- * in in a UChar* buffer.  The string is modified in place.  If the
- * result is longer than textCapacity, it is truncated.  The actual
- * length of the result is returned in *textLength, if textLength is
- * non-NULL. *textLength may be greater than textCapacity, but only
- * textCapacity UChars will be written to *text, including the zero
- * terminator.
- *
- * @param trans the transliterator
- * @param text a pointer to a buffer containing the text to be
- * transliterated on input and the result text on output.
- * @param textLength a pointer to the length of the string in text.
- * If the length is -1 then the string is assumed to be
- * zero-terminated.  Upon return, the new length is stored in
- * *textLength.  If textLength is NULL then the string is assumed to
- * be zero-terminated.
- * @param textCapacity a pointer to the length of the text buffer.
- * Upon return, 
- * @param start the beginning index, inclusive; <code>0 <= start <=
- * limit</code>.
- * @param limit pointer to the ending index, exclusive; <code>start <=
- * limit <= repFunc->length(rep)</code>.  Upon return, *limit will
- * contain the new limit index.  The text previously occupying
- * <code>[start, limit)</code> has been transliterated, possibly to a
- * string of a different length, at <code>[start,
- * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em>
- * is the return value.
- * @param status a pointer to the UErrorCode
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-utrans_transUChars(const UTransliterator* trans,
-                   UChar* text,
-                   int32_t* textLength,
-                   int32_t textCapacity,
-                   int32_t start,
-                   int32_t* limit,
-                   UErrorCode* status);
-
-/**
- * Transliterate the portion of the UChar* text buffer that can be
- * transliterated unambiguosly.  See utrans_transIncremental().  The
- * string is passed in in a UChar* buffer.  The string is modified in
- * place.  If the result is longer than textCapacity, it is truncated.
- * The actual length of the result is returned in *textLength, if
- * textLength is non-NULL. *textLength may be greater than
- * textCapacity, but only textCapacity UChars will be written to
- * *text, including the zero terminator.  See utrans_transIncremental()
- * for usage details.
- *
- * @param trans the transliterator
- * @param text a pointer to a buffer containing the text to be
- * transliterated on input and the result text on output.
- * @param textLength a pointer to the length of the string in text.
- * If the length is -1 then the string is assumed to be
- * zero-terminated.  Upon return, the new length is stored in
- * *textLength.  If textLength is NULL then the string is assumed to
- * be zero-terminated.
- * @param textCapacity the length of the text buffer
- * @param pos a struct containing the start and limit indices of the
- * text to be read and the text to be transliterated
- * @param status a pointer to the UErrorCode
- * @see utrans_transIncremental
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2 
-utrans_transIncrementalUChars(const UTransliterator* trans,
-                              UChar* text,
-                              int32_t* textLength,
-                              int32_t textCapacity,
-                              UTransPosition* pos,
-                              UErrorCode* status);
-
-/* deprecated API ----------------------------------------------------------- */
-
-/* see utrans.h documentation for why these functions are deprecated */
-
-/**
- * Deprecated, use utrans_openU() instead.
- * Open a custom transliterator, given a custom rules string 
- * OR 
- * a system transliterator, given its ID.  
- * Any non-NULL result from this function should later be closed with
- * utrans_close().
- *
- * @param id a valid ID, as returned by utrans_getAvailableID()
- * @param dir the desired direction
- * @param rules the transliterator rules.  See the C++ header rbt.h
- * for rules syntax. If NULL then a system transliterator matching 
- * the ID is returned.
- * @param rulesLength the length of the rules, or -1 if the rules
- * are zero-terminated.
- * @param parseError a pointer to a UParseError struct to receive the
- * details of any parsing errors. This parameter may be NULL if no
- * parsing error details are desired.
- * @param status a pointer to the UErrorCode
- * @return a transliterator pointer that may be passed to other
- * utrans_xxx() functions, or NULL if the open call fails.
- * @deprecated ICU 2.8 Use utrans_openU() instead, see utrans.h
- */
-U_DEPRECATED UTransliterator* U_EXPORT2 
-utrans_open(const char* id,
-            UTransDirection dir,
-            const UChar* rules,         /* may be Null */
-            int32_t rulesLength,        /* -1 if null-terminated */ 
-            UParseError* parseError,    /* may be Null */
-            UErrorCode* status);
-
-/**
- * Deprecated, use utrans_getUnicodeID() instead.
- * Return the programmatic identifier for this transliterator.
- * If this identifier is passed to utrans_open(), it will open
- * a transliterator equivalent to this one, if the ID has been
- * registered.
- * @param trans the transliterator to return the ID of.
- * @param buf the buffer in which to receive the ID.  This may be
- * NULL, in which case no characters are copied.
- * @param bufCapacity the capacity of the buffer.  Ignored if buf is
- * NULL.
- * @return the actual length of the ID, not including
- * zero-termination.  This may be greater than bufCapacity.
- * @deprecated ICU 2.8 Use utrans_getUnicodeID() instead, see utrans.h
- */
-U_DEPRECATED int32_t U_EXPORT2 
-utrans_getID(const UTransliterator* trans,
-             char* buf,
-             int32_t bufCapacity);
-
-/**
- * Deprecated, use utrans_unregisterID() instead.
- * Unregister a transliterator from the system.  After this call the
- * system will no longer recognize the given ID when passed to
- * utrans_open().  If the id is invalid then nothing is done.
- *
- * @param id a zero-terminated ID
- * @deprecated ICU 2.8 Use utrans_unregisterID() instead, see utrans.h
- */
-U_DEPRECATED void U_EXPORT2 
-utrans_unregister(const char* id);
-
-/**
- * Deprecated, use utrans_openIDs() instead.
- * Return the ID of the index-th system transliterator.  The result
- * is placed in the given buffer.  If the given buffer is too small,
- * the initial substring is copied to buf.  The result in buf is
- * always zero-terminated.
- *
- * @param index the number of the transliterator to return.  Must
- * satisfy 0 <= index < utrans_countAvailableIDs().  If index is out
- * of range then it is treated as if it were 0.
- * @param buf the buffer in which to receive the ID.  This may be
- * NULL, in which case no characters are copied.
- * @param bufCapacity the capacity of the buffer.  Ignored if buf is
- * NULL.
- * @return the actual length of the index-th ID, not including
- * zero-termination.  This may be greater than bufCapacity.
- * @deprecated ICU 2.8 Use utrans_openIDs() instead, see utrans.h
- */
-U_DEPRECATED int32_t U_EXPORT2 
-utrans_getAvailableID(int32_t index,
-                      char* buf,
-                      int32_t bufCapacity);
-
-#endif /* #if !UCONFIG_NO_TRANSLITERATION */
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/utrans.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/utrans.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/utrans.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/utrans.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,583 @@
+/*
+*******************************************************************************
+*   Copyright (C) 1997-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   Date        Name        Description
+*   06/21/00    aliu        Creation.
+*******************************************************************************
+*/
+
+#ifndef UTRANS_H
+#define UTRANS_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_TRANSLITERATION
+
+#include "unicode/urep.h"
+#include "unicode/parseerr.h"
+#include "unicode/uenum.h"
+
+/********************************************************************
+ * General Notes
+ ********************************************************************
+ */
+/**
+ * \file
+ * \brief C API: Transliterator
+ *
+ * <h2> Transliteration </h2>
+ * The data structures and functions described in this header provide
+ * transliteration services.  Transliteration services are implemented
+ * as C++ classes.  The comments and documentation in this header
+ * assume the reader is familiar with the C++ headers translit.h and
+ * associated documentation.
+ *
+ * A significant but incomplete subset of the C++ transliteration
+ * services are available to C code through this header.  In order to
+ * access more complex transliteration services, refer to the C++
+ * headers and documentation.
+ *
+ * There are two sets of functions for working with transliterator IDs:
+ *
+ * An old, deprecated set uses char * IDs, which works for true and pure
+ * identifiers that these APIs were designed for,
+ * for example "Cyrillic-Latin".
+ * It does not work when the ID contains filters ("[:Script=Cyrl:]")
+ * or even a complete set of rules because then the ID string contains more
+ * than just "invariant" characters (see utypes.h).
+ *
+ * A new set of functions replaces the old ones and uses UChar * IDs,
+ * paralleling the UnicodeString IDs in the C++ API. (New in ICU 2.8.)
+ */
+
+/********************************************************************
+ * Data Structures
+ ********************************************************************/
+
+/**
+ * An opaque transliterator for use in C.  Open with utrans_openxxx()
+ * and close with utrans_close() when done.  Equivalent to the C++ class
+ * Transliterator and its subclasses.
+ * @see Transliterator
+ * @stable ICU 2.0
+ */
+typedef void* UTransliterator;
+
+/**
+ * Direction constant indicating the direction in a transliterator,
+ * e.g., the forward or reverse rules of a RuleBasedTransliterator.
+ * Specified when a transliterator is opened.  An "A-B" transliterator
+ * transliterates A to B when operating in the forward direction, and
+ * B to A when operating in the reverse direction.
+ * @stable ICU 2.0
+ */
+typedef enum UTransDirection {
+    
+    /**
+     * UTRANS_FORWARD means from &lt;source&gt; to &lt;target&gt; for a
+     * transliterator with ID &lt;source&gt;-&lt;target&gt;.  For a transliterator
+     * opened using a rule, it means forward direction rules, e.g.,
+     * "A > B".
+     */
+    UTRANS_FORWARD,
+
+    /**
+     * UTRANS_REVERSE means from &lt;target&gt; to &lt;source&gt; for a
+     * transliterator with ID &lt;source&gt;-&lt;target&gt;.  For a transliterator
+     * opened using a rule, it means reverse direction rules, e.g.,
+     * "A < B".
+     */
+    UTRANS_REVERSE
+
+} UTransDirection;
+
+/**
+ * Position structure for utrans_transIncremental() incremental
+ * transliteration.  This structure defines two substrings of the text
+ * being transliterated.  The first region, [contextStart,
+ * contextLimit), defines what characters the transliterator will read
+ * as context.  The second region, [start, limit), defines what
+ * characters will actually be transliterated.  The second region
+ * should be a subset of the first.
+ *
+ * <p>After a transliteration operation, some of the indices in this
+ * structure will be modified.  See the field descriptions for
+ * details.
+ *
+ * <p>contextStart <= start <= limit <= contextLimit
+ *
+ * <p>Note: All index values in this structure must be at code point
+ * boundaries.  That is, none of them may occur between two code units
+ * of a surrogate pair.  If any index does split a surrogate pair,
+ * results are unspecified.
+ *
+ * @stable ICU 2.0
+ */
+typedef struct UTransPosition {
+
+    /**
+     * Beginning index, inclusive, of the context to be considered for
+     * a transliteration operation.  The transliterator will ignore
+     * anything before this index.  INPUT/OUTPUT parameter: This parameter
+     * is updated by a transliteration operation to reflect the maximum
+     * amount of antecontext needed by a transliterator.
+     * @stable ICU 2.4
+     */
+    int32_t contextStart;
+    
+    /**
+     * Ending index, exclusive, of the context to be considered for a
+     * transliteration operation.  The transliterator will ignore
+     * anything at or after this index.  INPUT/OUTPUT parameter: This
+     * parameter is updated to reflect changes in the length of the
+     * text, but points to the same logical position in the text.
+     * @stable ICU 2.4
+     */
+    int32_t contextLimit;
+    
+    /**
+     * Beginning index, inclusive, of the text to be transliteratd.
+     * INPUT/OUTPUT parameter: This parameter is advanced past
+     * characters that have already been transliterated by a
+     * transliteration operation.
+     * @stable ICU 2.4
+     */
+    int32_t start;
+    
+    /**
+     * Ending index, exclusive, of the text to be transliteratd.
+     * INPUT/OUTPUT parameter: This parameter is updated to reflect
+     * changes in the length of the text, but points to the same
+     * logical position in the text.
+     * @stable ICU 2.4
+     */
+    int32_t limit;
+
+} UTransPosition;
+
+/********************************************************************
+ * General API
+ ********************************************************************/
+
+/**
+ * Open a custom transliterator, given a custom rules string 
+ * OR 
+ * a system transliterator, given its ID.  
+ * Any non-NULL result from this function should later be closed with
+ * utrans_close().
+ *
+ * @param id a valid transliterator ID
+ * @param idLength the length of the ID string, or -1 if NUL-terminated
+ * @param dir the desired direction
+ * @param rules the transliterator rules.  See the C++ header rbt.h for
+ *              rules syntax. If NULL then a system transliterator matching
+ *              the ID is returned.
+ * @param rulesLength the length of the rules, or -1 if the rules
+ *                    are NUL-terminated.
+ * @param parseError a pointer to a UParseError struct to receive the details
+ *                   of any parsing errors. This parameter may be NULL if no
+ *                   parsing error details are desired.
+ * @param pErrorCode a pointer to the UErrorCode
+ * @return a transliterator pointer that may be passed to other
+ *         utrans_xxx() functions, or NULL if the open call fails.
+ * @stable ICU 2.8
+ */
+U_STABLE UTransliterator* U_EXPORT2
+utrans_openU(const UChar *id,
+             int32_t idLength,
+             UTransDirection dir,
+             const UChar *rules,
+             int32_t rulesLength,
+             UParseError *parseError,
+             UErrorCode *pErrorCode);
+
+/**
+ * Open an inverse of an existing transliterator.  For this to work,
+ * the inverse must be registered with the system.  For example, if
+ * the Transliterator "A-B" is opened, and then its inverse is opened,
+ * the result is the Transliterator "B-A", if such a transliterator is
+ * registered with the system.  Otherwise the result is NULL and a
+ * failing UErrorCode is set.  Any non-NULL result from this function
+ * should later be closed with utrans_close().
+ *
+ * @param trans the transliterator to open the inverse of.
+ * @param status a pointer to the UErrorCode
+ * @return a pointer to a newly-opened transliterator that is the
+ * inverse of trans, or NULL if the open call fails.
+ * @stable ICU 2.0
+ */
+U_STABLE UTransliterator* U_EXPORT2 
+utrans_openInverse(const UTransliterator* trans,
+                   UErrorCode* status);
+
+/**
+ * Create a copy of a transliterator.  Any non-NULL result from this
+ * function should later be closed with utrans_close().
+ *
+ * @param trans the transliterator to be copied.
+ * @param status a pointer to the UErrorCode
+ * @return a transliterator pointer that may be passed to other
+ * utrans_xxx() functions, or NULL if the clone call fails.
+ * @stable ICU 2.0
+ */
+U_STABLE UTransliterator* U_EXPORT2 
+utrans_clone(const UTransliterator* trans,
+             UErrorCode* status);
+
+/**
+ * Close a transliterator.  Any non-NULL pointer returned by
+ * utrans_openXxx() or utrans_clone() should eventually be closed.
+ * @param trans the transliterator to be closed.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+utrans_close(UTransliterator* trans);
+
+/**
+ * Return the programmatic identifier for this transliterator.
+ * If this identifier is passed to utrans_openU(), it will open
+ * a transliterator equivalent to this one, if the ID has been
+ * registered.
+ *
+ * @param trans the transliterator to return the ID of.
+ * @param resultLength pointer to an output variable receiving the length
+ *        of the ID string; can be NULL
+ * @return the NUL-terminated ID string. This pointer remains
+ * valid until utrans_close() is called on this transliterator.
+ *
+ * @stable ICU 2.8
+ */
+U_STABLE const UChar * U_EXPORT2
+utrans_getUnicodeID(const UTransliterator *trans,
+                    int32_t *resultLength);
+
+/**
+ * Register an open transliterator with the system.  When
+ * utrans_open() is called with an ID string that is equal to that
+ * returned by utrans_getID(adoptedTrans,...), then
+ * utrans_clone(adoptedTrans,...) is returned.
+ *
+ * <p>NOTE: After this call the system owns the adoptedTrans and will
+ * close it.  The user must not call utrans_close() on adoptedTrans.
+ *
+ * @param adoptedTrans a transliterator, typically the result of
+ * utrans_openRules(), to be registered with the system.
+ * @param status a pointer to the UErrorCode
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+utrans_register(UTransliterator* adoptedTrans,
+                UErrorCode* status);
+
+/**
+ * Unregister a transliterator from the system.  After this call the
+ * system will no longer recognize the given ID when passed to
+ * utrans_open(). If the ID is invalid then nothing is done.
+ *
+ * @param id an ID to unregister
+ * @param idLength the length of id, or -1 if id is zero-terminated
+ * @stable ICU 2.8
+ */
+U_STABLE void U_EXPORT2
+utrans_unregisterID(const UChar* id, int32_t idLength);
+
+/**
+ * Set the filter used by a transliterator.  A filter can be used to
+ * make the transliterator pass certain characters through untouched.
+ * The filter is expressed using a UnicodeSet pattern.  If the
+ * filterPattern is NULL or the empty string, then the transliterator
+ * will be reset to use no filter.
+ *
+ * @param trans the transliterator
+ * @param filterPattern a pattern string, in the form accepted by
+ * UnicodeSet, specifying which characters to apply the
+ * transliteration to.  May be NULL or the empty string to indicate no
+ * filter.
+ * @param filterPatternLen the length of filterPattern, or -1 if
+ * filterPattern is zero-terminated
+ * @param status a pointer to the UErrorCode
+ * @see UnicodeSet
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+utrans_setFilter(UTransliterator* trans,
+                 const UChar* filterPattern,
+                 int32_t filterPatternLen,
+                 UErrorCode* status);
+
+/**
+ * Return the number of system transliterators.
+ * It is recommended to use utrans_openIDs() instead.
+ *
+ * @return the number of system transliterators.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2 
+utrans_countAvailableIDs(void);
+
+/**
+ * Return a UEnumeration for the available transliterators.
+ *
+ * @param pErrorCode Pointer to the UErrorCode in/out parameter.
+ * @return UEnumeration for the available transliterators.
+ *         Close with uenum_close().
+ *
+ * @stable ICU 2.8
+ */
+U_STABLE UEnumeration * U_EXPORT2
+utrans_openIDs(UErrorCode *pErrorCode);
+
+/********************************************************************
+ * Transliteration API
+ ********************************************************************/
+
+/**
+ * Transliterate a segment of a UReplaceable string.  The string is
+ * passed in as a UReplaceable pointer rep and a UReplaceableCallbacks
+ * function pointer struct repFunc.  Functions in the repFunc struct
+ * will be called in order to modify the rep string.
+ *
+ * @param trans the transliterator
+ * @param rep a pointer to the string.  This will be passed to the
+ * repFunc functions.
+ * @param repFunc a set of function pointers that will be used to
+ * modify the string pointed to by rep.
+ * @param start the beginning index, inclusive; <code>0 <= start <=
+ * limit</code>.
+ * @param limit pointer to the ending index, exclusive; <code>start <=
+ * limit <= repFunc->length(rep)</code>.  Upon return, *limit will
+ * contain the new limit index.  The text previously occupying
+ * <code>[start, limit)</code> has been transliterated, possibly to a
+ * string of a different length, at <code>[start,
+ * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em>
+ * is the return value.
+ * @param status a pointer to the UErrorCode
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+utrans_trans(const UTransliterator* trans,
+             UReplaceable* rep,
+             UReplaceableCallbacks* repFunc,
+             int32_t start,
+             int32_t* limit,
+             UErrorCode* status);
+
+/**
+ * Transliterate the portion of the UReplaceable text buffer that can
+ * be transliterated unambiguosly.  This method is typically called
+ * after new text has been inserted, e.g. as a result of a keyboard
+ * event.  The transliterator will try to transliterate characters of
+ * <code>rep</code> between <code>index.cursor</code> and
+ * <code>index.limit</code>.  Characters before
+ * <code>index.cursor</code> will not be changed.
+ *
+ * <p>Upon return, values in <code>index</code> will be updated.
+ * <code>index.start</code> will be advanced to the first
+ * character that future calls to this method will read.
+ * <code>index.cursor</code> and <code>index.limit</code> will
+ * be adjusted to delimit the range of text that future calls to
+ * this method may change.
+ *
+ * <p>Typical usage of this method begins with an initial call
+ * with <code>index.start</code> and <code>index.limit</code>
+ * set to indicate the portion of <code>text</code> to be
+ * transliterated, and <code>index.cursor == index.start</code>.
+ * Thereafter, <code>index</code> can be used without
+ * modification in future calls, provided that all changes to
+ * <code>text</code> are made via this method.
+ *
+ * <p>This method assumes that future calls may be made that will
+ * insert new text into the buffer.  As a result, it only performs
+ * unambiguous transliterations.  After the last call to this method,
+ * there may be untransliterated text that is waiting for more input
+ * to resolve an ambiguity.  In order to perform these pending
+ * transliterations, clients should call utrans_trans() with a start
+ * of index.start and a limit of index.end after the last call to this
+ * method has been made.
+ *
+ * @param trans the transliterator
+ * @param rep a pointer to the string.  This will be passed to the
+ * repFunc functions.
+ * @param repFunc a set of function pointers that will be used to
+ * modify the string pointed to by rep.
+ * @param pos a struct containing the start and limit indices of the
+ * text to be read and the text to be transliterated
+ * @param status a pointer to the UErrorCode
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+utrans_transIncremental(const UTransliterator* trans,
+                        UReplaceable* rep,
+                        UReplaceableCallbacks* repFunc,
+                        UTransPosition* pos,
+                        UErrorCode* status);
+
+/**
+ * Transliterate a segment of a UChar* string.  The string is passed
+ * in in a UChar* buffer.  The string is modified in place.  If the
+ * result is longer than textCapacity, it is truncated.  The actual
+ * length of the result is returned in *textLength, if textLength is
+ * non-NULL. *textLength may be greater than textCapacity, but only
+ * textCapacity UChars will be written to *text, including the zero
+ * terminator.
+ *
+ * @param trans the transliterator
+ * @param text a pointer to a buffer containing the text to be
+ * transliterated on input and the result text on output.
+ * @param textLength a pointer to the length of the string in text.
+ * If the length is -1 then the string is assumed to be
+ * zero-terminated.  Upon return, the new length is stored in
+ * *textLength.  If textLength is NULL then the string is assumed to
+ * be zero-terminated.
+ * @param textCapacity a pointer to the length of the text buffer.
+ * Upon return, 
+ * @param start the beginning index, inclusive; <code>0 <= start <=
+ * limit</code>.
+ * @param limit pointer to the ending index, exclusive; <code>start <=
+ * limit <= repFunc->length(rep)</code>.  Upon return, *limit will
+ * contain the new limit index.  The text previously occupying
+ * <code>[start, limit)</code> has been transliterated, possibly to a
+ * string of a different length, at <code>[start,
+ * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em>
+ * is the return value.
+ * @param status a pointer to the UErrorCode
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+utrans_transUChars(const UTransliterator* trans,
+                   UChar* text,
+                   int32_t* textLength,
+                   int32_t textCapacity,
+                   int32_t start,
+                   int32_t* limit,
+                   UErrorCode* status);
+
+/**
+ * Transliterate the portion of the UChar* text buffer that can be
+ * transliterated unambiguosly.  See utrans_transIncremental().  The
+ * string is passed in in a UChar* buffer.  The string is modified in
+ * place.  If the result is longer than textCapacity, it is truncated.
+ * The actual length of the result is returned in *textLength, if
+ * textLength is non-NULL. *textLength may be greater than
+ * textCapacity, but only textCapacity UChars will be written to
+ * *text, including the zero terminator.  See utrans_transIncremental()
+ * for usage details.
+ *
+ * @param trans the transliterator
+ * @param text a pointer to a buffer containing the text to be
+ * transliterated on input and the result text on output.
+ * @param textLength a pointer to the length of the string in text.
+ * If the length is -1 then the string is assumed to be
+ * zero-terminated.  Upon return, the new length is stored in
+ * *textLength.  If textLength is NULL then the string is assumed to
+ * be zero-terminated.
+ * @param textCapacity the length of the text buffer
+ * @param pos a struct containing the start and limit indices of the
+ * text to be read and the text to be transliterated
+ * @param status a pointer to the UErrorCode
+ * @see utrans_transIncremental
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2 
+utrans_transIncrementalUChars(const UTransliterator* trans,
+                              UChar* text,
+                              int32_t* textLength,
+                              int32_t textCapacity,
+                              UTransPosition* pos,
+                              UErrorCode* status);
+
+/* deprecated API ----------------------------------------------------------- */
+
+/* see utrans.h documentation for why these functions are deprecated */
+
+/**
+ * Deprecated, use utrans_openU() instead.
+ * Open a custom transliterator, given a custom rules string 
+ * OR 
+ * a system transliterator, given its ID.  
+ * Any non-NULL result from this function should later be closed with
+ * utrans_close().
+ *
+ * @param id a valid ID, as returned by utrans_getAvailableID()
+ * @param dir the desired direction
+ * @param rules the transliterator rules.  See the C++ header rbt.h
+ * for rules syntax. If NULL then a system transliterator matching 
+ * the ID is returned.
+ * @param rulesLength the length of the rules, or -1 if the rules
+ * are zero-terminated.
+ * @param parseError a pointer to a UParseError struct to receive the
+ * details of any parsing errors. This parameter may be NULL if no
+ * parsing error details are desired.
+ * @param status a pointer to the UErrorCode
+ * @return a transliterator pointer that may be passed to other
+ * utrans_xxx() functions, or NULL if the open call fails.
+ * @deprecated ICU 2.8 Use utrans_openU() instead, see utrans.h
+ */
+U_DEPRECATED UTransliterator* U_EXPORT2 
+utrans_open(const char* id,
+            UTransDirection dir,
+            const UChar* rules,         /* may be Null */
+            int32_t rulesLength,        /* -1 if null-terminated */ 
+            UParseError* parseError,    /* may be Null */
+            UErrorCode* status);
+
+/**
+ * Deprecated, use utrans_getUnicodeID() instead.
+ * Return the programmatic identifier for this transliterator.
+ * If this identifier is passed to utrans_open(), it will open
+ * a transliterator equivalent to this one, if the ID has been
+ * registered.
+ * @param trans the transliterator to return the ID of.
+ * @param buf the buffer in which to receive the ID.  This may be
+ * NULL, in which case no characters are copied.
+ * @param bufCapacity the capacity of the buffer.  Ignored if buf is
+ * NULL.
+ * @return the actual length of the ID, not including
+ * zero-termination.  This may be greater than bufCapacity.
+ * @deprecated ICU 2.8 Use utrans_getUnicodeID() instead, see utrans.h
+ */
+U_DEPRECATED int32_t U_EXPORT2 
+utrans_getID(const UTransliterator* trans,
+             char* buf,
+             int32_t bufCapacity);
+
+/**
+ * Deprecated, use utrans_unregisterID() instead.
+ * Unregister a transliterator from the system.  After this call the
+ * system will no longer recognize the given ID when passed to
+ * utrans_open().  If the id is invalid then nothing is done.
+ *
+ * @param id a zero-terminated ID
+ * @deprecated ICU 2.8 Use utrans_unregisterID() instead, see utrans.h
+ */
+U_DEPRECATED void U_EXPORT2 
+utrans_unregister(const char* id);
+
+/**
+ * Deprecated, use utrans_openIDs() instead.
+ * Return the ID of the index-th system transliterator.  The result
+ * is placed in the given buffer.  If the given buffer is too small,
+ * the initial substring is copied to buf.  The result in buf is
+ * always zero-terminated.
+ *
+ * @param index the number of the transliterator to return.  Must
+ * satisfy 0 <= index < utrans_countAvailableIDs().  If index is out
+ * of range then it is treated as if it were 0.
+ * @param buf the buffer in which to receive the ID.  This may be
+ * NULL, in which case no characters are copied.
+ * @param bufCapacity the capacity of the buffer.  Ignored if buf is
+ * NULL.
+ * @return the actual length of the index-th ID, not including
+ * zero-termination.  This may be greater than bufCapacity.
+ * @deprecated ICU 2.8 Use utrans_openIDs() instead, see utrans.h
+ */
+U_DEPRECATED int32_t U_EXPORT2 
+utrans_getAvailableID(int32_t index,
+                      char* buf,
+                      int32_t bufCapacity);
+
+#endif /* #if !UCONFIG_NO_TRANSLITERATION */
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/utypes.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/utypes.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/utypes.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,801 +0,0 @@
-/*
-**********************************************************************
-*   Copyright (C) 1996-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*
-*  FILE NAME : UTYPES.H (formerly ptypes.h)
-*
-*   Date        Name        Description
-*   12/11/96    helena      Creation.
-*   02/27/97    aliu        Added typedefs for UClassID, int8, int16, int32,
-*                           uint8, uint16, and uint32.
-*   04/01/97    aliu        Added XP_CPLUSPLUS and modified to work under C as
-*                            well as C++.
-*                           Modified to use memcpy() for uprv_arrayCopy() fns.
-*   04/14/97    aliu        Added TPlatformUtilities.
-*   05/07/97    aliu        Added import/export specifiers (replacing the old
-*                           broken EXT_CLASS).  Added version number for our
-*                           code.  Cleaned up header.
-*    6/20/97    helena      Java class name change.
-*   08/11/98    stephen     UErrorCode changed from typedef to enum
-*   08/12/98    erm         Changed T_ANALYTIC_PACKAGE_VERSION to 3
-*   08/14/98    stephen     Added uprv_arrayCopy() for int8_t, int16_t, int32_t
-*   12/09/98    jfitz       Added BUFFER_OVERFLOW_ERROR (bug 1100066)
-*   04/20/99    stephen     Cleaned up & reworked for autoconf.
-*                           Renamed to utypes.h.
-*   05/05/99    stephen     Changed to use <inttypes.h>
-*   12/07/99    helena      Moved copyright notice string from ucnv_bld.h here.
-*******************************************************************************
-*/
-
-#ifndef UTYPES_H
-#define UTYPES_H
-
-
-#include "unicode/umachine.h"
-#include "unicode/utf.h"
-#include "unicode/uversion.h"
-#include "unicode/uconfig.h"
-
-#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
-#define U_HIDE_DRAFT_API 1
-#endif
-
-#ifdef U_HIDE_DRAFT_API
-#include "unicode/udraft.h"
-#endif
-
-#ifdef U_HIDE_DEPRECATED_API
-#include "unicode/udeprctd.h"
-#endif
-
-#ifdef U_HIDE_DEPRECATED_API
-#include "unicode/uobslete.h"
-#endif
-
-#ifdef U_HIDE_INTERNAL_API
-#include "unicode/uintrnal.h"
-#endif
-
-#ifdef U_HIDE_SYSTEM_API
-#include "unicode/usystem.h"
-#endif
-
-/*!
- * \file
- * \brief Basic definitions for ICU, for both C and C++ APIs
- *
- * This file defines basic types, constants, and enumerations directly or
- * indirectly by including other header files, especially utf.h for the
- * basic character and string definitions and umachine.h for consistent
- * integer and other types.
- */
-
-/*===========================================================================*/
-/* char Character set family                                                 */
-/*===========================================================================*/
-
-/**
- * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
- * @stable ICU 2.0
- */
-#define U_ASCII_FAMILY 0
-
-/**
- * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
- * @stable ICU 2.0
- */
-#define U_EBCDIC_FAMILY 1
-
-/**
- * \def U_CHARSET_FAMILY
- *
- * <p>These definitions allow to specify the encoding of text
- * in the char data type as defined by the platform and the compiler.
- * It is enough to determine the code point values of "invariant characters",
- * which are the ones shared by all encodings that are in use
- * on a given platform.</p>
- *
- * <p>Those "invariant characters" should be all the uppercase and lowercase
- * latin letters, the digits, the space, and "basic punctuation".
- * Also, '\\n', '\\r', '\\t' should be available.</p>
- *
- * <p>The list of "invariant characters" is:<br>
- * \code
- *    A-Z  a-z  0-9  SPACE  "  %  &amp;  '  (  )  *  +  ,  -  .  /  :  ;  <  =  >  ?  _
- * \endcode
- * <br>
- * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
- *
- * <p>This matches the IBM Syntactic Character Set (CS 640).</p>
- *
- * <p>In other words, all the graphic characters in 7-bit ASCII should
- * be safely accessible except the following:</p>
- *
- * \code
- *    '\' <backslash>
- *    '[' <left bracket>
- *    ']' <right bracket>
- *    '{' <left brace>
- *    '}' <right brace>
- *    '^' <circumflex>
- *    '~' <tilde>
- *    '!' <exclamation mark>
- *    '#' <number sign>
- *    '|' <vertical line>
- *    '$' <dollar sign>
- *    '@' <commercial at>
- *    '`' <grave accent>
- * \endcode
- * @stable ICU 2.0
- */
-
-#ifndef U_CHARSET_FAMILY
-#   define U_CHARSET_FAMILY 0
-#endif
-
-/*===========================================================================*/
-/* ICUDATA naming scheme                                                     */
-/*===========================================================================*/
-
-/**
- * \def U_ICUDATA_TYPE_LETTER
- *
- * This is a platform-dependent string containing one letter:
- * - b for big-endian, ASCII-family platforms
- * - l for little-endian, ASCII-family platforms
- * - e for big-endian, EBCDIC-family platforms
- * This letter is part of the common data file name.
- * @stable ICU 2.0
- */
-
-/**
- * \def U_ICUDATA_TYPE_LITLETTER
- * The non-string form of U_ICUDATA_TYPE_LETTER
- * @stable ICU 2.0
- */
-#if U_CHARSET_FAMILY
-#   if U_IS_BIG_ENDIAN
-   /* EBCDIC - should always be BE */
-#     define U_ICUDATA_TYPE_LETTER "e"
-#     define U_ICUDATA_TYPE_LITLETTER e
-#   else
-#     error "Don't know what to do with little endian EBCDIC!"
-#     define U_ICUDATA_TYPE_LETTER "x"
-#     define U_ICUDATA_TYPE_LITLETTER x
-#   endif
-#else
-#   if U_IS_BIG_ENDIAN
-      /* Big-endian ASCII */
-#     define U_ICUDATA_TYPE_LETTER "b"
-#     define U_ICUDATA_TYPE_LITLETTER b
-#   else
-      /* Little-endian ASCII */
-#     define U_ICUDATA_TYPE_LETTER "l"
-#     define U_ICUDATA_TYPE_LITLETTER l
-#   endif
-#endif
-
-/**
- * A single string literal containing the icudata stub name. i.e. 'icudt18e' for
- * ICU 1.8.x on EBCDIC, etc..
- * @stable ICU 2.0
- */
-#define U_ICUDATA_NAME    "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
-
-
-/**
- *  U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
- *    Defined as a literal, not a string.
- *    Tricky Preprocessor use - ## operator replaces macro paramters with the literal string
- *                              from the corresponding macro invocation, _before_ other macro substitutions.
- *                              Need a nested \#defines to get the actual version numbers rather than
- *                              the literal text U_ICU_VERSION_MAJOR_NUM into the name.
- *                              The net result will be something of the form
- *                                  \#define U_ICU_ENTRY_POINT icudt19_dat
- * @stable ICU 2.4
- */
-#define U_ICUDATA_ENTRY_POINT  U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM)
-
-/**
- * Do not use.
- * @internal
- */
-#define U_DEF2_ICUDATA_ENTRY_POINT(major, minor) U_DEF_ICUDATA_ENTRY_POINT(major, minor)
-/**
- * Do not use.
- * @internal
- */
-#define U_DEF_ICUDATA_ENTRY_POINT(major, minor) icudt##major##minor##_dat
-
-/**
- * \def U_CALLCONV
- * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
- * in callback function typedefs to make sure that the calling convention
- * is compatible.
- *
- * This is only used for non-ICU-API functions.
- * When a function is a public ICU API,
- * you must use the U_CAPI and U_EXPORT2 qualifiers.
- * @stable ICU 2.0
- */
-#if defined(OS390) && (__COMPILER_VER__ < 0x41020000) && defined(XP_CPLUSPLUS)
-#    define U_CALLCONV __cdecl
-#else
-#    define U_CALLCONV U_EXPORT2
-#endif
-
-/**
- * \def NULL
- * Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
- * @stable ICU 2.0
- */
-#ifndef NULL
-#ifdef XP_CPLUSPLUS
-#define NULL    0
-#else
-#define NULL    ((void *)0)
-#endif
-#endif
-
-/*===========================================================================*/
-/* Calendar/TimeZone data types                                              */
-/*===========================================================================*/
-
-/**
- * Date and Time data type.
- * This is a primitive data type that holds the date and time
- * as the number of milliseconds since 1970-jan-01, 00:00 UTC.
- * UTC leap seconds are ignored.
- * @stable ICU 2.0
- */
-typedef double UDate;
-
-/** The number of milliseconds per second @stable ICU 2.0 */
-#define U_MILLIS_PER_SECOND        (1000)
-/** The number of milliseconds per minute @stable ICU 2.0 */
-#define U_MILLIS_PER_MINUTE       (60000)
-/** The number of milliseconds per hour @stable ICU 2.0 */
-#define U_MILLIS_PER_HOUR       (3600000)
-/** The number of milliseconds per day @stable ICU 2.0 */
-#define U_MILLIS_PER_DAY       (86400000)
-
-
-/*===========================================================================*/
-/* UClassID-based RTTI */
-/*===========================================================================*/
-
-/**
- * UClassID is used to identify classes without using RTTI, since RTTI
- * is not yet supported by all C++ compilers.  Each class hierarchy which needs
- * to implement polymorphic clone() or operator==() defines two methods,
- * described in detail below.  UClassID values can be compared using
- * operator==(). Nothing else should be done with them.
- *
- * \par
- * getDynamicClassID() is declared in the base class of the hierarchy as
- * a pure virtual.  Each concrete subclass implements it in the same way:
- *
- * \code
- *      class Base {
- *      public:
- *          virtual UClassID getDynamicClassID() const = 0;
- *      }
- *
- *      class Derived {
- *      public:
- *          virtual UClassID getDynamicClassID() const
- *            { return Derived::getStaticClassID(); }
- *      }
- * \endcode
- *
- * Each concrete class implements getStaticClassID() as well, which allows
- * clients to test for a specific type.
- *
- * \code
- *      class Derived {
- *      public:
- *          static UClassID U_EXPORT2 getStaticClassID();
- *      private:
- *          static char fgClassID;
- *      }
- *
- *      // In Derived.cpp:
- *      UClassID Derived::getStaticClassID()
- *        { return (UClassID)&Derived::fgClassID; }
- *      char Derived::fgClassID = 0; // Value is irrelevant
- * \endcode
- * @stable ICU 2.0
- */
-typedef void* UClassID;
-
-/*===========================================================================*/
-/* Shared library/DLL import-export API control                              */
-/*===========================================================================*/
-
-/*
- * Control of symbol import/export.
- * ICU is separated into three libraries.
- */
-
-/*
- * \def U_COMBINED_IMPLEMENTATION
- * Set to export library symbols from inside the ICU library
- * when all of ICU is in a single library.
- * This can be set as a compiler option while building ICU, and it
- * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
- * @stable ICU 2.0
- */
-
-/**
- * \def U_DATA_API
- * Set to export library symbols from inside the stubdata library,
- * and to import them from outside.
- * @stable ICU 3.0
- */
-
-/**
- * \def U_COMMON_API
- * Set to export library symbols from inside the common library,
- * and to import them from outside.
- * @stable ICU 2.0
- */
-
-/**
- * \def U_I18N_API
- * Set to export library symbols from inside the i18n library,
- * and to import them from outside.
- * @stable ICU 2.0
- */
-
-/**
- * \def U_LAYOUT_API
- * Set to export library symbols from inside the layout engine library,
- * and to import them from outside.
- * @stable ICU 2.0
- */
-
-/**
- * \def U_LAYOUTEX_API
- * Set to export library symbols from inside the layout extensions library,
- * and to import them from outside.
- * @stable ICU 2.6
- */
-
-/**
- * \def U_IO_API
- * Set to export library symbols from inside the ustdio library,
- * and to import them from outside.
- * @stable ICU 2.0
- */
-
-/**
- * \def U_TOOLUTIL_API
- * Set to export library symbols from inside the toolutil library,
- * and to import them from outside.
- * @stable ICU 3.4
- */
-
-#if defined(U_COMBINED_IMPLEMENTATION)
-#define U_DATA_API     U_EXPORT
-#define U_COMMON_API   U_EXPORT
-#define U_I18N_API     U_EXPORT
-#define U_LAYOUT_API   U_EXPORT
-#define U_LAYOUTEX_API U_EXPORT
-#define U_IO_API       U_EXPORT
-#define U_TOOLUTIL_API U_EXPORT
-#elif defined(U_STATIC_IMPLEMENTATION)
-#define U_DATA_API
-#define U_COMMON_API
-#define U_I18N_API
-#define U_LAYOUT_API
-#define U_LAYOUTEX_API
-#define U_IO_API
-#define U_TOOLUTIL_API
-#elif defined(U_COMMON_IMPLEMENTATION)
-#define U_DATA_API     U_IMPORT
-#define U_COMMON_API   U_EXPORT
-#define U_I18N_API     U_IMPORT
-#define U_LAYOUT_API   U_IMPORT
-#define U_LAYOUTEX_API U_IMPORT
-#define U_IO_API       U_IMPORT
-#define U_TOOLUTIL_API U_IMPORT
-#elif defined(U_I18N_IMPLEMENTATION)
-#define U_DATA_API     U_IMPORT
-#define U_COMMON_API   U_IMPORT
-#define U_I18N_API     U_EXPORT
-#define U_LAYOUT_API   U_IMPORT
-#define U_LAYOUTEX_API U_IMPORT
-#define U_IO_API       U_IMPORT
-#define U_TOOLUTIL_API U_IMPORT
-#elif defined(U_LAYOUT_IMPLEMENTATION)
-#define U_DATA_API     U_IMPORT
-#define U_COMMON_API   U_IMPORT
-#define U_I18N_API     U_IMPORT
-#define U_LAYOUT_API   U_EXPORT
-#define U_LAYOUTEX_API U_IMPORT
-#define U_IO_API       U_IMPORT
-#define U_TOOLUTIL_API U_IMPORT
-#elif defined(U_LAYOUTEX_IMPLEMENTATION)
-#define U_DATA_API     U_IMPORT
-#define U_COMMON_API   U_IMPORT
-#define U_I18N_API     U_IMPORT
-#define U_LAYOUT_API   U_IMPORT
-#define U_LAYOUTEX_API U_EXPORT
-#define U_IO_API       U_IMPORT
-#define U_TOOLUTIL_API U_IMPORT
-#elif defined(U_IO_IMPLEMENTATION)
-#define U_DATA_API     U_IMPORT
-#define U_COMMON_API   U_IMPORT
-#define U_I18N_API     U_IMPORT
-#define U_LAYOUT_API   U_IMPORT
-#define U_LAYOUTEX_API U_IMPORT
-#define U_IO_API       U_EXPORT
-#define U_TOOLUTIL_API U_IMPORT
-#elif defined(U_TOOLUTIL_IMPLEMENTATION)
-#define U_DATA_API     U_IMPORT
-#define U_COMMON_API   U_IMPORT
-#define U_I18N_API     U_IMPORT
-#define U_LAYOUT_API   U_IMPORT
-#define U_LAYOUTEX_API U_IMPORT
-#define U_IO_API       U_IMPORT
-#define U_TOOLUTIL_API U_EXPORT
-#else
-#define U_DATA_API     U_IMPORT
-#define U_COMMON_API   U_IMPORT
-#define U_I18N_API     U_IMPORT
-#define U_LAYOUT_API   U_IMPORT
-#define U_LAYOUTEX_API U_IMPORT
-#define U_IO_API       U_IMPORT
-#define U_TOOLUTIL_API U_IMPORT
-#endif
-
-/**
- * \def U_STANDARD_CPP_NAMESPACE
- * Control of C++ Namespace
- * @stable ICU 2.0
- */
-#ifdef __cplusplus
-#define U_STANDARD_CPP_NAMESPACE        ::
-#else
-#define U_STANDARD_CPP_NAMESPACE
-#endif
-
-
-/*===========================================================================*/
-/* Global delete operator                                                    */
-/*===========================================================================*/
-
-/*
- * The ICU4C library must not use the global new and delete operators.
- * These operators here are defined to enable testing for this.
- * See Jitterbug 2581 for details of why this is necessary.
- *
- * Verification that ICU4C's memory usage is correct, i.e.,
- * that global new/delete are not used:
- *
- * a) Check for imports of global new/delete (see uobject.cpp for details)
- * b) Verify that new is never imported.
- * c) Verify that delete is only imported from object code for interface/mixin classes.
- * d) Add global delete and delete[] only for the ICU4C library itself
- *    and define them in a way that crashes or otherwise easily shows a problem.
- *
- * The following implements d).
- * The operator implementations crash; this is intentional and used for library debugging.
- *
- * Note: This is currently only done on Windows because
- * some Linux/Unix compilers have problems with defining global new/delete.
- * On Windows, U_WINDOWS is defined, and it is _MSC_VER>=1200 for MSVC 6.0 and higher.
- */
-#if defined(XP_CPLUSPLUS) && defined(U_WINDOWS) && U_DEBUG && U_OVERRIDE_CXX_ALLOCATION && (_MSC_VER>=1200) && !defined(U_STATIC_IMPLEMENTATION) && (defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION))
-
-#ifndef U_HIDE_INTERNAL_API
-/**
- * Global operator new, defined only inside ICU4C, must not be used.
- * Crashes intentionally.
- * @internal
- */
-inline void *
-operator new(size_t /*size*/) {
-    char *q=NULL;
-    *q=5; /* break it */
-    return q;
-}
-
-#ifdef _Ret_bytecap_
-/* This is only needed to suppress a Visual C++ 2008 warning for operator new[]. */
-_Ret_bytecap_(_Size)
-#endif
-/**
- * Global operator new[], defined only inside ICU4C, must not be used.
- * Crashes intentionally.
- * @internal
- */
-inline void *
-operator new[](size_t /*size*/) {
-    char *q=NULL;
-    *q=5; /* break it */
-    return q;
-}
-
-/**
- * Global operator delete, defined only inside ICU4C, must not be used.
- * Crashes intentionally.
- * @internal
- */
-inline void
-operator delete(void * /*p*/) {
-    char *q=NULL;
-    *q=5; /* break it */
-}
-
-/**
- * Global operator delete[], defined only inside ICU4C, must not be used.
- * Crashes intentionally.
- * @internal
- */
-inline void
-operator delete[](void * /*p*/) {
-    char *q=NULL;
-    *q=5; /* break it */
-}
-
-#endif /* U_HIDE_INTERNAL_API */
-#endif
-
-/*===========================================================================*/
-/* UErrorCode */
-/*===========================================================================*/
-
-/**
- * Error code to replace exception handling, so that the code is compatible with all C++ compilers,
- * and to use the same mechanism for C and C++.
- *
- * \par
- * ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode
- * first test if(U_FAILURE(errorCode)) { return immediately; }
- * so that in a chain of such functions the first one that sets an error code
- * causes the following ones to not perform any operations.
- *
- * \par
- * Error codes should be tested using U_FAILURE() and U_SUCCESS().
- * @stable ICU 2.0
- */
-typedef enum UErrorCode {
-    /* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird
-     * and is that way because VC++ debugger displays first encountered constant,
-     * which is not the what the code is used for
-     */
-
-    U_USING_FALLBACK_WARNING  = -128,   /**< A resource bundle lookup returned a fallback result (not an error) */
-
-    U_ERROR_WARNING_START     = -128,   /**< Start of information results (semantically successful) */
-
-    U_USING_DEFAULT_WARNING   = -127,   /**< A resource bundle lookup returned a result from the root locale (not an error) */
-
-    U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
-
-    U_STATE_OLD_WARNING       = -125,   /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
-
-    U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
-
-    U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
-
-    U_AMBIGUOUS_ALIAS_WARNING = -122,   /**< This converter alias can go to different converter implementations */
-
-    U_DIFFERENT_UCA_VERSION = -121,     /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
-
-    U_ERROR_WARNING_LIMIT,              /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */
-
-
-    U_ZERO_ERROR              =  0,     /**< No error, no warning. */
-
-    U_ILLEGAL_ARGUMENT_ERROR  =  1,     /**< Start of codes indicating failure */
-    U_MISSING_RESOURCE_ERROR  =  2,     /**< The requested resource cannot be found */
-    U_INVALID_FORMAT_ERROR    =  3,     /**< Data format is not what is expected */
-    U_FILE_ACCESS_ERROR       =  4,     /**< The requested file cannot be found */
-    U_INTERNAL_PROGRAM_ERROR  =  5,     /**< Indicates a bug in the library code */
-    U_MESSAGE_PARSE_ERROR     =  6,     /**< Unable to parse a message (message format) */
-    U_MEMORY_ALLOCATION_ERROR =  7,     /**< Memory allocation error */
-    U_INDEX_OUTOFBOUNDS_ERROR =  8,     /**< Trying to access the index that is out of bounds */
-    U_PARSE_ERROR             =  9,     /**< Equivalent to Java ParseException */
-    U_INVALID_CHAR_FOUND      = 10,     /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
-    U_TRUNCATED_CHAR_FOUND    = 11,     /**< Character conversion: Incomplete input sequence. */
-    U_ILLEGAL_CHAR_FOUND      = 12,     /**< Character conversion: Illegal input sequence/combination of input units. */
-    U_INVALID_TABLE_FORMAT    = 13,     /**< Conversion table file found, but corrupted */
-    U_INVALID_TABLE_FILE      = 14,     /**< Conversion table file not found */
-    U_BUFFER_OVERFLOW_ERROR   = 15,     /**< A result would not fit in the supplied buffer */
-    U_UNSUPPORTED_ERROR       = 16,     /**< Requested operation not supported in current context */
-    U_RESOURCE_TYPE_MISMATCH  = 17,     /**< an operation is requested over a resource that does not support it */
-    U_ILLEGAL_ESCAPE_SEQUENCE = 18,     /**< ISO-2022 illlegal escape sequence */
-    U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
-    U_NO_SPACE_AVAILABLE      = 20,     /**< No space available for in-buffer expansion for Arabic shaping */
-    U_CE_NOT_FOUND_ERROR      = 21,     /**< Currently used only while setting variable top, but can be used generally */
-    U_PRIMARY_TOO_LONG_ERROR  = 22,     /**< User tried to set variable top to a primary that is longer than two bytes */
-    U_STATE_TOO_OLD_ERROR     = 23,     /**< ICU cannot construct a service from this state, as it is no longer supported */
-    U_TOO_MANY_ALIASES_ERROR  = 24,     /**< There are too many aliases in the path to the requested resource.
-                                             It is very possible that a circular alias definition has occured */
-    U_ENUM_OUT_OF_SYNC_ERROR  = 25,     /**< UEnumeration out of sync with underlying collection */
-    U_INVARIANT_CONVERSION_ERROR = 26,  /**< Unable to convert a UChar* string to char* with the invariant converter. */
-    U_INVALID_STATE_ERROR     = 27,     /**< Requested operation can not be completed with ICU in its current state */
-    U_COLLATOR_VERSION_MISMATCH = 28,   /**< Collator version is not compatible with the base version */
-    U_USELESS_COLLATOR_ERROR  = 29,     /**< Collator is options only and no base is specified */
-    U_NO_WRITE_PERMISSION     = 30,     /**< Attempt to modify read-only or constant data. */
-
-    U_STANDARD_ERROR_LIMIT,             /**< This must always be the last value to indicate the limit for standard errors */
-    /*
-     * the error code range 0x10000 0x10100 are reserved for Transliterator
-     */
-    U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
-    U_PARSE_ERROR_START = 0x10000,    /**< Start of Transliterator errors */
-    U_MALFORMED_RULE,                 /**< Elements of a rule are misplaced */
-    U_MALFORMED_SET,                  /**< A UnicodeSet pattern is invalid*/
-    U_MALFORMED_SYMBOL_REFERENCE,     /**< UNUSED as of ICU 2.4 */
-    U_MALFORMED_UNICODE_ESCAPE,       /**< A Unicode escape pattern is invalid*/
-    U_MALFORMED_VARIABLE_DEFINITION,  /**< A variable definition is invalid */
-    U_MALFORMED_VARIABLE_REFERENCE,   /**< A variable reference is invalid */
-    U_MISMATCHED_SEGMENT_DELIMITERS,  /**< UNUSED as of ICU 2.4 */
-    U_MISPLACED_ANCHOR_START,         /**< A start anchor appears at an illegal position */
-    U_MISPLACED_CURSOR_OFFSET,        /**< A cursor offset occurs at an illegal position */
-    U_MISPLACED_QUANTIFIER,           /**< A quantifier appears after a segment close delimiter */
-    U_MISSING_OPERATOR,               /**< A rule contains no operator */
-    U_MISSING_SEGMENT_CLOSE,          /**< UNUSED as of ICU 2.4 */
-    U_MULTIPLE_ANTE_CONTEXTS,         /**< More than one ante context */
-    U_MULTIPLE_CURSORS,               /**< More than one cursor */
-    U_MULTIPLE_POST_CONTEXTS,         /**< More than one post context */
-    U_TRAILING_BACKSLASH,             /**< A dangling backslash */
-    U_UNDEFINED_SEGMENT_REFERENCE,    /**< A segment reference does not correspond to a defined segment */
-    U_UNDEFINED_VARIABLE,             /**< A variable reference does not correspond to a defined variable */
-    U_UNQUOTED_SPECIAL,               /**< A special character was not quoted or escaped */
-    U_UNTERMINATED_QUOTE,             /**< A closing single quote is missing */
-    U_RULE_MASK_ERROR,                /**< A rule is hidden by an earlier more general rule */
-    U_MISPLACED_COMPOUND_FILTER,      /**< A compound filter is in an invalid location */
-    U_MULTIPLE_COMPOUND_FILTERS,      /**< More than one compound filter */
-    U_INVALID_RBT_SYNTAX,             /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
-    U_INVALID_PROPERTY_PATTERN,       /**< UNUSED as of ICU 2.4 */
-    U_MALFORMED_PRAGMA,               /**< A 'use' pragma is invlalid */
-    U_UNCLOSED_SEGMENT,               /**< A closing ')' is missing */
-    U_ILLEGAL_CHAR_IN_SEGMENT,        /**< UNUSED as of ICU 2.4 */
-    U_VARIABLE_RANGE_EXHAUSTED,       /**< Too many stand-ins generated for the given variable range */
-    U_VARIABLE_RANGE_OVERLAP,         /**< The variable range overlaps characters used in rules */
-    U_ILLEGAL_CHARACTER,              /**< A special character is outside its allowed context */
-    U_INTERNAL_TRANSLITERATOR_ERROR,  /**< Internal transliterator system error */
-    U_INVALID_ID,                     /**< A "::id" rule specifies an unknown transliterator */
-    U_INVALID_FUNCTION,               /**< A "&fn()" rule specifies an unknown transliterator */
-    U_PARSE_ERROR_LIMIT,              /**< The limit for Transliterator errors */
-
-    /*
-     * the error code range 0x10100 0x10200 are reserved for formatting API parsing error
-     */
-    U_UNEXPECTED_TOKEN=0x10100,       /**< Syntax error in format pattern */
-    U_FMT_PARSE_ERROR_START=0x10100,  /**< Start of format library errors */
-    U_MULTIPLE_DECIMAL_SEPARATORS,    /**< More than one decimal separator in number pattern */
-    U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
-    U_MULTIPLE_EXPONENTIAL_SYMBOLS,   /**< More than one exponent symbol in number pattern */
-    U_MALFORMED_EXPONENTIAL_PATTERN,  /**< Grouping symbol in exponent pattern */
-    U_MULTIPLE_PERCENT_SYMBOLS,       /**< More than one percent symbol in number pattern */
-    U_MULTIPLE_PERMILL_SYMBOLS,       /**< More than one permill symbol in number pattern */
-    U_MULTIPLE_PAD_SPECIFIERS,        /**< More than one pad symbol in number pattern */
-    U_PATTERN_SYNTAX_ERROR,           /**< Syntax error in format pattern */
-    U_ILLEGAL_PAD_POSITION,           /**< Pad symbol misplaced in number pattern */
-    U_UNMATCHED_BRACES,               /**< Braces do not match in message pattern */
-    U_UNSUPPORTED_PROPERTY,           /**< UNUSED as of ICU 2.4 */
-    U_UNSUPPORTED_ATTRIBUTE,          /**< UNUSED as of ICU 2.4 */
-    U_ARGUMENT_TYPE_MISMATCH,         /**< Argument name and argument index mismatch in MessageFormat functions */
-    U_DUPLICATE_KEYWORD,              /**< Duplicate keyword in PluralFormat */
-    U_UNDEFINED_KEYWORD,              /**< Undefined Pluarl keyword */
-    U_DEFAULT_KEYWORD_MISSING,        /**< Missing DEFAULT rule in plural rules */
-    U_FMT_PARSE_ERROR_LIMIT,          /**< The limit for format library errors */
-
-    /*
-     * the error code range 0x10200 0x102ff are reserved for Break Iterator related error
-     */
-    U_BRK_INTERNAL_ERROR=0x10200,          /**< An internal error (bug) was detected.             */
-    U_BRK_ERROR_START=0x10200,             /**< Start of codes indicating Break Iterator failures */
-    U_BRK_HEX_DIGITS_EXPECTED,             /**< Hex digits expected as part of a escaped char in a rule. */
-    U_BRK_SEMICOLON_EXPECTED,              /**< Missing ';' at the end of a RBBI rule.            */
-    U_BRK_RULE_SYNTAX,                     /**< Syntax error in RBBI rule.                        */
-    U_BRK_UNCLOSED_SET,                    /**< UnicodeSet witing an RBBI rule missing a closing ']'.  */
-    U_BRK_ASSIGN_ERROR,                    /**< Syntax error in RBBI rule assignment statement.   */
-    U_BRK_VARIABLE_REDFINITION,            /**< RBBI rule $Variable redefined.                    */
-    U_BRK_MISMATCHED_PAREN,                /**< Mis-matched parentheses in an RBBI rule.          */
-    U_BRK_NEW_LINE_IN_QUOTED_STRING,       /**< Missing closing quote in an RBBI rule.            */
-    U_BRK_UNDEFINED_VARIABLE,              /**< Use of an undefined $Variable in an RBBI rule.    */
-    U_BRK_INIT_ERROR,                      /**< Initialization failure.  Probable missing ICU Data. */
-    U_BRK_RULE_EMPTY_SET,                  /**< Rule contains an empty Unicode Set.               */
-    U_BRK_UNRECOGNIZED_OPTION,             /**< !!option in RBBI rules not recognized.            */
-    U_BRK_MALFORMED_RULE_TAG,              /**< The {nnn} tag on a rule is mal formed             */
-    U_BRK_ERROR_LIMIT,                     /**< This must always be the last value to indicate the limit for Break Iterator failures */
-
-    /*
-     * The error codes in the range 0x10300-0x103ff are reserved for regular expression related errrs
-     */
-    U_REGEX_INTERNAL_ERROR=0x10300,       /**< An internal error (bug) was detected.              */
-    U_REGEX_ERROR_START=0x10300,          /**< Start of codes indicating Regexp failures          */
-    U_REGEX_RULE_SYNTAX,                  /**< Syntax error in regexp pattern.                    */
-    U_REGEX_INVALID_STATE,                /**< RegexMatcher in invalid state for requested operation */
-    U_REGEX_BAD_ESCAPE_SEQUENCE,          /**< Unrecognized backslash escape sequence in pattern  */
-    U_REGEX_PROPERTY_SYNTAX,              /**< Incorrect Unicode property                         */
-    U_REGEX_UNIMPLEMENTED,                /**< Use of regexp feature that is not yet implemented. */
-    U_REGEX_MISMATCHED_PAREN,             /**< Incorrectly nested parentheses in regexp pattern.  */
-    U_REGEX_NUMBER_TOO_BIG,               /**< Decimal number is too large.                       */
-    U_REGEX_BAD_INTERVAL,                 /**< Error in {min,max} interval                        */
-    U_REGEX_MAX_LT_MIN,                   /**< In {min,max}, max is less than min.                */
-    U_REGEX_INVALID_BACK_REF,             /**< Back-reference to a non-existent capture group.    */
-    U_REGEX_INVALID_FLAG,                 /**< Invalid value for match mode flags.                */
-    U_REGEX_LOOK_BEHIND_LIMIT,            /**< Look-Behind pattern matches must have a bounded maximum length.    */
-    U_REGEX_SET_CONTAINS_STRING,          /**< Regexps cannot have UnicodeSets containing strings.*/
-    U_REGEX_OCTAL_TOO_BIG,                /**< Octal character constants must be <= 0377.         */
-    U_REGEX_MISSING_CLOSE_BRACKET,        /**< Missing closing bracket on a bracket expression.   */
-    U_REGEX_INVALID_RANGE,                /**< In a character range [x-y], x is greater than y.   */
-    U_REGEX_STACK_OVERFLOW,               /**< Regular expression backtrack stack overflow.       */
-    U_REGEX_TIME_OUT,                     /**< Maximum allowed match time exceeded                */
-    U_REGEX_STOPPED_BY_CALLER,            /**< Matching operation aborted by user callback fn.    */
-    U_REGEX_ERROR_LIMIT,                  /**< This must always be the last value to indicate the limit for regexp errors */
-
-    /*
-     * The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes
-     */
-    U_IDNA_PROHIBITED_ERROR=0x10400,
-    U_IDNA_ERROR_START=0x10400,
-    U_IDNA_UNASSIGNED_ERROR,
-    U_IDNA_CHECK_BIDI_ERROR,
-    U_IDNA_STD3_ASCII_RULES_ERROR,
-    U_IDNA_ACE_PREFIX_ERROR,
-    U_IDNA_VERIFICATION_ERROR,
-    U_IDNA_LABEL_TOO_LONG_ERROR,
-    U_IDNA_ZERO_LENGTH_LABEL_ERROR,
-    U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
-    U_IDNA_ERROR_LIMIT,
-    /*
-     * Aliases for StringPrep
-     */
-    U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
-    U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
-    U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
-
-
-    U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT      /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
-} UErrorCode;
-
-/* Use the following to determine if an UErrorCode represents */
-/* operational success or failure. */
-
-#ifdef XP_CPLUSPLUS
-    /**
-     * Does the error code indicate success?
-     * @stable ICU 2.0
-     */
-    static
-    inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
-    /**
-     * Does the error code indicate a failure?
-     * @stable ICU 2.0
-     */
-    static
-    inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
-#else
-    /**
-     * Does the error code indicate success?
-     * @stable ICU 2.0
-     */
-#   define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
-    /**
-     * Does the error code indicate a failure?
-     * @stable ICU 2.0
-     */
-#   define U_FAILURE(x) ((x)>U_ZERO_ERROR)
-#endif
-
-/**
- * Return a string for a UErrorCode value.
- * The string will be the same as the name of the error code constant
- * in the UErrorCode enum above.
- * @stable ICU 2.0
- */
-U_STABLE const char * U_EXPORT2
-u_errorName(UErrorCode code);
-
-
-#endif /* _UTYPES */

Copied: MacRuby/trunk/icu-1060/unicode/utypes.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/utypes.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/utypes.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/utypes.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,801 @@
+/*
+**********************************************************************
+*   Copyright (C) 1996-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+*  FILE NAME : UTYPES.H (formerly ptypes.h)
+*
+*   Date        Name        Description
+*   12/11/96    helena      Creation.
+*   02/27/97    aliu        Added typedefs for UClassID, int8, int16, int32,
+*                           uint8, uint16, and uint32.
+*   04/01/97    aliu        Added XP_CPLUSPLUS and modified to work under C as
+*                            well as C++.
+*                           Modified to use memcpy() for uprv_arrayCopy() fns.
+*   04/14/97    aliu        Added TPlatformUtilities.
+*   05/07/97    aliu        Added import/export specifiers (replacing the old
+*                           broken EXT_CLASS).  Added version number for our
+*                           code.  Cleaned up header.
+*    6/20/97    helena      Java class name change.
+*   08/11/98    stephen     UErrorCode changed from typedef to enum
+*   08/12/98    erm         Changed T_ANALYTIC_PACKAGE_VERSION to 3
+*   08/14/98    stephen     Added uprv_arrayCopy() for int8_t, int16_t, int32_t
+*   12/09/98    jfitz       Added BUFFER_OVERFLOW_ERROR (bug 1100066)
+*   04/20/99    stephen     Cleaned up & reworked for autoconf.
+*                           Renamed to utypes.h.
+*   05/05/99    stephen     Changed to use <inttypes.h>
+*   12/07/99    helena      Moved copyright notice string from ucnv_bld.h here.
+*******************************************************************************
+*/
+
+#ifndef UTYPES_H
+#define UTYPES_H
+
+
+#include "unicode/umachine.h"
+#include "unicode/utf.h"
+#include "unicode/uversion.h"
+#include "unicode/uconfig.h"
+
+#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
+#define U_HIDE_DRAFT_API 1
+#endif
+
+#ifdef U_HIDE_DRAFT_API
+#include "unicode/udraft.h"
+#endif
+
+#ifdef U_HIDE_DEPRECATED_API
+#include "unicode/udeprctd.h"
+#endif
+
+#ifdef U_HIDE_DEPRECATED_API
+#include "unicode/uobslete.h"
+#endif
+
+#ifdef U_HIDE_INTERNAL_API
+#include "unicode/uintrnal.h"
+#endif
+
+#ifdef U_HIDE_SYSTEM_API
+#include "unicode/usystem.h"
+#endif
+
+/*!
+ * \file
+ * \brief Basic definitions for ICU, for both C and C++ APIs
+ *
+ * This file defines basic types, constants, and enumerations directly or
+ * indirectly by including other header files, especially utf.h for the
+ * basic character and string definitions and umachine.h for consistent
+ * integer and other types.
+ */
+
+/*===========================================================================*/
+/* char Character set family                                                 */
+/*===========================================================================*/
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
+ * @stable ICU 2.0
+ */
+#define U_ASCII_FAMILY 0
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
+ * @stable ICU 2.0
+ */
+#define U_EBCDIC_FAMILY 1
+
+/**
+ * \def U_CHARSET_FAMILY
+ *
+ * <p>These definitions allow to specify the encoding of text
+ * in the char data type as defined by the platform and the compiler.
+ * It is enough to determine the code point values of "invariant characters",
+ * which are the ones shared by all encodings that are in use
+ * on a given platform.</p>
+ *
+ * <p>Those "invariant characters" should be all the uppercase and lowercase
+ * latin letters, the digits, the space, and "basic punctuation".
+ * Also, '\\n', '\\r', '\\t' should be available.</p>
+ *
+ * <p>The list of "invariant characters" is:<br>
+ * \code
+ *    A-Z  a-z  0-9  SPACE  "  %  &amp;  '  (  )  *  +  ,  -  .  /  :  ;  <  =  >  ?  _
+ * \endcode
+ * <br>
+ * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
+ *
+ * <p>This matches the IBM Syntactic Character Set (CS 640).</p>
+ *
+ * <p>In other words, all the graphic characters in 7-bit ASCII should
+ * be safely accessible except the following:</p>
+ *
+ * \code
+ *    '\' <backslash>
+ *    '[' <left bracket>
+ *    ']' <right bracket>
+ *    '{' <left brace>
+ *    '}' <right brace>
+ *    '^' <circumflex>
+ *    '~' <tilde>
+ *    '!' <exclamation mark>
+ *    '#' <number sign>
+ *    '|' <vertical line>
+ *    '$' <dollar sign>
+ *    '@' <commercial at>
+ *    '`' <grave accent>
+ * \endcode
+ * @stable ICU 2.0
+ */
+
+#ifndef U_CHARSET_FAMILY
+#   define U_CHARSET_FAMILY 0
+#endif
+
+/*===========================================================================*/
+/* ICUDATA naming scheme                                                     */
+/*===========================================================================*/
+
+/**
+ * \def U_ICUDATA_TYPE_LETTER
+ *
+ * This is a platform-dependent string containing one letter:
+ * - b for big-endian, ASCII-family platforms
+ * - l for little-endian, ASCII-family platforms
+ * - e for big-endian, EBCDIC-family platforms
+ * This letter is part of the common data file name.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_ICUDATA_TYPE_LITLETTER
+ * The non-string form of U_ICUDATA_TYPE_LETTER
+ * @stable ICU 2.0
+ */
+#if U_CHARSET_FAMILY
+#   if U_IS_BIG_ENDIAN
+   /* EBCDIC - should always be BE */
+#     define U_ICUDATA_TYPE_LETTER "e"
+#     define U_ICUDATA_TYPE_LITLETTER e
+#   else
+#     error "Don't know what to do with little endian EBCDIC!"
+#     define U_ICUDATA_TYPE_LETTER "x"
+#     define U_ICUDATA_TYPE_LITLETTER x
+#   endif
+#else
+#   if U_IS_BIG_ENDIAN
+      /* Big-endian ASCII */
+#     define U_ICUDATA_TYPE_LETTER "b"
+#     define U_ICUDATA_TYPE_LITLETTER b
+#   else
+      /* Little-endian ASCII */
+#     define U_ICUDATA_TYPE_LETTER "l"
+#     define U_ICUDATA_TYPE_LITLETTER l
+#   endif
+#endif
+
+/**
+ * A single string literal containing the icudata stub name. i.e. 'icudt18e' for
+ * ICU 1.8.x on EBCDIC, etc..
+ * @stable ICU 2.0
+ */
+#define U_ICUDATA_NAME    "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
+
+
+/**
+ *  U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
+ *    Defined as a literal, not a string.
+ *    Tricky Preprocessor use - ## operator replaces macro paramters with the literal string
+ *                              from the corresponding macro invocation, _before_ other macro substitutions.
+ *                              Need a nested \#defines to get the actual version numbers rather than
+ *                              the literal text U_ICU_VERSION_MAJOR_NUM into the name.
+ *                              The net result will be something of the form
+ *                                  \#define U_ICU_ENTRY_POINT icudt19_dat
+ * @stable ICU 2.4
+ */
+#define U_ICUDATA_ENTRY_POINT  U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM)
+
+/**
+ * Do not use.
+ * @internal
+ */
+#define U_DEF2_ICUDATA_ENTRY_POINT(major, minor) U_DEF_ICUDATA_ENTRY_POINT(major, minor)
+/**
+ * Do not use.
+ * @internal
+ */
+#define U_DEF_ICUDATA_ENTRY_POINT(major, minor) icudt##major##minor##_dat
+
+/**
+ * \def U_CALLCONV
+ * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
+ * in callback function typedefs to make sure that the calling convention
+ * is compatible.
+ *
+ * This is only used for non-ICU-API functions.
+ * When a function is a public ICU API,
+ * you must use the U_CAPI and U_EXPORT2 qualifiers.
+ * @stable ICU 2.0
+ */
+#if defined(OS390) && (__COMPILER_VER__ < 0x41020000) && defined(XP_CPLUSPLUS)
+#    define U_CALLCONV __cdecl
+#else
+#    define U_CALLCONV U_EXPORT2
+#endif
+
+/**
+ * \def NULL
+ * Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
+ * @stable ICU 2.0
+ */
+#ifndef NULL
+#ifdef XP_CPLUSPLUS
+#define NULL    0
+#else
+#define NULL    ((void *)0)
+#endif
+#endif
+
+/*===========================================================================*/
+/* Calendar/TimeZone data types                                              */
+/*===========================================================================*/
+
+/**
+ * Date and Time data type.
+ * This is a primitive data type that holds the date and time
+ * as the number of milliseconds since 1970-jan-01, 00:00 UTC.
+ * UTC leap seconds are ignored.
+ * @stable ICU 2.0
+ */
+typedef double UDate;
+
+/** The number of milliseconds per second @stable ICU 2.0 */
+#define U_MILLIS_PER_SECOND        (1000)
+/** The number of milliseconds per minute @stable ICU 2.0 */
+#define U_MILLIS_PER_MINUTE       (60000)
+/** The number of milliseconds per hour @stable ICU 2.0 */
+#define U_MILLIS_PER_HOUR       (3600000)
+/** The number of milliseconds per day @stable ICU 2.0 */
+#define U_MILLIS_PER_DAY       (86400000)
+
+
+/*===========================================================================*/
+/* UClassID-based RTTI */
+/*===========================================================================*/
+
+/**
+ * UClassID is used to identify classes without using RTTI, since RTTI
+ * is not yet supported by all C++ compilers.  Each class hierarchy which needs
+ * to implement polymorphic clone() or operator==() defines two methods,
+ * described in detail below.  UClassID values can be compared using
+ * operator==(). Nothing else should be done with them.
+ *
+ * \par
+ * getDynamicClassID() is declared in the base class of the hierarchy as
+ * a pure virtual.  Each concrete subclass implements it in the same way:
+ *
+ * \code
+ *      class Base {
+ *      public:
+ *          virtual UClassID getDynamicClassID() const = 0;
+ *      }
+ *
+ *      class Derived {
+ *      public:
+ *          virtual UClassID getDynamicClassID() const
+ *            { return Derived::getStaticClassID(); }
+ *      }
+ * \endcode
+ *
+ * Each concrete class implements getStaticClassID() as well, which allows
+ * clients to test for a specific type.
+ *
+ * \code
+ *      class Derived {
+ *      public:
+ *          static UClassID U_EXPORT2 getStaticClassID();
+ *      private:
+ *          static char fgClassID;
+ *      }
+ *
+ *      // In Derived.cpp:
+ *      UClassID Derived::getStaticClassID()
+ *        { return (UClassID)&Derived::fgClassID; }
+ *      char Derived::fgClassID = 0; // Value is irrelevant
+ * \endcode
+ * @stable ICU 2.0
+ */
+typedef void* UClassID;
+
+/*===========================================================================*/
+/* Shared library/DLL import-export API control                              */
+/*===========================================================================*/
+
+/*
+ * Control of symbol import/export.
+ * ICU is separated into three libraries.
+ */
+
+/*
+ * \def U_COMBINED_IMPLEMENTATION
+ * Set to export library symbols from inside the ICU library
+ * when all of ICU is in a single library.
+ * This can be set as a compiler option while building ICU, and it
+ * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_DATA_API
+ * Set to export library symbols from inside the stubdata library,
+ * and to import them from outside.
+ * @stable ICU 3.0
+ */
+
+/**
+ * \def U_COMMON_API
+ * Set to export library symbols from inside the common library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_I18N_API
+ * Set to export library symbols from inside the i18n library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_LAYOUT_API
+ * Set to export library symbols from inside the layout engine library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_LAYOUTEX_API
+ * Set to export library symbols from inside the layout extensions library,
+ * and to import them from outside.
+ * @stable ICU 2.6
+ */
+
+/**
+ * \def U_IO_API
+ * Set to export library symbols from inside the ustdio library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_TOOLUTIL_API
+ * Set to export library symbols from inside the toolutil library,
+ * and to import them from outside.
+ * @stable ICU 3.4
+ */
+
+#if defined(U_COMBINED_IMPLEMENTATION)
+#define U_DATA_API     U_EXPORT
+#define U_COMMON_API   U_EXPORT
+#define U_I18N_API     U_EXPORT
+#define U_LAYOUT_API   U_EXPORT
+#define U_LAYOUTEX_API U_EXPORT
+#define U_IO_API       U_EXPORT
+#define U_TOOLUTIL_API U_EXPORT
+#elif defined(U_STATIC_IMPLEMENTATION)
+#define U_DATA_API
+#define U_COMMON_API
+#define U_I18N_API
+#define U_LAYOUT_API
+#define U_LAYOUTEX_API
+#define U_IO_API
+#define U_TOOLUTIL_API
+#elif defined(U_COMMON_IMPLEMENTATION)
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_EXPORT
+#define U_I18N_API     U_IMPORT
+#define U_LAYOUT_API   U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API       U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_I18N_IMPLEMENTATION)
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_IMPORT
+#define U_I18N_API     U_EXPORT
+#define U_LAYOUT_API   U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API       U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_LAYOUT_IMPLEMENTATION)
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_IMPORT
+#define U_I18N_API     U_IMPORT
+#define U_LAYOUT_API   U_EXPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API       U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_LAYOUTEX_IMPLEMENTATION)
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_IMPORT
+#define U_I18N_API     U_IMPORT
+#define U_LAYOUT_API   U_IMPORT
+#define U_LAYOUTEX_API U_EXPORT
+#define U_IO_API       U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_IO_IMPLEMENTATION)
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_IMPORT
+#define U_I18N_API     U_IMPORT
+#define U_LAYOUT_API   U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API       U_EXPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_TOOLUTIL_IMPLEMENTATION)
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_IMPORT
+#define U_I18N_API     U_IMPORT
+#define U_LAYOUT_API   U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API       U_IMPORT
+#define U_TOOLUTIL_API U_EXPORT
+#else
+#define U_DATA_API     U_IMPORT
+#define U_COMMON_API   U_IMPORT
+#define U_I18N_API     U_IMPORT
+#define U_LAYOUT_API   U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API       U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#endif
+
+/**
+ * \def U_STANDARD_CPP_NAMESPACE
+ * Control of C++ Namespace
+ * @stable ICU 2.0
+ */
+#ifdef __cplusplus
+#define U_STANDARD_CPP_NAMESPACE        ::
+#else
+#define U_STANDARD_CPP_NAMESPACE
+#endif
+
+
+/*===========================================================================*/
+/* Global delete operator                                                    */
+/*===========================================================================*/
+
+/*
+ * The ICU4C library must not use the global new and delete operators.
+ * These operators here are defined to enable testing for this.
+ * See Jitterbug 2581 for details of why this is necessary.
+ *
+ * Verification that ICU4C's memory usage is correct, i.e.,
+ * that global new/delete are not used:
+ *
+ * a) Check for imports of global new/delete (see uobject.cpp for details)
+ * b) Verify that new is never imported.
+ * c) Verify that delete is only imported from object code for interface/mixin classes.
+ * d) Add global delete and delete[] only for the ICU4C library itself
+ *    and define them in a way that crashes or otherwise easily shows a problem.
+ *
+ * The following implements d).
+ * The operator implementations crash; this is intentional and used for library debugging.
+ *
+ * Note: This is currently only done on Windows because
+ * some Linux/Unix compilers have problems with defining global new/delete.
+ * On Windows, U_WINDOWS is defined, and it is _MSC_VER>=1200 for MSVC 6.0 and higher.
+ */
+#if defined(XP_CPLUSPLUS) && defined(U_WINDOWS) && U_DEBUG && U_OVERRIDE_CXX_ALLOCATION && (_MSC_VER>=1200) && !defined(U_STATIC_IMPLEMENTATION) && (defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION))
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Global operator new, defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void *
+operator new(size_t /*size*/) {
+    char *q=NULL;
+    *q=5; /* break it */
+    return q;
+}
+
+#ifdef _Ret_bytecap_
+/* This is only needed to suppress a Visual C++ 2008 warning for operator new[]. */
+_Ret_bytecap_(_Size)
+#endif
+/**
+ * Global operator new[], defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void *
+operator new[](size_t /*size*/) {
+    char *q=NULL;
+    *q=5; /* break it */
+    return q;
+}
+
+/**
+ * Global operator delete, defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void
+operator delete(void * /*p*/) {
+    char *q=NULL;
+    *q=5; /* break it */
+}
+
+/**
+ * Global operator delete[], defined only inside ICU4C, must not be used.
+ * Crashes intentionally.
+ * @internal
+ */
+inline void
+operator delete[](void * /*p*/) {
+    char *q=NULL;
+    *q=5; /* break it */
+}
+
+#endif /* U_HIDE_INTERNAL_API */
+#endif
+
+/*===========================================================================*/
+/* UErrorCode */
+/*===========================================================================*/
+
+/**
+ * Error code to replace exception handling, so that the code is compatible with all C++ compilers,
+ * and to use the same mechanism for C and C++.
+ *
+ * \par
+ * ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode
+ * first test if(U_FAILURE(errorCode)) { return immediately; }
+ * so that in a chain of such functions the first one that sets an error code
+ * causes the following ones to not perform any operations.
+ *
+ * \par
+ * Error codes should be tested using U_FAILURE() and U_SUCCESS().
+ * @stable ICU 2.0
+ */
+typedef enum UErrorCode {
+    /* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird
+     * and is that way because VC++ debugger displays first encountered constant,
+     * which is not the what the code is used for
+     */
+
+    U_USING_FALLBACK_WARNING  = -128,   /**< A resource bundle lookup returned a fallback result (not an error) */
+
+    U_ERROR_WARNING_START     = -128,   /**< Start of information results (semantically successful) */
+
+    U_USING_DEFAULT_WARNING   = -127,   /**< A resource bundle lookup returned a result from the root locale (not an error) */
+
+    U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
+
+    U_STATE_OLD_WARNING       = -125,   /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
+
+    U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
+
+    U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
+
+    U_AMBIGUOUS_ALIAS_WARNING = -122,   /**< This converter alias can go to different converter implementations */
+
+    U_DIFFERENT_UCA_VERSION = -121,     /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
+
+    U_ERROR_WARNING_LIMIT,              /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */
+
+
+    U_ZERO_ERROR              =  0,     /**< No error, no warning. */
+
+    U_ILLEGAL_ARGUMENT_ERROR  =  1,     /**< Start of codes indicating failure */
+    U_MISSING_RESOURCE_ERROR  =  2,     /**< The requested resource cannot be found */
+    U_INVALID_FORMAT_ERROR    =  3,     /**< Data format is not what is expected */
+    U_FILE_ACCESS_ERROR       =  4,     /**< The requested file cannot be found */
+    U_INTERNAL_PROGRAM_ERROR  =  5,     /**< Indicates a bug in the library code */
+    U_MESSAGE_PARSE_ERROR     =  6,     /**< Unable to parse a message (message format) */
+    U_MEMORY_ALLOCATION_ERROR =  7,     /**< Memory allocation error */
+    U_INDEX_OUTOFBOUNDS_ERROR =  8,     /**< Trying to access the index that is out of bounds */
+    U_PARSE_ERROR             =  9,     /**< Equivalent to Java ParseException */
+    U_INVALID_CHAR_FOUND      = 10,     /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
+    U_TRUNCATED_CHAR_FOUND    = 11,     /**< Character conversion: Incomplete input sequence. */
+    U_ILLEGAL_CHAR_FOUND      = 12,     /**< Character conversion: Illegal input sequence/combination of input units. */
+    U_INVALID_TABLE_FORMAT    = 13,     /**< Conversion table file found, but corrupted */
+    U_INVALID_TABLE_FILE      = 14,     /**< Conversion table file not found */
+    U_BUFFER_OVERFLOW_ERROR   = 15,     /**< A result would not fit in the supplied buffer */
+    U_UNSUPPORTED_ERROR       = 16,     /**< Requested operation not supported in current context */
+    U_RESOURCE_TYPE_MISMATCH  = 17,     /**< an operation is requested over a resource that does not support it */
+    U_ILLEGAL_ESCAPE_SEQUENCE = 18,     /**< ISO-2022 illlegal escape sequence */
+    U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
+    U_NO_SPACE_AVAILABLE      = 20,     /**< No space available for in-buffer expansion for Arabic shaping */
+    U_CE_NOT_FOUND_ERROR      = 21,     /**< Currently used only while setting variable top, but can be used generally */
+    U_PRIMARY_TOO_LONG_ERROR  = 22,     /**< User tried to set variable top to a primary that is longer than two bytes */
+    U_STATE_TOO_OLD_ERROR     = 23,     /**< ICU cannot construct a service from this state, as it is no longer supported */
+    U_TOO_MANY_ALIASES_ERROR  = 24,     /**< There are too many aliases in the path to the requested resource.
+                                             It is very possible that a circular alias definition has occured */
+    U_ENUM_OUT_OF_SYNC_ERROR  = 25,     /**< UEnumeration out of sync with underlying collection */
+    U_INVARIANT_CONVERSION_ERROR = 26,  /**< Unable to convert a UChar* string to char* with the invariant converter. */
+    U_INVALID_STATE_ERROR     = 27,     /**< Requested operation can not be completed with ICU in its current state */
+    U_COLLATOR_VERSION_MISMATCH = 28,   /**< Collator version is not compatible with the base version */
+    U_USELESS_COLLATOR_ERROR  = 29,     /**< Collator is options only and no base is specified */
+    U_NO_WRITE_PERMISSION     = 30,     /**< Attempt to modify read-only or constant data. */
+
+    U_STANDARD_ERROR_LIMIT,             /**< This must always be the last value to indicate the limit for standard errors */
+    /*
+     * the error code range 0x10000 0x10100 are reserved for Transliterator
+     */
+    U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
+    U_PARSE_ERROR_START = 0x10000,    /**< Start of Transliterator errors */
+    U_MALFORMED_RULE,                 /**< Elements of a rule are misplaced */
+    U_MALFORMED_SET,                  /**< A UnicodeSet pattern is invalid*/
+    U_MALFORMED_SYMBOL_REFERENCE,     /**< UNUSED as of ICU 2.4 */
+    U_MALFORMED_UNICODE_ESCAPE,       /**< A Unicode escape pattern is invalid*/
+    U_MALFORMED_VARIABLE_DEFINITION,  /**< A variable definition is invalid */
+    U_MALFORMED_VARIABLE_REFERENCE,   /**< A variable reference is invalid */
+    U_MISMATCHED_SEGMENT_DELIMITERS,  /**< UNUSED as of ICU 2.4 */
+    U_MISPLACED_ANCHOR_START,         /**< A start anchor appears at an illegal position */
+    U_MISPLACED_CURSOR_OFFSET,        /**< A cursor offset occurs at an illegal position */
+    U_MISPLACED_QUANTIFIER,           /**< A quantifier appears after a segment close delimiter */
+    U_MISSING_OPERATOR,               /**< A rule contains no operator */
+    U_MISSING_SEGMENT_CLOSE,          /**< UNUSED as of ICU 2.4 */
+    U_MULTIPLE_ANTE_CONTEXTS,         /**< More than one ante context */
+    U_MULTIPLE_CURSORS,               /**< More than one cursor */
+    U_MULTIPLE_POST_CONTEXTS,         /**< More than one post context */
+    U_TRAILING_BACKSLASH,             /**< A dangling backslash */
+    U_UNDEFINED_SEGMENT_REFERENCE,    /**< A segment reference does not correspond to a defined segment */
+    U_UNDEFINED_VARIABLE,             /**< A variable reference does not correspond to a defined variable */
+    U_UNQUOTED_SPECIAL,               /**< A special character was not quoted or escaped */
+    U_UNTERMINATED_QUOTE,             /**< A closing single quote is missing */
+    U_RULE_MASK_ERROR,                /**< A rule is hidden by an earlier more general rule */
+    U_MISPLACED_COMPOUND_FILTER,      /**< A compound filter is in an invalid location */
+    U_MULTIPLE_COMPOUND_FILTERS,      /**< More than one compound filter */
+    U_INVALID_RBT_SYNTAX,             /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
+    U_INVALID_PROPERTY_PATTERN,       /**< UNUSED as of ICU 2.4 */
+    U_MALFORMED_PRAGMA,               /**< A 'use' pragma is invlalid */
+    U_UNCLOSED_SEGMENT,               /**< A closing ')' is missing */
+    U_ILLEGAL_CHAR_IN_SEGMENT,        /**< UNUSED as of ICU 2.4 */
+    U_VARIABLE_RANGE_EXHAUSTED,       /**< Too many stand-ins generated for the given variable range */
+    U_VARIABLE_RANGE_OVERLAP,         /**< The variable range overlaps characters used in rules */
+    U_ILLEGAL_CHARACTER,              /**< A special character is outside its allowed context */
+    U_INTERNAL_TRANSLITERATOR_ERROR,  /**< Internal transliterator system error */
+    U_INVALID_ID,                     /**< A "::id" rule specifies an unknown transliterator */
+    U_INVALID_FUNCTION,               /**< A "&fn()" rule specifies an unknown transliterator */
+    U_PARSE_ERROR_LIMIT,              /**< The limit for Transliterator errors */
+
+    /*
+     * the error code range 0x10100 0x10200 are reserved for formatting API parsing error
+     */
+    U_UNEXPECTED_TOKEN=0x10100,       /**< Syntax error in format pattern */
+    U_FMT_PARSE_ERROR_START=0x10100,  /**< Start of format library errors */
+    U_MULTIPLE_DECIMAL_SEPARATORS,    /**< More than one decimal separator in number pattern */
+    U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
+    U_MULTIPLE_EXPONENTIAL_SYMBOLS,   /**< More than one exponent symbol in number pattern */
+    U_MALFORMED_EXPONENTIAL_PATTERN,  /**< Grouping symbol in exponent pattern */
+    U_MULTIPLE_PERCENT_SYMBOLS,       /**< More than one percent symbol in number pattern */
+    U_MULTIPLE_PERMILL_SYMBOLS,       /**< More than one permill symbol in number pattern */
+    U_MULTIPLE_PAD_SPECIFIERS,        /**< More than one pad symbol in number pattern */
+    U_PATTERN_SYNTAX_ERROR,           /**< Syntax error in format pattern */
+    U_ILLEGAL_PAD_POSITION,           /**< Pad symbol misplaced in number pattern */
+    U_UNMATCHED_BRACES,               /**< Braces do not match in message pattern */
+    U_UNSUPPORTED_PROPERTY,           /**< UNUSED as of ICU 2.4 */
+    U_UNSUPPORTED_ATTRIBUTE,          /**< UNUSED as of ICU 2.4 */
+    U_ARGUMENT_TYPE_MISMATCH,         /**< Argument name and argument index mismatch in MessageFormat functions */
+    U_DUPLICATE_KEYWORD,              /**< Duplicate keyword in PluralFormat */
+    U_UNDEFINED_KEYWORD,              /**< Undefined Pluarl keyword */
+    U_DEFAULT_KEYWORD_MISSING,        /**< Missing DEFAULT rule in plural rules */
+    U_FMT_PARSE_ERROR_LIMIT,          /**< The limit for format library errors */
+
+    /*
+     * the error code range 0x10200 0x102ff are reserved for Break Iterator related error
+     */
+    U_BRK_INTERNAL_ERROR=0x10200,          /**< An internal error (bug) was detected.             */
+    U_BRK_ERROR_START=0x10200,             /**< Start of codes indicating Break Iterator failures */
+    U_BRK_HEX_DIGITS_EXPECTED,             /**< Hex digits expected as part of a escaped char in a rule. */
+    U_BRK_SEMICOLON_EXPECTED,              /**< Missing ';' at the end of a RBBI rule.            */
+    U_BRK_RULE_SYNTAX,                     /**< Syntax error in RBBI rule.                        */
+    U_BRK_UNCLOSED_SET,                    /**< UnicodeSet witing an RBBI rule missing a closing ']'.  */
+    U_BRK_ASSIGN_ERROR,                    /**< Syntax error in RBBI rule assignment statement.   */
+    U_BRK_VARIABLE_REDFINITION,            /**< RBBI rule $Variable redefined.                    */
+    U_BRK_MISMATCHED_PAREN,                /**< Mis-matched parentheses in an RBBI rule.          */
+    U_BRK_NEW_LINE_IN_QUOTED_STRING,       /**< Missing closing quote in an RBBI rule.            */
+    U_BRK_UNDEFINED_VARIABLE,              /**< Use of an undefined $Variable in an RBBI rule.    */
+    U_BRK_INIT_ERROR,                      /**< Initialization failure.  Probable missing ICU Data. */
+    U_BRK_RULE_EMPTY_SET,                  /**< Rule contains an empty Unicode Set.               */
+    U_BRK_UNRECOGNIZED_OPTION,             /**< !!option in RBBI rules not recognized.            */
+    U_BRK_MALFORMED_RULE_TAG,              /**< The {nnn} tag on a rule is mal formed             */
+    U_BRK_ERROR_LIMIT,                     /**< This must always be the last value to indicate the limit for Break Iterator failures */
+
+    /*
+     * The error codes in the range 0x10300-0x103ff are reserved for regular expression related errrs
+     */
+    U_REGEX_INTERNAL_ERROR=0x10300,       /**< An internal error (bug) was detected.              */
+    U_REGEX_ERROR_START=0x10300,          /**< Start of codes indicating Regexp failures          */
+    U_REGEX_RULE_SYNTAX,                  /**< Syntax error in regexp pattern.                    */
+    U_REGEX_INVALID_STATE,                /**< RegexMatcher in invalid state for requested operation */
+    U_REGEX_BAD_ESCAPE_SEQUENCE,          /**< Unrecognized backslash escape sequence in pattern  */
+    U_REGEX_PROPERTY_SYNTAX,              /**< Incorrect Unicode property                         */
+    U_REGEX_UNIMPLEMENTED,                /**< Use of regexp feature that is not yet implemented. */
+    U_REGEX_MISMATCHED_PAREN,             /**< Incorrectly nested parentheses in regexp pattern.  */
+    U_REGEX_NUMBER_TOO_BIG,               /**< Decimal number is too large.                       */
+    U_REGEX_BAD_INTERVAL,                 /**< Error in {min,max} interval                        */
+    U_REGEX_MAX_LT_MIN,                   /**< In {min,max}, max is less than min.                */
+    U_REGEX_INVALID_BACK_REF,             /**< Back-reference to a non-existent capture group.    */
+    U_REGEX_INVALID_FLAG,                 /**< Invalid value for match mode flags.                */
+    U_REGEX_LOOK_BEHIND_LIMIT,            /**< Look-Behind pattern matches must have a bounded maximum length.    */
+    U_REGEX_SET_CONTAINS_STRING,          /**< Regexps cannot have UnicodeSets containing strings.*/
+    U_REGEX_OCTAL_TOO_BIG,                /**< Octal character constants must be <= 0377.         */
+    U_REGEX_MISSING_CLOSE_BRACKET,        /**< Missing closing bracket on a bracket expression.   */
+    U_REGEX_INVALID_RANGE,                /**< In a character range [x-y], x is greater than y.   */
+    U_REGEX_STACK_OVERFLOW,               /**< Regular expression backtrack stack overflow.       */
+    U_REGEX_TIME_OUT,                     /**< Maximum allowed match time exceeded                */
+    U_REGEX_STOPPED_BY_CALLER,            /**< Matching operation aborted by user callback fn.    */
+    U_REGEX_ERROR_LIMIT,                  /**< This must always be the last value to indicate the limit for regexp errors */
+
+    /*
+     * The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes
+     */
+    U_IDNA_PROHIBITED_ERROR=0x10400,
+    U_IDNA_ERROR_START=0x10400,
+    U_IDNA_UNASSIGNED_ERROR,
+    U_IDNA_CHECK_BIDI_ERROR,
+    U_IDNA_STD3_ASCII_RULES_ERROR,
+    U_IDNA_ACE_PREFIX_ERROR,
+    U_IDNA_VERIFICATION_ERROR,
+    U_IDNA_LABEL_TOO_LONG_ERROR,
+    U_IDNA_ZERO_LENGTH_LABEL_ERROR,
+    U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
+    U_IDNA_ERROR_LIMIT,
+    /*
+     * Aliases for StringPrep
+     */
+    U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
+    U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
+    U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
+
+
+    U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT      /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
+} UErrorCode;
+
+/* Use the following to determine if an UErrorCode represents */
+/* operational success or failure. */
+
+#ifdef XP_CPLUSPLUS
+    /**
+     * Does the error code indicate success?
+     * @stable ICU 2.0
+     */
+    static
+    inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
+    /**
+     * Does the error code indicate a failure?
+     * @stable ICU 2.0
+     */
+    static
+    inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
+#else
+    /**
+     * Does the error code indicate success?
+     * @stable ICU 2.0
+     */
+#   define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
+    /**
+     * Does the error code indicate a failure?
+     * @stable ICU 2.0
+     */
+#   define U_FAILURE(x) ((x)>U_ZERO_ERROR)
+#endif
+
+/**
+ * Return a string for a UErrorCode value.
+ * The string will be the same as the name of the error code constant
+ * in the UErrorCode enum above.
+ * @stable ICU 2.0
+ */
+U_STABLE const char * U_EXPORT2
+u_errorName(UErrorCode code);
+
+
+#endif /* _UTYPES */

Deleted: MacRuby/trunk/icu-1060/unicode/uversion.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/uversion.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/uversion.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,275 +0,0 @@
-/*
-*******************************************************************************
-*   Copyright (C) 2000-2008, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*******************************************************************************
-*
-*   file name:  uversion.h
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   Created by: Vladimir Weinstein
-*
-*  Contains all the important version numbers for ICU. 
-*  Gets included by utypes.h and Windows .rc files
-*/
-
-/**
- * \file
- * \brief C API: Contains all the important version numbers for ICU. 
- */
-/*===========================================================================*/
-/* Main ICU version information                                              */
-/*===========================================================================*/
-
-#ifndef UVERSION_H
-#define UVERSION_H
-
-/**
- * IMPORTANT: When updating version, the following things need to be done:
- * source/common/unicode/uversion.h - this file: update major, minor,
- *        patchlevel, suffix, version, short version constants, namespace,
- *                                                             and copyright
- * source/common/common.vcproj - update 'Output file name' on the link tab so
- *                   that it contains the new major/minor combination
- * source/i18n/i18n.vcproj - same as for the common.vcproj
- * source/layout/layout.vcproj - same as for the common.vcproj
- * source/layoutex/layoutex.vcproj - same
- * source/stubdata/stubdata.vcproj - same as for the common.vcproj
- * source/io/io.vcproj - same as for the common.vcproj
- * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains
- *                            the new major/minor combination
- * source/tools/genren/genren.pl - use this script according to the README
- *                    in that folder                                         
- */
-
-#include "unicode/umachine.h"
-
-/** The standard copyright notice that gets compiled into each library. 
- *  This value will change in the subsequent releases of ICU
- *  @stable ICU 2.4
- */
-#define U_COPYRIGHT_STRING \
-  " Copyright (C) 2008, International Business Machines Corporation and others. All Rights Reserved. "
-
-/** Maximum length of the copyright string.
- *  @stable ICU 2.4
- */
-#define U_COPYRIGHT_STRING_LENGTH  128
-
-/** The current ICU major version as an integer. 
- *  This value will change in the subsequent releases of ICU
- *  @stable ICU 2.4
- */
-#define U_ICU_VERSION_MAJOR_NUM 4
-
-/** The current ICU minor version as an integer. 
- *  This value will change in the subsequent releases of ICU
- *  @stable ICU 2.6
- */
-#define U_ICU_VERSION_MINOR_NUM 0
-
-/** The current ICU patchlevel version as an integer.  
- *  This value will change in the subsequent releases of ICU
- *  @stable ICU 2.4
- */
-#define U_ICU_VERSION_PATCHLEVEL_NUM 0
-
-/** The current ICU build level version as an integer.  
- *  This value is for use by ICU clients. It defaults to 0.
- *  @draft ICU 4.0
- */
-#ifndef U_ICU_VERSION_BUILDLEVEL_NUM
-#define U_ICU_VERSION_BUILDLEVEL_NUM 0
-#endif
-
-/** Glued version suffix for renamers 
- *  This value will change in the subsequent releases of ICU
- *  @stable ICU 2.6
- */
-#define U_ICU_VERSION_SUFFIX _4_0
-
-/** The current ICU library version as a dotted-decimal string. The patchlevel
- *  only appears in this string if it non-zero. 
- *  This value will change in the subsequent releases of ICU
- *  @stable ICU 2.4
- */
-#define U_ICU_VERSION "4.0"
-
-/** The current ICU library major/minor version as a string without dots, for library name suffixes. 
- *  This value will change in the subsequent releases of ICU
- *  @stable ICU 2.6
- */
-#define U_ICU_VERSION_SHORT "40"
-
-/** An ICU version consists of up to 4 numbers from 0..255.
- *  @stable ICU 2.4
- */
-#define U_MAX_VERSION_LENGTH 4
-
-/** In a string, ICU version fields are delimited by dots.
- *  @stable ICU 2.4
- */
-#define U_VERSION_DELIMITER '.'
-
-/** The maximum length of an ICU version string.
- *  @stable ICU 2.4
- */
-#define U_MAX_VERSION_STRING_LENGTH 20
-
-/** The binary form of a version on ICU APIs is an array of 4 uint8_t.
- *  @stable ICU 2.4
- */
-typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
-
-/*===========================================================================*/
-/* C++ namespace if supported. Versioned unless versioning is disabled.      */
-/*===========================================================================*/
-
-/**
- * \def U_NAMESPACE_BEGIN
- * This is used to begin a declaration of a public ICU C++ API.
- * If the compiler doesn't support namespaces, this does nothing.
- * @stable ICU 2.4
- */
-
-/**
- * \def U_NAMESPACE_END
- * This is used to end a declaration of a public ICU C++ API 
- * If the compiler doesn't support namespaces, this does nothing.
- * @stable ICU 2.4
- */
-
-/**
- * \def U_NAMESPACE_USE
- * This is used to specify that the rest of the code uses the
- * public ICU C++ API namespace.
- * If the compiler doesn't support namespaces, this does nothing.
- * @stable ICU 2.4
- */
-
-/**
- * \def U_NAMESPACE_QUALIFIER
- * This is used to qualify that a function or class is part of
- * the public ICU C++ API namespace.
- * If the compiler doesn't support namespaces, this does nothing.
- * @stable ICU 2.4
- */
-
-/* Define namespace symbols if the compiler supports it. */
-#if U_HAVE_NAMESPACE && defined(XP_CPLUSPLUS)
-#   if U_DISABLE_RENAMING
-#       define U_ICU_NAMESPACE icu
-        namespace U_ICU_NAMESPACE { }
-#   else
-#       define U_ICU_NAMESPACE icu_4_0
-        namespace U_ICU_NAMESPACE { }
-        namespace icu = U_ICU_NAMESPACE;
-#   endif
-
-#   define U_NAMESPACE_BEGIN namespace U_ICU_NAMESPACE {
-#   define U_NAMESPACE_END  }
-#   define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE;
-#   define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
-
-#   ifndef U_USING_ICU_NAMESPACE
-#       define U_USING_ICU_NAMESPACE 1
-#   endif
-#   if U_USING_ICU_NAMESPACE
-        U_NAMESPACE_USE
-#   endif
-#else
-#   define U_NAMESPACE_BEGIN
-#   define U_NAMESPACE_END
-#   define U_NAMESPACE_USE
-#   define U_NAMESPACE_QUALIFIER
-#endif
-
-
-/*===========================================================================*/
-/* General version helper functions. Definitions in putil.c                  */
-/*===========================================================================*/
-
-/**
- * Parse a string with dotted-decimal version information and
- * fill in a UVersionInfo structure with the result.
- * Definition of this function lives in putil.c
- *
- * @param versionArray The destination structure for the version information.
- * @param versionString A string with dotted-decimal version information,
- *                      with up to four non-negative number fields with
- *                      values of up to 255 each.
- * @stable ICU 2.4
- */
-U_STABLE void U_EXPORT2
-u_versionFromString(UVersionInfo versionArray, const char *versionString);
-
-/**
- * Write a string with dotted-decimal version information according
- * to the input UVersionInfo.
- * Definition of this function lives in putil.c
- *
- * @param versionArray The version information to be written as a string.
- * @param versionString A string buffer that will be filled in with
- *                      a string corresponding to the numeric version
- *                      information in versionArray.
- *                      The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.
- * @stable ICU 2.4
- */
-U_STABLE void U_EXPORT2
-u_versionToString(UVersionInfo versionArray, char *versionString);
-
-/**
- * Gets the ICU release version.  The version array stores the version information
- * for ICU.  For example, release "1.3.31.2" is then represented as 0x01031F02.
- * Definition of this function lives in putil.c
- *
- * @param versionArray the version # information, the result will be filled in
- * @stable ICU 2.0
- */
-U_STABLE void U_EXPORT2
-u_getVersion(UVersionInfo versionArray);
-
-
-/*===========================================================================
- * ICU collation framework version information                               
- * Version info that can be obtained from a collator is affected by these    
- * numbers in a secret and magic way. Please use collator version as whole
- *===========================================================================
- */
-
-/** Collation runtime version (sort key generator, strcoll). 
- * If the version is different, sortkeys for the same string could be different 
- * version 2 was in ICU 1.8.1. changed is: compression intervals, French secondary 
- * compression, generating quad level always when strength is quad or more 
- * version 4 - ICU 2.2 - tracking UCA changes, ignore completely ignorables 
- * in contractions, ignore primary ignorables after shifted 
- * version 5 - ICU 2.8 - changed implicit generation code
- * version 6 - ICU 3.4 - with the UCA 4.1, Thai tag is no longer generated or used
- * This value may change in the subsequent releases of ICU
- * @stable ICU 2.4
- */
-#define UCOL_RUNTIME_VERSION 6
-
-/** Builder code version. When this is different, same tailoring might result
- * in assigning different collation elements to code points                  
- * version 2 was in ICU 1.8.1. added support for prefixes, tweaked canonical 
- * closure. However, the tailorings should probably get same CEs assigned    
- * version 5 - ICU 2.2 - fixed some bugs, renamed some indirect values.      
- * version 6 - ICU 2.8 - fixed bug in builder that allowed 0xFF in primary values
- * version 7 - ICU 3.4 - with the UCA 4.1 Thai tag is no longer processed, complete ignorables
- *                       now break contractions
- * Backward compatible with the old rules. 
- * This value may change in the subsequent releases of ICU
- * @stable ICU 2.4
- */
-#define UCOL_BUILDER_VERSION 7
-
-/** This is the version of the tailorings 
- *  This value may change in the subsequent releases of ICU
- *  @stable ICU 2.4
- */
-#define UCOL_TAILORINGS_VERSION 1
-
-#endif

Copied: MacRuby/trunk/icu-1060/unicode/uversion.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/uversion.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/uversion.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/uversion.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,275 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2000-2008, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*
+*   file name:  uversion.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   Created by: Vladimir Weinstein
+*
+*  Contains all the important version numbers for ICU. 
+*  Gets included by utypes.h and Windows .rc files
+*/
+
+/**
+ * \file
+ * \brief C API: Contains all the important version numbers for ICU. 
+ */
+/*===========================================================================*/
+/* Main ICU version information                                              */
+/*===========================================================================*/
+
+#ifndef UVERSION_H
+#define UVERSION_H
+
+/**
+ * IMPORTANT: When updating version, the following things need to be done:
+ * source/common/unicode/uversion.h - this file: update major, minor,
+ *        patchlevel, suffix, version, short version constants, namespace,
+ *                                                             and copyright
+ * source/common/common.vcproj - update 'Output file name' on the link tab so
+ *                   that it contains the new major/minor combination
+ * source/i18n/i18n.vcproj - same as for the common.vcproj
+ * source/layout/layout.vcproj - same as for the common.vcproj
+ * source/layoutex/layoutex.vcproj - same
+ * source/stubdata/stubdata.vcproj - same as for the common.vcproj
+ * source/io/io.vcproj - same as for the common.vcproj
+ * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains
+ *                            the new major/minor combination
+ * source/tools/genren/genren.pl - use this script according to the README
+ *                    in that folder                                         
+ */
+
+#include "unicode/umachine.h"
+
+/** The standard copyright notice that gets compiled into each library. 
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING \
+  " Copyright (C) 2008, International Business Machines Corporation and others. All Rights Reserved. "
+
+/** Maximum length of the copyright string.
+ *  @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING_LENGTH  128
+
+/** The current ICU major version as an integer. 
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.4
+ */
+#define U_ICU_VERSION_MAJOR_NUM 4
+
+/** The current ICU minor version as an integer. 
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.6
+ */
+#define U_ICU_VERSION_MINOR_NUM 0
+
+/** The current ICU patchlevel version as an integer.  
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.4
+ */
+#define U_ICU_VERSION_PATCHLEVEL_NUM 0
+
+/** The current ICU build level version as an integer.  
+ *  This value is for use by ICU clients. It defaults to 0.
+ *  @draft ICU 4.0
+ */
+#ifndef U_ICU_VERSION_BUILDLEVEL_NUM
+#define U_ICU_VERSION_BUILDLEVEL_NUM 0
+#endif
+
+/** Glued version suffix for renamers 
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SUFFIX _4_0
+
+/** The current ICU library version as a dotted-decimal string. The patchlevel
+ *  only appears in this string if it non-zero. 
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.4
+ */
+#define U_ICU_VERSION "4.0"
+
+/** The current ICU library major/minor version as a string without dots, for library name suffixes. 
+ *  This value will change in the subsequent releases of ICU
+ *  @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SHORT "40"
+
+/** An ICU version consists of up to 4 numbers from 0..255.
+ *  @stable ICU 2.4
+ */
+#define U_MAX_VERSION_LENGTH 4
+
+/** In a string, ICU version fields are delimited by dots.
+ *  @stable ICU 2.4
+ */
+#define U_VERSION_DELIMITER '.'
+
+/** The maximum length of an ICU version string.
+ *  @stable ICU 2.4
+ */
+#define U_MAX_VERSION_STRING_LENGTH 20
+
+/** The binary form of a version on ICU APIs is an array of 4 uint8_t.
+ *  @stable ICU 2.4
+ */
+typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
+
+/*===========================================================================*/
+/* C++ namespace if supported. Versioned unless versioning is disabled.      */
+/*===========================================================================*/
+
+/**
+ * \def U_NAMESPACE_BEGIN
+ * This is used to begin a declaration of a public ICU C++ API.
+ * If the compiler doesn't support namespaces, this does nothing.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_END
+ * This is used to end a declaration of a public ICU C++ API 
+ * If the compiler doesn't support namespaces, this does nothing.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_USE
+ * This is used to specify that the rest of the code uses the
+ * public ICU C++ API namespace.
+ * If the compiler doesn't support namespaces, this does nothing.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_QUALIFIER
+ * This is used to qualify that a function or class is part of
+ * the public ICU C++ API namespace.
+ * If the compiler doesn't support namespaces, this does nothing.
+ * @stable ICU 2.4
+ */
+
+/* Define namespace symbols if the compiler supports it. */
+#if U_HAVE_NAMESPACE && defined(XP_CPLUSPLUS)
+#   if U_DISABLE_RENAMING
+#       define U_ICU_NAMESPACE icu
+        namespace U_ICU_NAMESPACE { }
+#   else
+#       define U_ICU_NAMESPACE icu_4_0
+        namespace U_ICU_NAMESPACE { }
+        namespace icu = U_ICU_NAMESPACE;
+#   endif
+
+#   define U_NAMESPACE_BEGIN namespace U_ICU_NAMESPACE {
+#   define U_NAMESPACE_END  }
+#   define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE;
+#   define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
+
+#   ifndef U_USING_ICU_NAMESPACE
+#       define U_USING_ICU_NAMESPACE 1
+#   endif
+#   if U_USING_ICU_NAMESPACE
+        U_NAMESPACE_USE
+#   endif
+#else
+#   define U_NAMESPACE_BEGIN
+#   define U_NAMESPACE_END
+#   define U_NAMESPACE_USE
+#   define U_NAMESPACE_QUALIFIER
+#endif
+
+
+/*===========================================================================*/
+/* General version helper functions. Definitions in putil.c                  */
+/*===========================================================================*/
+
+/**
+ * Parse a string with dotted-decimal version information and
+ * fill in a UVersionInfo structure with the result.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The destination structure for the version information.
+ * @param versionString A string with dotted-decimal version information,
+ *                      with up to four non-negative number fields with
+ *                      values of up to 255 each.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+u_versionFromString(UVersionInfo versionArray, const char *versionString);
+
+/**
+ * Write a string with dotted-decimal version information according
+ * to the input UVersionInfo.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The version information to be written as a string.
+ * @param versionString A string buffer that will be filled in with
+ *                      a string corresponding to the numeric version
+ *                      information in versionArray.
+ *                      The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.
+ * @stable ICU 2.4
+ */
+U_STABLE void U_EXPORT2
+u_versionToString(UVersionInfo versionArray, char *versionString);
+
+/**
+ * Gets the ICU release version.  The version array stores the version information
+ * for ICU.  For example, release "1.3.31.2" is then represented as 0x01031F02.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray the version # information, the result will be filled in
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+u_getVersion(UVersionInfo versionArray);
+
+
+/*===========================================================================
+ * ICU collation framework version information                               
+ * Version info that can be obtained from a collator is affected by these    
+ * numbers in a secret and magic way. Please use collator version as whole
+ *===========================================================================
+ */
+
+/** Collation runtime version (sort key generator, strcoll). 
+ * If the version is different, sortkeys for the same string could be different 
+ * version 2 was in ICU 1.8.1. changed is: compression intervals, French secondary 
+ * compression, generating quad level always when strength is quad or more 
+ * version 4 - ICU 2.2 - tracking UCA changes, ignore completely ignorables 
+ * in contractions, ignore primary ignorables after shifted 
+ * version 5 - ICU 2.8 - changed implicit generation code
+ * version 6 - ICU 3.4 - with the UCA 4.1, Thai tag is no longer generated or used
+ * This value may change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define UCOL_RUNTIME_VERSION 6
+
+/** Builder code version. When this is different, same tailoring might result
+ * in assigning different collation elements to code points                  
+ * version 2 was in ICU 1.8.1. added support for prefixes, tweaked canonical 
+ * closure. However, the tailorings should probably get same CEs assigned    
+ * version 5 - ICU 2.2 - fixed some bugs, renamed some indirect values.      
+ * version 6 - ICU 2.8 - fixed bug in builder that allowed 0xFF in primary values
+ * version 7 - ICU 3.4 - with the UCA 4.1 Thai tag is no longer processed, complete ignorables
+ *                       now break contractions
+ * Backward compatible with the old rules. 
+ * This value may change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define UCOL_BUILDER_VERSION 7
+
+/** This is the version of the tailorings 
+ *  This value may change in the subsequent releases of ICU
+ *  @stable ICU 2.4
+ */
+#define UCOL_TAILORINGS_VERSION 1
+
+#endif

Deleted: MacRuby/trunk/icu-1060/unicode/vtzone.h
===================================================================
--- MacRuby/branches/icu/icu-1060/unicode/vtzone.h	2010-03-12 21:32:03 UTC (rev 3744)
+++ MacRuby/trunk/icu-1060/unicode/vtzone.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,443 +0,0 @@
-/*
-*******************************************************************************
-* Copyright (C) 2007-2008, International Business Machines Corporation and         *
-* others. All Rights Reserved.                                                *
-*******************************************************************************
-*/
-#ifndef VTZONE_H
-#define VTZONE_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file 
- * \brief C++ API: RFC2445 VTIMEZONE support
- */
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/basictz.h"
-
-U_NAMESPACE_BEGIN
-
-class VTZWriter;
-class VTZReader;
-class UVector;
-
-/**
- * <code>VTimeZone</code> is a class implementing RFC2445 VTIMEZONE.  You can create a
- * <code>VTimeZone</code> instance from a time zone ID supported by <code>TimeZone</code>.
- * With the <code>VTimeZone</code> instance created from the ID, you can write out the rule
- * in RFC2445 VTIMEZONE format.  Also, you can create a <code>VTimeZone</code> instance
- * from RFC2445 VTIMEZONE data stream, which allows you to calculate time
- * zone offset by the rules defined by the data.<br><br>
- * Note: The consumer of this class reading or writing VTIMEZONE data is responsible to
- * decode or encode Non-ASCII text.  Methods reading/writing VTIMEZONE data in this class
- * do nothing with MIME encoding.
- * @stable ICU 4.0
- */
-class U_I18N_API VTimeZone : public BasicTimeZone {
-public:
-    /**
-     * Copy constructor.
-     * @param source    The <code>VTimeZone</code> object to be copied.
-     * @stable ICU 4.0
-     */
-    VTimeZone(const VTimeZone& source);
-
-    /**
-     * Destructor.
-     * @stable ICU 4.0
-     */
-    virtual ~VTimeZone();
-
-    /**
-     * Assignment operator.
-     * @param right The object to be copied.
-     * @stable ICU 4.0
-     */
-    VTimeZone& operator=(const VTimeZone& right);
-
-    /**
-     * Return true if the given <code>TimeZone</code> objects are
-     * semantically equal. Objects of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given <code>TimeZone</code> objects are
-      *semantically equal.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator==(const TimeZone& that) const;
-
-    /**
-     * Return true if the given <code>TimeZone</code> objects are
-     * semantically unequal. Objects of different subclasses are considered unequal.
-     * @param that  The object to be compared with.
-     * @return  true if the given <code>TimeZone</code> objects are
-     * semantically unequal.
-     * @stable ICU 4.0
-     */
-    virtual UBool operator!=(const TimeZone& that) const;
-
-    /**
-     * Create a <code>VTimeZone</code> instance by the time zone ID.
-     * @param ID The time zone ID, such as America/New_York
-     * @return A <code>VTimeZone</code> object initialized by the time zone ID,
-     * or NULL when the ID is unknown.
-     * @stable ICU 4.0
-     */
-    static VTimeZone* createVTimeZoneByID(const UnicodeString& ID);
-
-    /**
-     * Create a <code>VTimeZone</code> instance by RFC2445 VTIMEZONE data
-     * 
-     * @param vtzdata The string including VTIMEZONE data block
-     * @param status Output param to filled in with a success or an error.
-     * @return A <code>VTimeZone</code> initialized by the VTIMEZONE data or
-     * NULL if failed to load the rule from the VTIMEZONE data.
-     * @stable ICU 4.0
-     */
-    static VTimeZone* createVTimeZone(const UnicodeString& vtzdata, UErrorCode& status);
-
-    /**
-     * Gets the RFC2445 TZURL property value.  When a <code>VTimeZone</code> instance was
-     * created from VTIMEZONE data, the initial value is set by the TZURL property value
-     * in the data.  Otherwise, the initial value is not set.
-     * @param url Receives the RFC2445 TZURL property value.
-     * @return TRUE if TZURL attribute is available and value is set.
-     * @stable ICU 4.0
-     */
-    UBool getTZURL(UnicodeString& url) const;
-
-    /**
-     * Sets the RFC2445 TZURL property value.
-     * @param url The TZURL property value.
-     * @stable ICU 4.0
-     */
-    void setTZURL(const UnicodeString& url);
-
-    /**
-     * Gets the RFC2445 LAST-MODIFIED property value.  When a <code>VTimeZone</code> instance
-     * was created from VTIMEZONE data, the initial value is set by the LAST-MODIFIED property
-     * value in the data.  Otherwise, the initial value is not set.
-     * @param lastModified Receives the last modified date.
-     * @return TRUE if lastModified attribute is available and value is set.
-     * @stable ICU 4.0
-     */
-    UBool getLastModified(UDate& lastModified) const;
-
-    /**
-     * Sets the RFC2445 LAST-MODIFIED property value.
-     * @param lastModified The LAST-MODIFIED date.
-     * @stable ICU 4.0
-     */
-    void setLastModified(UDate lastModified);
-
-    /**
-     * Writes RFC2445 VTIMEZONE data for this time zone
-     * @param result Output param to filled in with the VTIMEZONE data.
-     * @param status Output param to filled in with a success or an error.
-     * @stable ICU 4.0
-     */
-    void write(UnicodeString& result, UErrorCode& status) const;
-
-    /**
-     * Writes RFC2445 VTIMEZONE data for this time zone applicalbe
-     * for dates after the specified start time.
-     * @param start The start date.
-     * @param result Output param to filled in with the VTIMEZONE data.
-     * @param status Output param to filled in with a success or an error.
-     * @stable ICU 4.0
-     */
-    void write(UDate start, UnicodeString& result, UErrorCode& status) /*const*/;
-
-    /**
-     * Writes RFC2445 VTIMEZONE data applicalbe for the specified date.
-     * Some common iCalendar implementations can only handle a single time
-     * zone property or a pair of standard and daylight time properties using
-     * BYDAY rule with day of week (such as BYDAY=1SUN).  This method produce
-     * the VTIMEZONE data which can be handled these implementations.  The rules
-     * produced by this method can be used only for calculating time zone offset
-     * around the specified date.
-     * @param time The date used for rule extraction.
-     * @param result Output param to filled in with the VTIMEZONE data.
-     * @param status Output param to filled in with a success or an error.
-     * @stable ICU 4.0
-     */
-    void writeSimple(UDate time, UnicodeString& result, UErrorCode& status) /*const*/;
-
-    /**
-     * Clones TimeZone objects polymorphically. Clients are responsible for deleting
-     * the TimeZone object cloned.
-     * @return   A new copy of this TimeZone object.
-     * @stable ICU 4.0
-     */
-    virtual TimeZone* clone(void) const;
-
-    /**
-     * Returns the TimeZone's adjusted GMT offset (i.e., the number of milliseconds to add
-     * to GMT to get local time in this time zone, taking daylight savings time into
-     * account) as of a particular reference date.  The reference date is used to determine
-     * whether daylight savings time is in effect and needs to be figured into the offset
-     * that is returned (in other words, what is the adjusted GMT offset in this time zone
-     * at this particular date and time?).  For the time zones produced by createTimeZone(),
-     * the reference data is specified according to the Gregorian calendar, and the date
-     * and time fields are local standard time.
-     *
-     * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload,
-     * which returns both the raw and the DST offset for a given time. This method
-     * is retained only for backward compatibility.
-     *
-     * @param era        The reference date's era
-     * @param year       The reference date's year
-     * @param month      The reference date's month (0-based; 0 is January)
-     * @param day        The reference date's day-in-month (1-based)
-     * @param dayOfWeek  The reference date's day-of-week (1-based; 1 is Sunday)
-     * @param millis     The reference date's milliseconds in day, local standard time
-     * @param status     Output param to filled in with a success or an error.
-     * @return           The offset in milliseconds to add to GMT to get local time.
-     * @stable ICU 4.0
-     */
-    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
-                              uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const;
-
-    /**
-     * Gets the time zone offset, for current date, modified in case of
-     * daylight savings. This is the offset to add *to* UTC to get local time.
-     *
-     * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload,
-     * which returns both the raw and the DST offset for a given time. This method
-     * is retained only for backward compatibility.
-     *
-     * @param era        The reference date's era
-     * @param year       The reference date's year
-     * @param month      The reference date's month (0-based; 0 is January)
-     * @param day        The reference date's day-in-month (1-based)
-     * @param dayOfWeek  The reference date's day-of-week (1-based; 1 is Sunday)
-     * @param millis     The reference date's milliseconds in day, local standard time
-     * @param monthLength The length of the given month in days.
-     * @param status     Output param to filled in with a success or an error.
-     * @return           The offset in milliseconds to add to GMT to get local time.
-     * @stable ICU 4.0
-     */
-    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
-                           uint8_t dayOfWeek, int32_t millis,
-                           int32_t monthLength, UErrorCode& status) const;
-
-    /**
-     * Returns the time zone raw and GMT offset for the given moment
-     * in time.  Upon return, local-millis = GMT-millis + rawOffset +
-     * dstOffset.  All computations are performed in the proleptic
-     * Gregorian calendar.  The default implementation in the TimeZone
-     * class delegates to the 8-argument getOffset().
-     *
-     * @param date moment in time for which to return offsets, in
-     * units of milliseconds from January 1, 1970 0:00 GMT, either GMT
-     * time or local wall time, depending on `local'.
-     * @param local if true, `date' is local wall time; otherwise it
-     * is in GMT time.
-     * @param rawOffset output parameter to receive the raw offset, that
-     * is, the offset not including DST adjustments
-     * @param dstOffset output parameter to receive the DST offset,
-     * that is, the offset to be added to `rawOffset' to obtain the
-     * total offset between local and GMT time. If DST is not in
-     * effect, this value is zero; otherwise it is a positive value,
-     * typically one hour.
-     * @param ec input-output error code
-     * @stable ICU 4.0
-     */
-    virtual void getOffset(UDate date, UBool local, int32_t& rawOffset,
-                           int32_t& dstOffset, UErrorCode& ec) const;
-
-    /**
-     * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
-     * to GMT to get local time, before taking daylight savings time into account).
-     *
-     * @param offsetMillis  The new raw GMT offset for this time zone.
-     * @stable ICU 4.0
-     */
-    virtual void setRawOffset(int32_t offsetMillis);
-
-    /**
-     * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
-     * to GMT to get local time, before taking daylight savings time into account).
-     *
-     * @return   The TimeZone's raw GMT offset.
-     * @stable ICU 4.0
-     */
-    virtual int32_t getRawOffset(void) const;
-
-    /**
-     * Queries if this time zone uses daylight savings time.
-     * @return true if this time zone uses daylight savings time,
-     * false, otherwise.
-     * @stable ICU 4.0
-     */
-    virtual UBool useDaylightTime(void) const;
-
-    /**
-     * Queries if the given date is in daylight savings time in
-     * this time zone.
-     * This method is wasteful since it creates a new GregorianCalendar and
-     * deletes it each time it is called. This is a deprecated method
-     * and provided only for Java compatibility.
-     *
-     * @param date the given UDate.
-     * @param status Output param filled in with success/error code.
-     * @return true if the given date is in daylight savings time,
-     * false, otherwise.
-     * @deprecated ICU 2.4. Use Calendar::inDaylightTime() instead.
-     */
-    virtual UBool inDaylightTime(UDate date, UErrorCode& status) const;
-
-    /**
-     * Returns true if this zone has the same rule and offset as another zone.
-     * That is, if this zone differs only in ID, if at all.
-     * @param other the <code>TimeZone</code> object to be compared with
-     * @return true if the given zone is the same as this one,
-     * with the possible exception of the ID
-     * @stable ICU 4.0
-     */
-    virtual UBool hasSameRules(const TimeZone& other) const;
-
-    /**
-     * Gets the first time zone transition after the base time.
-     * @param base      The base time.
-     * @param inclusive Whether the base time is inclusive or not.
-     * @param result    Receives the first transition after the base time.
-     * @return  TRUE if the transition is found.
-     * @stable ICU 4.0
-     */
-    virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/;
-
-    /**
-     * Gets the most recent time zone transition before the base time.
-     * @param base      The base time.
-     * @param inclusive Whether the base time is inclusive or not.
-     * @param result    Receives the most recent transition before the base time.
-     * @return  TRUE if the transition is found.
-     * @stable ICU 4.0
-     */
-    virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/;
-
-    /**
-     * Returns the number of <code>TimeZoneRule</code>s which represents time transitions,
-     * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except
-     * <code>InitialTimeZoneRule</code>.  The return value range is 0 or any positive value.
-     * @param status    Receives error status code.
-     * @return The number of <code>TimeZoneRule</code>s representing time transitions.
-     * @stable ICU 4.0
-     */
-    virtual int32_t countTransitionRules(UErrorCode& status) /*const*/;
-
-    /**
-     * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code>
-     * which represent time transitions for this time zone.  On successful return,
-     * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and
-     * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code>
-     * instances up to the size specified by trscount.  The results are referencing the
-     * rule instance held by this time zone instance.  Therefore, after this time zone
-     * is destructed, they are no longer available.
-     * @param initial       Receives the initial timezone rule
-     * @param trsrules      Receives the timezone transition rules
-     * @param trscount      On input, specify the size of the array 'transitions' receiving
-     *                      the timezone transition rules.  On output, actual number of
-     *                      rules filled in the array will be set.
-     * @param status        Receives error status code.
-     * @stable ICU 4.0
-     */
-    virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial,
-        const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) /*const*/;
-
-private:
-    enum { DEFAULT_VTIMEZONE_LINES = 100 };
-
-    /**
-     * Default constructor.
-     */
-    VTimeZone();
-    static VTimeZone* createVTimeZone(VTZReader* reader);
-    void write(VTZWriter& writer, UErrorCode& status) const;
-    void write(UDate start, VTZWriter& writer, UErrorCode& status) /*const*/;
-    void writeSimple(UDate time, VTZWriter& writer, UErrorCode& status) /*const*/;
-    void load(VTZReader& reader, UErrorCode& status);
-    void parse(UErrorCode& status);
-
-    void writeZone(VTZWriter& w, BasicTimeZone& basictz, UVector* customProps,
-        UErrorCode& status) const;
-
-    void writeHeaders(VTZWriter& w, UErrorCode& status) const;
-    void writeFooter(VTZWriter& writer, UErrorCode& status) const;
-
-    void writeZonePropsByTime(VTZWriter& writer, UBool isDst, const UnicodeString& tzname,
-                              int32_t fromOffset, int32_t toOffset, UDate time, UBool withRDATE,
-                              UErrorCode& status) const;
-    void writeZonePropsByDOM(VTZWriter& writer, UBool isDst, const UnicodeString& tzname,
-                             int32_t fromOffset, int32_t toOffset,
-                             int32_t month, int32_t dayOfMonth, UDate startTime, UDate untilTime,
-                             UErrorCode& status) const;
-    void writeZonePropsByDOW(VTZWriter& writer, UBool isDst, const UnicodeString& tzname,
-                             int32_t fromOffset, int32_t toOffset,
-                             int32_t month, int32_t weekInMonth, int32_t dayOfWeek,
-                             UDate startTime, UDate untilTime, UErrorCode& status) const;
-    void writeZonePropsByDOW_GEQ_DOM(VTZWriter& writer, UBool isDst, const UnicodeString& tzname,
-                                     int32_t fromOffset, int32_t toOffset,
-                                     int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
-                                     UDate startTime, UDate untilTime, UErrorCode& status) const;
-    void writeZonePropsByDOW_GEQ_DOM_sub(VTZWriter& writer, int32_t month, int32_t dayOfMonth,
-                                         int32_t dayOfWeek, int32_t numDays,
-                                         UDate untilTime, int32_t fromOffset, UErrorCode& status) const;
-    void writeZonePropsByDOW_LEQ_DOM(VTZWriter& writer, UBool isDst, const UnicodeString& tzname,
-                                     int32_t fromOffset, int32_t toOffset,
-                                     int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
-                                     UDate startTime, UDate untilTime, UErrorCode& status) const;
-    void writeFinalRule(VTZWriter& writer, UBool isDst, const AnnualTimeZoneRule* rule,
-                        int32_t fromRawOffset, int32_t fromDSTSavings,
-                        UDate startTime, UErrorCode& status) const;
-
-    void beginZoneProps(VTZWriter& writer, UBool isDst, const UnicodeString& tzname,
-                        int32_t fromOffset, int32_t toOffset, UDate startTime, UErrorCode& status) const;
-    void endZoneProps(VTZWriter& writer, UBool isDst, UErrorCode& status) const;
-    void beginRRULE(VTZWriter& writer, int32_t month, UErrorCode& status) const;
-    void appendUNTIL(VTZWriter& writer, const UnicodeString& until, UErrorCode& status) const;
-
-    BasicTimeZone   *tz;
-    UVector         *vtzlines;
-    UnicodeString   tzurl;
-    UDate           lastmod;
-    UnicodeString   olsonzid;
-    UnicodeString   icutzver;
-
-public:
-    /**
-     * Return the class ID for this class. This is useful only for comparing to
-     * a return value from getDynamicClassID(). For example:
-     * <pre>
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       erived::getStaticClassID()) ...
-     * </pre>
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 4.0
-     */
-    static UClassID U_EXPORT2 getStaticClassID(void);
-
-    /**
-     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
-     * method is to implement a simple version of RTTI, since not all C++
-     * compilers support genuine RTTI. Polymorphic operator==() and clone()
-     * methods call this method.
-     *
-     * @return          The class ID for this object. All objects of a
-     *                  given class have the same class ID.  Objects of
-     *                  other classes have different class IDs.
-     * @stable ICU 4.0
-     */
-    virtual UClassID getDynamicClassID(void) const;
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // VTZONE_H
-//eof

Copied: MacRuby/trunk/icu-1060/unicode/vtzone.h (from rev 3744, MacRuby/branches/icu/icu-1060/unicode/vtzone.h)
===================================================================
--- MacRuby/trunk/icu-1060/unicode/vtzone.h	                        (rev 0)
+++ MacRuby/trunk/icu-1060/unicode/vtzone.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,443 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2008, International Business Machines Corporation and         *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*/
+#ifndef VTZONE_H
+#define VTZONE_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: RFC2445 VTIMEZONE support
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/basictz.h"
+
+U_NAMESPACE_BEGIN
+
+class VTZWriter;
+class VTZReader;
+class UVector;
+
+/**
+ * <code>VTimeZone</code> is a class implementing RFC2445 VTIMEZONE.  You can create a
+ * <code>VTimeZone</code> instance from a time zone ID supported by <code>TimeZone</code>.
+ * With the <code>VTimeZone</code> instance created from the ID, you can write out the rule
+ * in RFC2445 VTIMEZONE format.  Also, you can create a <code>VTimeZone</code> instance
+ * from RFC2445 VTIMEZONE data stream, which allows you to calculate time
+ * zone offset by the rules defined by the data.<br><br>
+ * Note: The consumer of this class reading or writing VTIMEZONE data is responsible to
+ * decode or encode Non-ASCII text.  Methods reading/writing VTIMEZONE data in this class
+ * do nothing with MIME encoding.
+ * @stable ICU 4.0
+ */
+class U_I18N_API VTimeZone : public BasicTimeZone {
+public:
+    /**
+     * Copy constructor.
+     * @param source    The <code>VTimeZone</code> object to be copied.
+     * @stable ICU 4.0
+     */
+    VTimeZone(const VTimeZone& source);
+
+    /**
+     * Destructor.
+     * @stable ICU 4.0
+     */
+    virtual ~VTimeZone();
+
+    /**
+     * Assignment operator.
+     * @param right The object to be copied.
+     * @stable ICU 4.0
+     */
+    VTimeZone& operator=(const VTimeZone& right);
+
+    /**
+     * Return true if the given <code>TimeZone</code> objects are
+     * semantically equal. Objects of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given <code>TimeZone</code> objects are
+      *semantically equal.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator==(const TimeZone& that) const;
+
+    /**
+     * Return true if the given <code>TimeZone</code> objects are
+     * semantically unequal. Objects of different subclasses are considered unequal.
+     * @param that  The object to be compared with.
+     * @return  true if the given <code>TimeZone</code> objects are
+     * semantically unequal.
+     * @stable ICU 4.0
+     */
+    virtual UBool operator!=(const TimeZone& that) const;
+
+    /**
+     * Create a <code>VTimeZone</code> instance by the time zone ID.
+     * @param ID The time zone ID, such as America/New_York
+     * @return A <code>VTimeZone</code> object initialized by the time zone ID,
+     * or NULL when the ID is unknown.
+     * @stable ICU 4.0
+     */
+    static VTimeZone* createVTimeZoneByID(const UnicodeString& ID);
+
+    /**
+     * Create a <code>VTimeZone</code> instance by RFC2445 VTIMEZONE data
+     * 
+     * @param vtzdata The string including VTIMEZONE data block
+     * @param status Output param to filled in with a success or an error.
+     * @return A <code>VTimeZone</code> initialized by the VTIMEZONE data or
+     * NULL if failed to load the rule from the VTIMEZONE data.
+     * @stable ICU 4.0
+     */
+    static VTimeZone* createVTimeZone(const UnicodeString& vtzdata, UErrorCode& status);
+
+    /**
+     * Gets the RFC2445 TZURL property value.  When a <code>VTimeZone</code> instance was
+     * created from VTIMEZONE data, the initial value is set by the TZURL property value
+     * in the data.  Otherwise, the initial value is not set.
+     * @param url Receives the RFC2445 TZURL property value.
+     * @return TRUE if TZURL attribute is available and value is set.
+     * @stable ICU 4.0
+     */
+    UBool getTZURL(UnicodeString& url) const;
+
+    /**
+     * Sets the RFC2445 TZURL property value.
+     * @param url The TZURL property value.
+     * @stable ICU 4.0
+     */
+    void setTZURL(const UnicodeString& url);
+
+    /**
+     * Gets the RFC2445 LAST-MODIFIED property value.  When a <code>VTimeZone</code> instance
+     * was created from VTIMEZONE data, the initial value is set by the LAST-MODIFIED property
+     * value in the data.  Otherwise, the initial value is not set.
+     * @param lastModified Receives the last modified date.
+     * @return TRUE if lastModified attribute is available and value is set.
+     * @stable ICU 4.0
+     */
+    UBool getLastModified(UDate& lastModified) const;
+
+    /**
+     * Sets the RFC2445 LAST-MODIFIED property value.
+     * @param lastModified The LAST-MODIFIED date.
+     * @stable ICU 4.0
+     */
+    void setLastModified(UDate lastModified);
+
+    /**
+     * Writes RFC2445 VTIMEZONE data for this time zone
+     * @param result Output param to filled in with the VTIMEZONE data.
+     * @param status Output param to filled in with a success or an error.
+     * @stable ICU 4.0
+     */
+    void write(UnicodeString& result, UErrorCode& status) const;
+
+    /**
+     * Writes RFC2445 VTIMEZONE data for this time zone applicalbe
+     * for dates after the specified start time.
+     * @param start The start date.
+     * @param result Output param to filled in with the VTIMEZONE data.
+     * @param status Output param to filled in with a success or an error.
+     * @stable ICU 4.0
+     */
+    void write(UDate start, UnicodeString& result, UErrorCode& status) /*const*/;
+
+    /**
+     * Writes RFC2445 VTIMEZONE data applicalbe for the specified date.
+     * Some common iCalendar implementations can only handle a single time
+     * zone property or a pair of standard and daylight time properties using
+     * BYDAY rule with day of week (such as BYDAY=1SUN).  This method produce
+     * the VTIMEZONE data which can be handled these implementations.  The rules
+     * produced by this method can be used only for calculating time zone offset
+     * around the specified date.
+     * @param time The date used for rule extraction.
+     * @param result Output param to filled in with the VTIMEZONE data.
+     * @param status Output param to filled in with a success or an error.
+     * @stable ICU 4.0
+     */
+    void writeSimple(UDate time, UnicodeString& result, UErrorCode& status) /*const*/;
+
+    /**
+     * Clones TimeZone objects polymorphically. Clients are responsible for deleting
+     * the TimeZone object cloned.
+     * @return   A new copy of this TimeZone object.
+     * @stable ICU 4.0
+     */
+    virtual TimeZone* clone(void) const;
+
+    /**
+     * Returns the TimeZone's adjusted GMT offset (i.e., the number of milliseconds to add
+     * to GMT to get local time in this time zone, taking daylight savings time into
+     * account) as of a particular reference date.  The reference date is used to determine
+     * whether daylight savings time is in effect and needs to be figured into the offset
+     * that is returned (in other words, what is the adjusted GMT offset in this time zone
+     * at this particular date and time?).  For the time zones produced by createTimeZone(),
+     * the reference data is specified according to the Gregorian calendar, and the date
+     * and time fields are local standard time.
+     *
+     * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload,
+     * which returns both the raw and the DST offset for a given time. This method
+     * is retained only for backward compatibility.
+     *
+     * @param era        The reference date's era
+     * @param year       The reference date's year
+     * @param month      The reference date's month (0-based; 0 is January)
+     * @param day        The reference date's day-in-month (1-based)
+     * @param dayOfWeek  The reference date's day-of-week (1-based; 1 is Sunday)
+     * @param millis     The reference date's milliseconds in day, local standard time
+     * @param status     Output param to filled in with a success or an error.
+     * @return           The offset in milliseconds to add to GMT to get local time.
+     * @stable ICU 4.0
+     */
+    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
+                              uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const;
+
+    /**
+     * Gets the time zone offset, for current date, modified in case of
+     * daylight savings. This is the offset to add *to* UTC to get local time.
+     *
+     * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload,
+     * which returns both the raw and the DST offset for a given time. This method
+     * is retained only for backward compatibility.
+     *
+     * @param era        The reference date's era
+     * @param year       The reference date's year
+     * @param month      The reference date's month (0-based; 0 is January)
+     * @param day        The reference date's day-in-month (1-based)
+     * @param dayOfWeek  The reference date's day-of-week (1-based; 1 is Sunday)
+     * @param millis     The reference date's milliseconds in day, local standard time
+     * @param monthLength The length of the given month in days.
+     * @param status     Output param to filled in with a success or an error.
+     * @return           The offset in milliseconds to add to GMT to get local time.
+     * @stable ICU 4.0
+     */
+    virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day,
+                           uint8_t dayOfWeek, int32_t millis,
+                           int32_t monthLength, UErrorCode& status) const;
+
+    /**
+     * Returns the time zone raw and GMT offset for the given moment
+     * in time.  Upon return, local-millis = GMT-millis + rawOffset +
+     * dstOffset.  All computations are performed in the proleptic
+     * Gregorian calendar.  The default implementation in the TimeZone
+     * class delegates to the 8-argument getOffset().
+     *
+     * @param date moment in time for which to return offsets, in
+     * units of milliseconds from January 1, 1970 0:00 GMT, either GMT
+     * time or local wall time, depending on `local'.
+     * @param local if true, `date' is local wall time; otherwise it
+     * is in GMT time.
+     * @param rawOffset output parameter to receive the raw offset, that
+     * is, the offset not including DST adjustments
+     * @param dstOffset output parameter to receive the DST offset,
+     * that is, the offset to be added to `rawOffset' to obtain the
+     * total offset between local and GMT time. If DST is not in
+     * effect, this value is zero; otherwise it is a positive value,
+     * typically one hour.
+     * @param ec input-output error code
+     * @stable ICU 4.0
+     */
+    virtual void getOffset(UDate date, UBool local, int32_t& rawOffset,
+                           int32_t& dstOffset, UErrorCode& ec) const;
+
+    /**
+     * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
+     * to GMT to get local time, before taking daylight savings time into account).
+     *
+     * @param offsetMillis  The new raw GMT offset for this time zone.
+     * @stable ICU 4.0
+     */
+    virtual void setRawOffset(int32_t offsetMillis);
+
+    /**
+     * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add
+     * to GMT to get local time, before taking daylight savings time into account).
+     *
+     * @return   The TimeZone's raw GMT offset.
+     * @stable ICU 4.0
+     */
+    virtual int32_t getRawOffset(void) const;
+
+    /**
+     * Queries if this time zone uses daylight savings time.
+     * @return true if this time zone uses daylight savings time,
+     * false, otherwise.
+     * @stable ICU 4.0
+     */
+    virtual UBool useDaylightTime(void) const;
+
+    /**
+     * Queries if the given date is in daylight savings time in
+     * this time zone.
+     * This method is wasteful since it creates a new GregorianCalendar and
+     * deletes it each time it is called. This is a deprecated method
+     * and provided only for Java compatibility.
+     *
+     * @param date the given UDate.
+     * @param status Output param filled in with success/error code.
+     * @return true if the given date is in daylight savings time,
+     * false, otherwise.
+     * @deprecated ICU 2.4. Use Calendar::inDaylightTime() instead.
+     */
+    virtual UBool inDaylightTime(UDate date, UErrorCode& status) const;
+
+    /**
+     * Returns true if this zone has the same rule and offset as another zone.
+     * That is, if this zone differs only in ID, if at all.
+     * @param other the <code>TimeZone</code> object to be compared with
+     * @return true if the given zone is the same as this one,
+     * with the possible exception of the ID
+     * @stable ICU 4.0
+     */
+    virtual UBool hasSameRules(const TimeZone& other) const;
+
+    /**
+     * Gets the first time zone transition after the base time.
+     * @param base      The base time.
+     * @param inclusive Whether the base time is inclusive or not.
+     * @param result    Receives the first transition after the base time.
+     * @return  TRUE if the transition is found.
+     * @stable ICU 4.0
+     */
+    virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/;
+
+    /**
+     * Gets the most recent time zone transition before the base time.
+     * @param base      The base time.
+     * @param inclusive Whether the base time is inclusive or not.
+     * @param result    Receives the most recent transition before the base time.
+     * @return  TRUE if the transition is found.
+     * @stable ICU 4.0
+     */
+    virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/;
+
+    /**
+     * Returns the number of <code>TimeZoneRule</code>s which represents time transitions,
+     * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except
+     * <code>InitialTimeZoneRule</code>.  The return value range is 0 or any positive value.
+     * @param status    Receives error status code.
+     * @return The number of <code>TimeZoneRule</code>s representing time transitions.
+     * @stable ICU 4.0
+     */
+    virtual int32_t countTransitionRules(UErrorCode& status) /*const*/;
+
+    /**
+     * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code>
+     * which represent time transitions for this time zone.  On successful return,
+     * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and
+     * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code>
+     * instances up to the size specified by trscount.  The results are referencing the
+     * rule instance held by this time zone instance.  Therefore, after this time zone
+     * is destructed, they are no longer available.
+     * @param initial       Receives the initial timezone rule
+     * @param trsrules      Receives the timezone transition rules
+     * @param trscount      On input, specify the size of the array 'transitions' receiving
+     *                      the timezone transition rules.  On output, actual number of
+     *                      rules filled in the array will be set.
+     * @param status        Receives error status code.
+     * @stable ICU 4.0
+     */
+    virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial,
+        const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) /*const*/;
+
+private:
+    enum { DEFAULT_VTIMEZONE_LINES = 100 };
+
+    /**
+     * Default constructor.
+     */
+    VTimeZone();
+    static VTimeZone* createVTimeZone(VTZReader* reader);
+    void write(VTZWriter& writer, UErrorCode& status) const;
+    void write(UDate start, VTZWriter& writer, UErrorCode& status) /*const*/;
+    void writeSimple(UDate time, VTZWriter& writer, UErrorCode& status) /*const*/;
+    void load(VTZReader& reader, UErrorCode& status);
+    void parse(UErrorCode& status);
+
+    void writeZone(VTZWriter& w, BasicTimeZone& basictz, UVector* customProps,
+        UErrorCode& status) const;
+
+    void writeHeaders(VTZWriter& w, UErrorCode& status) const;
+    void writeFooter(VTZWriter& writer, UErrorCode& status) const;
+
+    void writeZonePropsByTime(VTZWriter& writer, UBool isDst, const UnicodeString& tzname,
+                              int32_t fromOffset, int32_t toOffset, UDate time, UBool withRDATE,
+                              UErrorCode& status) const;
+    void writeZonePropsByDOM(VTZWriter& writer, UBool isDst, const UnicodeString& tzname,
+                             int32_t fromOffset, int32_t toOffset,
+                             int32_t month, int32_t dayOfMonth, UDate startTime, UDate untilTime,
+                             UErrorCode& status) const;
+    void writeZonePropsByDOW(VTZWriter& writer, UBool isDst, const UnicodeString& tzname,
+                             int32_t fromOffset, int32_t toOffset,
+                             int32_t month, int32_t weekInMonth, int32_t dayOfWeek,
+                             UDate startTime, UDate untilTime, UErrorCode& status) const;
+    void writeZonePropsByDOW_GEQ_DOM(VTZWriter& writer, UBool isDst, const UnicodeString& tzname,
+                                     int32_t fromOffset, int32_t toOffset,
+                                     int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
+                                     UDate startTime, UDate untilTime, UErrorCode& status) const;
+    void writeZonePropsByDOW_GEQ_DOM_sub(VTZWriter& writer, int32_t month, int32_t dayOfMonth,
+                                         int32_t dayOfWeek, int32_t numDays,
+                                         UDate untilTime, int32_t fromOffset, UErrorCode& status) const;
+    void writeZonePropsByDOW_LEQ_DOM(VTZWriter& writer, UBool isDst, const UnicodeString& tzname,
+                                     int32_t fromOffset, int32_t toOffset,
+                                     int32_t month, int32_t dayOfMonth, int32_t dayOfWeek,
+                                     UDate startTime, UDate untilTime, UErrorCode& status) const;
+    void writeFinalRule(VTZWriter& writer, UBool isDst, const AnnualTimeZoneRule* rule,
+                        int32_t fromRawOffset, int32_t fromDSTSavings,
+                        UDate startTime, UErrorCode& status) const;
+
+    void beginZoneProps(VTZWriter& writer, UBool isDst, const UnicodeString& tzname,
+                        int32_t fromOffset, int32_t toOffset, UDate startTime, UErrorCode& status) const;
+    void endZoneProps(VTZWriter& writer, UBool isDst, UErrorCode& status) const;
+    void beginRRULE(VTZWriter& writer, int32_t month, UErrorCode& status) const;
+    void appendUNTIL(VTZWriter& writer, const UnicodeString& until, UErrorCode& status) const;
+
+    BasicTimeZone   *tz;
+    UVector         *vtzlines;
+    UnicodeString   tzurl;
+    UDate           lastmod;
+    UnicodeString   olsonzid;
+    UnicodeString   icutzver;
+
+public:
+    /**
+     * Return the class ID for this class. This is useful only for comparing to
+     * a return value from getDynamicClassID(). For example:
+     * <pre>
+     * .   Base* polymorphic_pointer = createPolymorphicObject();
+     * .   if (polymorphic_pointer->getDynamicClassID() ==
+     * .       erived::getStaticClassID()) ...
+     * </pre>
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 4.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+     * method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     * @stable ICU 4.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // VTZONE_H
+//eof

Modified: MacRuby/trunk/id.c
===================================================================
--- MacRuby/trunk/id.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/id.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -61,6 +61,7 @@
     selLength = sel_registerName("length");
     selSucc = sel_registerName("succ");
     selNot = sel_registerName("!");
+    selNot2 = sel_registerName("!:");
     selAlloc = sel_registerName("alloc");
     selAllocWithZone = sel_registerName("allocWithZone:");
     selCopyWithZone = sel_registerName("copyWithZone:");

Modified: MacRuby/trunk/id.h
===================================================================
--- MacRuby/trunk/id.h	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/id.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -71,6 +71,7 @@
 extern SEL selLength;
 extern SEL selSucc;
 extern SEL selNot;
+extern SEL selNot2;
 extern SEL selAlloc;
 extern SEL selAllocWithZone;
 extern SEL selCopyWithZone;

Modified: MacRuby/trunk/include/ruby/encoding.h
===================================================================
--- MacRuby/trunk/include/ruby/encoding.h	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/include/ruby/encoding.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -16,83 +16,12 @@
 extern "C" {
 #endif
 
-#ifdef HAVE_STDARG_PROTOTYPES
-# include <stdarg.h>
-#else
-# include <varargs.h>
-#endif
+#include <stdarg.h>
 
-#if WITH_OBJC
+typedef struct rb_encoding rb_encoding;
 
-#include <wctype.h>
-
-typedef CFStringEncoding rb_encoding;
-
-#else
-
-#include "ruby/oniguruma.h"
-
-#define ENCODING_INLINE_MAX 1023
-#define ENCODING_SHIFT (FL_USHIFT+10)
-#define ENCODING_MASK (ENCODING_INLINE_MAX<<ENCODING_SHIFT)
-
-#define ENCODING_SET_INLINED(obj,i) do {\
-    RBASIC(obj)->flags &= ~ENCODING_MASK;\
-    RBASIC(obj)->flags |= (i) << ENCODING_SHIFT;\
-} while (0)
-#define ENCODING_SET(obj,i) do {\
-    VALUE rb_encoding_set_obj = (obj); \
-    int encoding_set_enc_index = (i); \
-    if (encoding_set_enc_index < ENCODING_INLINE_MAX) \
-        ENCODING_SET_INLINED(rb_encoding_set_obj, encoding_set_enc_index); \
-    else \
-        rb_enc_set_index(rb_encoding_set_obj, encoding_set_enc_index); \
-} while (0)
-
-#define ENCODING_GET_INLINED(obj) ((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
-#define ENCODING_GET(obj) \
-    (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \
-     ENCODING_GET_INLINED(obj) : \
-     rb_enc_get_index(obj))
-
-#if WITH_OBJC
-# define ENCODING_IS_ASCII8BIT(obj) (1)
-#else
-# define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0)
-#endif
-
-#define ENC_CODERANGE_MASK	(FL_USER8|FL_USER9)
-#define ENC_CODERANGE_UNKNOWN	0
-#define ENC_CODERANGE_7BIT	FL_USER8
-#define ENC_CODERANGE_VALID	FL_USER9
-#define ENC_CODERANGE_BROKEN	(FL_USER8|FL_USER9)
-#define ENC_CODERANGE(obj) (RBASIC(obj)->flags & ENC_CODERANGE_MASK)
-#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_7BIT)
-#define ENC_CODERANGE_SET(obj,cr) (RBASIC(obj)->flags = \
-				   (RBASIC(obj)->flags & ~ENC_CODERANGE_MASK) | (cr))
-#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0)
-
-/* assumed ASCII compatiblity */
-#define ENC_CODERANGE_AND(a, b) \
-    (a == ENC_CODERANGE_7BIT ? b : \
-     a == ENC_CODERANGE_VALID ? (b == ENC_CODERANGE_7BIT ? ENC_CODERANGE_VALID : b) : \
-     ENC_CODERANGE_UNKNOWN)
-
-#define ENCODING_CODERANGE_SET(obj, encindex, cr) \
-    do { \
-        VALUE rb_encoding_coderange_obj = (obj); \
-        ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \
-        ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \
-    } while (0)
-
-typedef OnigEncodingType rb_encoding;
-#endif
-
-#define ENCODING_MAXNAMELEN 42
-
 int rb_enc_replicate(const char *, rb_encoding *);
 int rb_define_dummy_encoding(const char *);
-#define rb_enc_to_index(enc) ((enc) ? ((enc)->ruby_encoding_index) : 0)
 int rb_enc_get_index(VALUE obj);
 void rb_enc_set_index(VALUE obj, int encindex);
 int rb_enc_find_index(const char *name);
@@ -120,26 +49,13 @@
 /* name -> rb_encoding */
 rb_encoding * rb_enc_find(const char *name);
 
-#if WITH_OBJC
-rb_encoding * rb_enc_find2(VALUE name);
-#endif
-
 /* encoding -> name */
-#if WITH_OBJC
 const char *rb_enc_name(rb_encoding *);
 VALUE rb_enc_name2(rb_encoding *);
-#else
-#define rb_enc_name(enc) (enc)->name
-#endif
 
 /* encoding -> minlen/maxlen */
-#if WITH_OBJC
 long rb_enc_mbminlen(rb_encoding *);
 long rb_enc_mbmaxlen(rb_encoding *);
-#else
-#define rb_enc_mbminlen(enc) (enc)->min_enc_len
-#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
-#endif
 
 /* -> mbclen (no error notification: 0 < ret <= e-p, no exception) */
 int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc);
@@ -174,7 +90,8 @@
 /* ptr, ptr, encoding -> newline_or_not */
 #define rb_enc_is_newline(p,end,enc)  ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)(p),(UChar*)(end))
 
-#if WITH_OBJC
+#include <wctype.h>
+
 #define rb_enc_isctype(c,t,enc)	(iswctype(c,t))
 #define rb_enc_isascii(c,enc)	(iswascii(c))
 #define rb_enc_isalpha(c,enc)	(iswalpha(c))
@@ -184,17 +101,6 @@
 #define rb_enc_isprint(c,enc)	(iswprint(c))
 #define rb_enc_isspace(c,enc)	(iswspace(c))
 #define rb_enc_isdigit(c,enc)	(iswdigit(c))
-#else
-#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
-#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
-#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)
-#define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER(enc,c)
-#define rb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER(enc,c)
-#define rb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM(enc,c)
-#define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT(enc,c)
-#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c)
-#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c)
-#endif
 
 #define rb_enc_asciicompat(enc) (rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc))
 
@@ -218,31 +124,18 @@
 int rb_ascii8bit_encindex(void);
 VALUE rb_enc_default_external(void);
 void rb_enc_set_default_external(VALUE encoding);
-//VALUE rb_locale_charmap(VALUE klass);
 long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
 
 VALUE rb_num_to_chr(VALUE, rb_encoding *);
 	
 RUBY_EXTERN VALUE rb_cEncoding;
 
-#define ENC_UNINITIALIZED (&rb_cEncoding)
-#define enc_initialized_p(enc) ((enc)->auxiliary_data != &rb_cEncoding)
-#define ENC_FROM_ENCODING(enc) ((VALUE)(enc)->auxiliary_data)
-
-#define ENC_DUMMY_FLAG FL_USER2
-#define ENC_DUMMY_P(enc) (RBASIC(enc)->flags & ENC_DUMMY_FLAG)
-#define ENC_SET_DUMMY(enc) (RBASIC(enc)->flags |= ENC_DUMMY_FLAG)
-
-#if WITH_OBJC
-# define rb_enc_dummy_p(x) (Qfalse)
-#else
 static inline int
 rb_enc_dummy_p(rb_encoding *enc)
 {
-    if (!enc_initialized_p(enc)) return Qfalse;
-    return ENC_DUMMY_P(ENC_FROM_ENCODING(enc));
+    // TODO
+    return Qfalse;
 }
-#endif
 
 VALUE rb_str_transcode(VALUE str, VALUE to);
 

Modified: MacRuby/trunk/include/ruby/intern.h
===================================================================
--- MacRuby/trunk/include/ruby/intern.h	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/include/ruby/intern.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -505,18 +505,22 @@
 unsigned int rb_genrand_int32(void);
 double rb_genrand_real(void);
 /* re.c */
+VALUE rb_reg_compile(VALUE str, int options);
+VALUE rb_reg_check_preprocess(VALUE);
+void rb_match_busy(VALUE match);
 #define rb_memcmp memcmp
 int rb_memcicmp(const void*,const void*,long);
 VALUE rb_reg_nth_defined(int, VALUE);
 VALUE rb_reg_nth_match(int, VALUE);
 VALUE rb_reg_last_match(VALUE);
 VALUE rb_reg_match_last(VALUE);
+VALUE rb_reg_match_pre(VALUE);
+VALUE rb_reg_match_post(VALUE);
 #define HAVE_RB_REG_NEW_STR 1
 VALUE rb_reg_new_str(VALUE, int);
 VALUE rb_reg_new(const char *, long, int);
 VALUE rb_reg_match(VALUE, VALUE);
 int rb_reg_options(VALUE);
-VALUE rb_reg_eqq(VALUE, SEL, VALUE);
 void rb_set_kcode(const char*);
 const char* rb_get_kcode(void);
 /* ruby.c */
@@ -583,22 +587,39 @@
 VALUE rb_str_append(VALUE, VALUE);
 VALUE rb_str_concat(VALUE, VALUE);
 VALUE rb_str_plus(VALUE str1, VALUE str2);
-int rb_memhash(const void *ptr, long len);
-int rb_str_hash(VALUE);
+long rb_memhash(const void *ptr, long len);
+unsigned long rb_str_hash(VALUE);
 int rb_str_hash_cmp(VALUE,VALUE);
 int rb_str_comparable(VALUE, VALUE);
 int rb_str_cmp(VALUE, VALUE);
+int rb_str_casecmp(VALUE, VALUE);
 VALUE rb_str_equal(VALUE str1, VALUE str2);
 void rb_str_update(VALUE, long, long, VALUE);
+void rb_str_delete(VALUE str, long beg, long len);
 VALUE rb_str_split(VALUE, const char*);
 void rb_str_associate(VALUE, VALUE);
 VALUE rb_str_associated(VALUE);
 void rb_str_setter(VALUE, ID, VALUE*);
 VALUE rb_sym_to_s(VALUE);
 VALUE rb_str_length(VALUE);
+VALUE rb_str_inspect(VALUE);
 #if WITH_OBJC
 bool rb_objc_str_is_pure(VALUE);
 #endif
+
+// Return a string object appropriate for bstr_ calls. This does nothing for
+// data/binary RubyStrings.
+VALUE rb_str_bstr(VALUE str);
+
+// Byte strings APIs. Use this only when dealing with raw data.
+VALUE rb_bstr_new(void);
+VALUE rb_bstr_new_with_data(const uint8_t *bytes, long len);
+uint8_t *rb_bstr_bytes(VALUE str);
+void rb_bstr_concat(VALUE str, const uint8_t *bytes, long len);
+long rb_bstr_length(VALUE str);
+void rb_bstr_set_length(VALUE str, long len);
+void rb_bstr_resize(VALUE str, long capa);
+
 /* struct.c */
 VALUE rb_struct_new(VALUE, ...);
 VALUE rb_struct_define(const char*, ...);

Modified: MacRuby/trunk/include/ruby/io.h
===================================================================
--- MacRuby/trunk/include/ruby/io.h	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/include/ruby/io.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -29,7 +29,7 @@
     int read_fd;
     int write_fd;
 
-    CFStringRef path;
+    VALUE path;
     pid_t pid;
     int lineno;
     int mode;

Deleted: MacRuby/trunk/include/ruby/oniguruma.h
===================================================================
--- MacRuby/trunk/include/ruby/oniguruma.h	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/include/ruby/oniguruma.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,785 +0,0 @@
-#ifndef ONIGURUMA_H
-#define ONIGURUMA_H
-/**********************************************************************
-  oniguruma.h - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#if 0
-} /* satisfy cc-mode */
-#endif
-#endif
-
-#define ONIGURUMA
-#define ONIGURUMA_VERSION_MAJOR   5
-#define ONIGURUMA_VERSION_MINOR   9
-#define ONIGURUMA_VERSION_TEENY   1
-
-#ifdef __cplusplus
-# ifndef  HAVE_PROTOTYPES
-#  define HAVE_PROTOTYPES 1
-# endif
-# ifndef  HAVE_STDARG_PROTOTYPES
-#  define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
-
-/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
-#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
-# ifndef  HAVE_STDARG_PROTOTYPES
-#  define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
-
-#ifdef HAVE_STDARG_H
-# ifndef  HAVE_STDARG_PROTOTYPES
-#  define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
-
-#ifndef P_
-#if defined(__STDC__) || defined(_WIN32)
-# define P_(args) args
-#else
-# define P_(args) ()
-#endif
-#endif
-
-#ifndef PV_
-#ifdef HAVE_STDARG_PROTOTYPES
-# define PV_(args) args
-#else
-# define PV_(args) ()
-#endif
-#endif
-
-#ifndef ONIG_EXTERN
-#ifdef RUBY_EXTERN
-#define ONIG_EXTERN   RUBY_EXTERN
-#else
-#if defined(_WIN32) && !defined(__GNUC__)
-#if defined(EXPORT) || defined(RUBY_EXPORT)
-#define ONIG_EXTERN   extern __declspec(dllexport)
-#else
-#define ONIG_EXTERN   extern __declspec(dllimport)
-#endif
-#endif
-#endif
-#endif
-
-#ifndef ONIG_EXTERN
-#define ONIG_EXTERN   extern
-#endif
-
-/* PART: character encoding */
-
-#ifndef ONIG_ESCAPE_UCHAR_COLLISION
-#define UChar OnigUChar
-#endif
-
-typedef unsigned char  OnigUChar;
-typedef unsigned long  OnigCodePoint;
-typedef unsigned int   OnigCtype;
-typedef unsigned int   OnigDistance;
-
-#define ONIG_INFINITE_DISTANCE  ~((OnigDistance )0)
-
-typedef unsigned int OnigCaseFoldType; /* case fold flag */
-
-ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
-
-/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA  (1<<1) */
-/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH     (1<<2) */
-#define ONIGENC_CASE_FOLD_TURKISH_AZERI         (1<<20)
-#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR   (1<<30)
-
-#define ONIGENC_CASE_FOLD_MIN      INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
-#define ONIGENC_CASE_FOLD_DEFAULT  OnigDefaultCaseFoldFlag
-
-
-#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN       3
-#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM      13
-/* 13 => Unicode:0x1ffc */
-
-/* code range */
-#define ONIGENC_CODE_RANGE_NUM(range)     ((int )range[0])
-#define ONIGENC_CODE_RANGE_FROM(range,i)  range[((i)*2) + 1]
-#define ONIGENC_CODE_RANGE_TO(range,i)    range[((i)*2) + 2]
-
-typedef struct {
-  int byte_len;  /* argument(original) character(s) byte length */
-  int code_len;  /* number of code */
-  OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN];
-} OnigCaseFoldCodeItem;
-
-typedef struct {
-  OnigCodePoint esc;
-  OnigCodePoint anychar;
-  OnigCodePoint anytime;
-  OnigCodePoint zero_or_one_time;
-  OnigCodePoint one_or_more_time;
-  OnigCodePoint anychar_anytime;
-} OnigMetaCharTableType;
-  
-typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
-
-typedef struct OnigEncodingTypeST {
-  int    (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
-  const char*   name;
-  int           max_enc_len;
-  int           min_enc_len;
-  int    (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
-  OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
-  int    (*code_to_mbclen)(OnigCodePoint code, struct OnigEncodingTypeST* enc);
-  int    (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf, struct OnigEncodingTypeST* enc);
-  int    (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, struct OnigEncodingTypeST* enc);
-  int    (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, struct OnigEncodingTypeST* enc);
-  int    (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], struct OnigEncodingTypeST* enc);
-  int    (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end);
-  int    (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, struct OnigEncodingTypeST* enc);
-  int    (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], struct OnigEncodingTypeST* enc);
-  OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, struct OnigEncodingTypeST* enc);
-  int    (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
-  void *auxiliary_data;
-  int ruby_encoding_index;
-} OnigEncodingType;
-
-typedef OnigEncodingType* OnigEncoding;
-
-ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
-
-#define ONIG_ENCODING_ASCII        (&OnigEncodingASCII)
-
-#define ONIG_ENCODING_UNDEF    ((OnigEncoding )0)
-
-
-/* work size */
-#define ONIGENC_CODE_TO_MBC_MAXLEN       7
-#define ONIGENC_MBC_CASE_FOLD_MAXLEN    18
-/* 18: 6(max-byte) * 3(case-fold chars) */
-
-/* character types */
-#define ONIGENC_CTYPE_NEWLINE   0
-#define ONIGENC_CTYPE_ALPHA     1
-#define ONIGENC_CTYPE_BLANK     2
-#define ONIGENC_CTYPE_CNTRL     3
-#define ONIGENC_CTYPE_DIGIT     4
-#define ONIGENC_CTYPE_GRAPH     5
-#define ONIGENC_CTYPE_LOWER     6
-#define ONIGENC_CTYPE_PRINT     7
-#define ONIGENC_CTYPE_PUNCT     8
-#define ONIGENC_CTYPE_SPACE     9
-#define ONIGENC_CTYPE_UPPER    10
-#define ONIGENC_CTYPE_XDIGIT   11
-#define ONIGENC_CTYPE_WORD     12
-#define ONIGENC_CTYPE_ALNUM    13  /* alpha || digit */
-#define ONIGENC_CTYPE_ASCII    14
-#define ONIGENC_MAX_STD_CTYPE  ONIGENC_CTYPE_ASCII
-
-
-#define onig_enc_len(enc,p,e)                ONIGENC_MBC_ENC_LEN(enc, p, e)
-
-#define ONIGENC_IS_UNDEF(enc)          ((enc) == ONIG_ENCODING_UNDEF)
-#define ONIGENC_IS_SINGLEBYTE(enc)     (ONIGENC_MBC_MAXLEN(enc) == 1)
-#define ONIGENC_IS_MBC_HEAD(enc,p,e)   (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1)
-#define ONIGENC_IS_MBC_ASCII(p)           (*(p)   < 128)
-#define ONIGENC_IS_CODE_ASCII(code)       ((code) < 128)
-#define ONIGENC_IS_MBC_WORD(enc,s,end) \
-   ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
-
-
-#define ONIGENC_NAME(enc)                      ((enc)->name)
-
-#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
-  (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc)
-#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
-        (enc)->is_allowed_reverse_match(s,end,enc)
-#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
-        (enc)->left_adjust_char_head(start, s, enc)
-#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
-        (enc)->apply_all_case_fold(case_fold_flag,f,arg,enc)
-#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
-       (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs,enc)
-#define ONIGENC_STEP_BACK(enc,start,s,n) \
-        onigenc_step_back((enc),(start),(s),(n))
-
-#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n)   (n)
-#define ONIGENC_MBCLEN_CHARFOUND_P(r)           (0 < (r))
-#define ONIGENC_MBCLEN_CHARFOUND_LEN(r)         (r)
-
-#define ONIGENC_CONSTRUCT_MBCLEN_INVALID()      (-1)
-#define ONIGENC_MBCLEN_INVALID_P(r)             ((r) == -1)
-
-#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)    (-1-(n))
-#define ONIGENC_MBCLEN_NEEDMORE_P(r)            ((r) < -1)
-#define ONIGENC_MBCLEN_NEEDMORE_LEN(r)          (-1-(r))
-
-#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e)   (enc)->precise_mbc_enc_len(p,e,enc)
-
-ONIG_EXTERN
-int onigenc_mbclen_approximate P_((const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc));
-
-#define ONIGENC_MBC_ENC_LEN(enc,p,e)           onigenc_mbclen_approximate(p,e,enc)
-#define ONIGENC_MBC_MAXLEN(enc)               ((enc)->max_enc_len)
-#define ONIGENC_MBC_MAXLEN_DIST(enc)           ONIGENC_MBC_MAXLEN(enc)
-#define ONIGENC_MBC_MINLEN(enc)               ((enc)->min_enc_len)
-#define ONIGENC_IS_MBC_NEWLINE(enc,p,end)      (enc)->is_mbc_newline((p),(end),enc)
-#define ONIGENC_MBC_TO_CODE(enc,p,end)         (enc)->mbc_to_code((p),(end),enc)
-#define ONIGENC_CODE_TO_MBCLEN(enc,code)       (enc)->code_to_mbclen(code,enc)
-#define ONIGENC_CODE_TO_MBC(enc,code,buf)      (enc)->code_to_mbc(code,buf,enc)
-#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
-  (enc)->property_name_to_ctype(enc,p,end)
-
-#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype)  (enc)->is_code_ctype(code,ctype,enc)
-
-#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
-#define ONIGENC_IS_CODE_GRAPH(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
-#define ONIGENC_IS_CODE_PRINT(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
-#define ONIGENC_IS_CODE_ALNUM(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
-#define ONIGENC_IS_CODE_ALPHA(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
-#define ONIGENC_IS_CODE_LOWER(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
-#define ONIGENC_IS_CODE_UPPER(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
-#define ONIGENC_IS_CODE_CNTRL(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
-#define ONIGENC_IS_CODE_PUNCT(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
-#define ONIGENC_IS_CODE_SPACE(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
-#define ONIGENC_IS_CODE_BLANK(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
-#define ONIGENC_IS_CODE_DIGIT(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
-#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
-#define ONIGENC_IS_CODE_WORD(enc,code) \
-        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
-
-#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
-        (enc)->get_ctype_code_range(ctype,sbout,ranges,enc)
-
-ONIG_EXTERN
-OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n));
-
-
-/* encoding API */
-ONIG_EXTERN
-int onigenc_init P_((void));
-ONIG_EXTERN
-int onigenc_set_default_encoding P_((OnigEncoding enc));
-ONIG_EXTERN
-OnigEncoding onigenc_get_default_encoding P_((void));
-ONIG_EXTERN
-void  onigenc_set_default_caseconv_table P_((const OnigUChar* table));
-ONIG_EXTERN
-OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar** prev));
-ONIG_EXTERN
-OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
-ONIG_EXTERN
-OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
-ONIG_EXTERN
-OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
-ONIG_EXTERN
-int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end));
-ONIG_EXTERN
-int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
-ONIG_EXTERN
-int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
-
-
-
-/* PART: regular expression */
-
-/* config parameters */
-#define ONIG_NREGION                          10
-#define ONIG_MAX_BACKREF_NUM                1000
-#define ONIG_MAX_REPEAT_NUM               100000
-#define ONIG_MAX_MULTI_BYTE_RANGES_NUM     10000
-/* constants */
-#define ONIG_MAX_ERROR_MESSAGE_LEN            90
-
-typedef unsigned int        OnigOptionType;
-
-#define ONIG_OPTION_DEFAULT            ONIG_OPTION_NONE
-
-/* options */
-#define ONIG_OPTION_NONE                 0U
-#define ONIG_OPTION_IGNORECASE           1U
-#define ONIG_OPTION_EXTEND               (ONIG_OPTION_IGNORECASE         << 1)
-#define ONIG_OPTION_MULTILINE            (ONIG_OPTION_EXTEND             << 1)
-#define ONIG_OPTION_SINGLELINE           (ONIG_OPTION_MULTILINE          << 1)
-#define ONIG_OPTION_FIND_LONGEST         (ONIG_OPTION_SINGLELINE         << 1)
-#define ONIG_OPTION_FIND_NOT_EMPTY       (ONIG_OPTION_FIND_LONGEST       << 1)
-#define ONIG_OPTION_NEGATE_SINGLELINE    (ONIG_OPTION_FIND_NOT_EMPTY     << 1)
-#define ONIG_OPTION_DONT_CAPTURE_GROUP   (ONIG_OPTION_NEGATE_SINGLELINE  << 1)
-#define ONIG_OPTION_CAPTURE_GROUP        (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
-/* options (search time) */
-#define ONIG_OPTION_NOTBOL               (ONIG_OPTION_CAPTURE_GROUP << 1)
-#define ONIG_OPTION_NOTEOL               (ONIG_OPTION_NOTBOL << 1)
-#define ONIG_OPTION_POSIX_REGION         (ONIG_OPTION_NOTEOL << 1)
-#define ONIG_OPTION_MAXBIT               ONIG_OPTION_POSIX_REGION  /* limit */
-
-#define ONIG_OPTION_ON(options,regopt)      ((options) |= (regopt))
-#define ONIG_OPTION_OFF(options,regopt)     ((options) &= ~(regopt))
-#define ONIG_IS_OPTION_ON(options,option)   ((options) & (option))
-
-/* syntax */
-typedef struct {
-  unsigned int   op;
-  unsigned int   op2;
-  unsigned int   behavior;
-  OnigOptionType options;   /* default option */
-  OnigMetaCharTableType meta_char_table;
-} OnigSyntaxType;
-
-ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
-ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
-ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
-ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
-ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
-ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
-ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
-ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
-ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG;
-ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
-
-/* predefined syntaxes (see regsyntax.c) */
-#define ONIG_SYNTAX_ASIS               (&OnigSyntaxASIS)
-#define ONIG_SYNTAX_POSIX_BASIC        (&OnigSyntaxPosixBasic)
-#define ONIG_SYNTAX_POSIX_EXTENDED     (&OnigSyntaxPosixExtended)
-#define ONIG_SYNTAX_EMACS              (&OnigSyntaxEmacs)
-#define ONIG_SYNTAX_GREP               (&OnigSyntaxGrep)
-#define ONIG_SYNTAX_GNU_REGEX          (&OnigSyntaxGnuRegex)
-#define ONIG_SYNTAX_JAVA               (&OnigSyntaxJava)
-#define ONIG_SYNTAX_PERL               (&OnigSyntaxPerl)
-#define ONIG_SYNTAX_PERL_NG            (&OnigSyntaxPerl_NG)
-#define ONIG_SYNTAX_RUBY               (&OnigSyntaxRuby)
-
-/* default syntax */
-ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;
-#define ONIG_SYNTAX_DEFAULT   OnigDefaultSyntax
-
-/* syntax (operators) */
-#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS    (1U<<0)
-#define ONIG_SYN_OP_DOT_ANYCHAR                 (1U<<1)   /* . */
-#define ONIG_SYN_OP_ASTERISK_ZERO_INF           (1U<<2)   /* * */
-#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF       (1U<<3)
-#define ONIG_SYN_OP_PLUS_ONE_INF                (1U<<4)   /* + */
-#define ONIG_SYN_OP_ESC_PLUS_ONE_INF            (1U<<5)
-#define ONIG_SYN_OP_QMARK_ZERO_ONE              (1U<<6)   /* ? */
-#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE          (1U<<7)
-#define ONIG_SYN_OP_BRACE_INTERVAL              (1U<<8)   /* {lower,upper} */
-#define ONIG_SYN_OP_ESC_BRACE_INTERVAL          (1U<<9)   /* \{lower,upper\} */
-#define ONIG_SYN_OP_VBAR_ALT                    (1U<<10)   /* | */
-#define ONIG_SYN_OP_ESC_VBAR_ALT                (1U<<11)  /* \| */
-#define ONIG_SYN_OP_LPAREN_SUBEXP               (1U<<12)  /* (...)   */
-#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP           (1U<<13)  /* \(...\) */
-#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR           (1U<<14)  /* \A, \Z, \z */
-#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR  (1U<<15)  /* \G     */
-#define ONIG_SYN_OP_DECIMAL_BACKREF             (1U<<16)  /* \num   */
-#define ONIG_SYN_OP_BRACKET_CC                  (1U<<17)  /* [...]  */
-#define ONIG_SYN_OP_ESC_W_WORD                  (1U<<18)  /* \w, \W */
-#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END     (1U<<19)  /* \<. \> */
-#define ONIG_SYN_OP_ESC_B_WORD_BOUND            (1U<<20)  /* \b, \B */
-#define ONIG_SYN_OP_ESC_S_WHITE_SPACE           (1U<<21)  /* \s, \S */
-#define ONIG_SYN_OP_ESC_D_DIGIT                 (1U<<22)  /* \d, \D */
-#define ONIG_SYN_OP_LINE_ANCHOR                 (1U<<23)  /* ^, $   */
-#define ONIG_SYN_OP_POSIX_BRACKET               (1U<<24)  /* [:xxxx:] */
-#define ONIG_SYN_OP_QMARK_NON_GREEDY            (1U<<25)  /* ??,*?,+?,{n,m}? */
-#define ONIG_SYN_OP_ESC_CONTROL_CHARS           (1U<<26)  /* \n,\r,\t,\a ... */
-#define ONIG_SYN_OP_ESC_C_CONTROL               (1U<<27)  /* \cx  */
-#define ONIG_SYN_OP_ESC_OCTAL3                  (1U<<28)  /* \OOO */
-#define ONIG_SYN_OP_ESC_X_HEX2                  (1U<<29)  /* \xHH */
-#define ONIG_SYN_OP_ESC_X_BRACE_HEX8            (1U<<30)  /* \x{7HHHHHHH} */
-
-#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE        (1U<<0)  /* \Q...\E */
-#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT         (1U<<1)  /* (?...) */
-#define ONIG_SYN_OP2_OPTION_PERL                (1U<<2)  /* (?imsx),(?-imsx) */
-#define ONIG_SYN_OP2_OPTION_RUBY                (1U<<3)  /* (?imx), (?-imx)  */
-#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT     (1U<<4)  /* ?+,*+,++ */
-#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL   (1U<<5)  /* {n,m}+   */
-#define ONIG_SYN_OP2_CCLASS_SET_OP              (1U<<6)  /* [...&&..[..]..] */
-#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       (1U<<7)  /* (?<name>...) */
-#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        (1U<<8)  /* \k<name> */
-#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL          (1U<<9)  /* \g<name>, \g<n> */
-#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY     (1U<<10) /* (?@..),(?@<x>..) */
-#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL  (1U<<11) /* \C-x */
-#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META     (1U<<12) /* \M-x */
-#define ONIG_SYN_OP2_ESC_V_VTAB                 (1U<<13) /* \v as VTAB */
-#define ONIG_SYN_OP2_ESC_U_HEX4                 (1U<<14) /* \uHHHH */
-#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR         (1U<<15) /* \`, \' */
-#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  (1U<<16) /* \p{...}, \P{...} */
-#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
-/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
-#define ONIG_SYN_OP2_ESC_H_XDIGIT               (1U<<19) /* \h, \H */
-#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE         (1U<<20) /* \ */
-
-/* syntax (behavior) */
-#define ONIG_SYN_CONTEXT_INDEP_ANCHORS           (1U<<31) /* not implemented */
-#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS        (1U<<0)  /* ?, *, +, {n,m} */
-#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS      (1U<<1)  /* error or ignore */
-#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP    (1U<<2)  /* ...)... */
-#define ONIG_SYN_ALLOW_INVALID_INTERVAL          (1U<<3)  /* {??? */
-#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV       (1U<<4)  /* {,n} => {0,n} */
-#define ONIG_SYN_STRICT_CHECK_BACKREF            (1U<<5)  /* /(\1)/,/\1()/ ..*/
-#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND   (1U<<6)  /* (?<=a|bc) */
-#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP        (1U<<7)  /* see doc/RE */
-#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8)  /* (?<x>)(?<x>) */
-#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY   (1U<<9)  /* a{n}?=(?:a{n})? */
-
-/* syntax (behavior) in char class [...] */
-#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC      (1U<<20) /* [^...] */
-#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC          (1U<<21) /* [..\w..] etc.. */
-#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC         (1U<<22)
-#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC     (1U<<23) /* [0-9-a]=[0-9\-a] */
-/* syntax (behavior) warning */
-#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED          (1U<<24) /* [,-,] */
-#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT    (1U<<25) /* (?:a*)+ */
-
-/* meta character specifiers (onig_set_meta_char()) */
-#define ONIG_META_CHAR_ESCAPE               0
-#define ONIG_META_CHAR_ANYCHAR              1
-#define ONIG_META_CHAR_ANYTIME              2
-#define ONIG_META_CHAR_ZERO_OR_ONE_TIME     3
-#define ONIG_META_CHAR_ONE_OR_MORE_TIME     4
-#define ONIG_META_CHAR_ANYCHAR_ANYTIME      5
-
-#define ONIG_INEFFECTIVE_META_CHAR          0
-
-/* error codes */
-#define ONIG_IS_PATTERN_ERROR(ecode)   ((ecode) <= -100 && (ecode) > -1000)
-/* normal return */
-#define ONIG_NORMAL                                            0
-#define ONIG_MISMATCH                                         -1
-#define ONIG_NO_SUPPORT_CONFIG                                -2
-
-/* internal error */
-#define ONIGERR_MEMORY                                         -5
-#define ONIGERR_TYPE_BUG                                       -6
-#define ONIGERR_PARSER_BUG                                    -11
-#define ONIGERR_STACK_BUG                                     -12
-#define ONIGERR_UNDEFINED_BYTECODE                            -13
-#define ONIGERR_UNEXPECTED_BYTECODE                           -14
-#define ONIGERR_MATCH_STACK_LIMIT_OVER                        -15
-#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED                -21
-#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR  -22
-/* general error */
-#define ONIGERR_INVALID_ARGUMENT                              -30 
-/* syntax error */
-#define ONIGERR_END_PATTERN_AT_LEFT_BRACE                    -100
-#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET                  -101
-#define ONIGERR_EMPTY_CHAR_CLASS                             -102
-#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS                  -103
-#define ONIGERR_END_PATTERN_AT_ESCAPE                        -104
-#define ONIGERR_END_PATTERN_AT_META                          -105
-#define ONIGERR_END_PATTERN_AT_CONTROL                       -106
-#define ONIGERR_META_CODE_SYNTAX                             -108
-#define ONIGERR_CONTROL_CODE_SYNTAX                          -109
-#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE             -110
-#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE           -111
-#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS      -112
-#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED      -113
-#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID            -114
-#define ONIGERR_NESTED_REPEAT_OPERATOR                       -115
-#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS                  -116
-#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS       -117
-#define ONIGERR_END_PATTERN_IN_GROUP                         -118
-#define ONIGERR_UNDEFINED_GROUP_OPTION                       -119
-#define ONIGERR_INVALID_POSIX_BRACKET_TYPE                   -121
-#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN                  -122
-#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN                 -123
-/* values error (syntax error) */
-#define ONIGERR_TOO_BIG_NUMBER                               -200
-#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE              -201
-#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE     -202
-#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS                    -203
-#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE          -204
-#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES                   -205
-#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING                  -206
-#define ONIGERR_TOO_BIG_BACKREF_NUMBER                       -207
-#define ONIGERR_INVALID_BACKREF                              -208
-#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED         -209
-#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE                     -212
-#define ONIGERR_EMPTY_GROUP_NAME                             -214
-#define ONIGERR_INVALID_GROUP_NAME                           -215
-#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME                   -216
-#define ONIGERR_UNDEFINED_NAME_REFERENCE                     -217
-#define ONIGERR_UNDEFINED_GROUP_REFERENCE                    -218
-#define ONIGERR_MULTIPLEX_DEFINED_NAME                       -219
-#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL               -220
-#define ONIGERR_NEVER_ENDING_RECURSION                       -221
-#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY        -222
-#define ONIGERR_INVALID_CHAR_PROPERTY_NAME                   -223
-#define ONIGERR_INVALID_CODE_POINT_VALUE                     -400
-#define ONIGERR_INVALID_WIDE_CHAR_VALUE                      -400
-#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE                      -401
-#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION           -402
-#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS               -403
-
-/* errors related to thread */
-#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT                -1001
-
-
-/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
-#define ONIG_MAX_CAPTURE_HISTORY_GROUP   31
-#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
-  ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
-
-typedef struct OnigCaptureTreeNodeStruct {
-  int group;   /* group number */
-  int beg;
-  int end;
-  int allocated;
-  int num_childs;
-  struct OnigCaptureTreeNodeStruct** childs;
-} OnigCaptureTreeNode;
-
-/* match result region type */
-struct re_registers {
-  int  allocated;
-  int  num_regs;
-  int* beg;
-  int* end;
-  /* extended */
-  OnigCaptureTreeNode* history_root;  /* capture history tree root */
-};
-
-/* capture tree traverse */
-#define ONIG_TRAVERSE_CALLBACK_AT_FIRST   1
-#define ONIG_TRAVERSE_CALLBACK_AT_LAST    2
-#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
-  ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
-
-
-#define ONIG_REGION_NOTPOS            -1
-
-typedef struct re_registers   OnigRegion;
-
-typedef struct {
-  OnigEncoding enc;
-  OnigUChar* par;
-  OnigUChar* par_end;
-} OnigErrorInfo;
-
-typedef struct {
-  int lower;
-  int upper;
-} OnigRepeatRange;
-
-typedef void (*OnigWarnFunc) P_((const char* s));
-extern void onig_null_warn P_((const char* s));
-#define ONIG_NULL_WARN       onig_null_warn
-
-#define ONIG_CHAR_TABLE_SIZE   256
-
-/* regex_t state */
-#define ONIG_STATE_NORMAL              0
-#define ONIG_STATE_SEARCHING           1
-#define ONIG_STATE_COMPILING          -1
-#define ONIG_STATE_MODIFY             -2
-
-#define ONIG_STATE(reg) \
-  ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
-
-typedef struct re_pattern_buffer {
-  /* common members of BBuf(bytes-buffer) */
-  unsigned char* p;         /* compiled pattern */
-  unsigned int used;        /* used space for p */
-  unsigned int alloc;       /* allocated space for p */
-
-  int state;                     /* normal, searching, compiling */
-  int num_mem;                   /* used memory(...) num counted from 1 */
-  int num_repeat;                /* OP_REPEAT/OP_REPEAT_NG id-counter */
-  int num_null_check;            /* OP_NULL_CHECK_START/END id counter */
-  int num_comb_exp_check;        /* combination explosion check */
-  int num_call;                  /* number of subexp call */
-  unsigned int capture_history;  /* (?@...) flag (1-31) */
-  unsigned int bt_mem_start;     /* need backtrack flag */
-  unsigned int bt_mem_end;       /* need backtrack flag */
-  int stack_pop_level;
-  int repeat_range_alloc;
-  OnigRepeatRange* repeat_range;
-
-  OnigEncoding      enc;
-  OnigOptionType    options;
-  OnigSyntaxType*   syntax;
-  OnigCaseFoldType  case_fold_flag;
-  void*             name_table;
-
-  /* optimization info (string search, char-map and anchors) */
-  int            optimize;          /* optimize flag */
-  int            threshold_len;     /* search str-length for apply optimize */
-  int            anchor;            /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
-  OnigDistance   anchor_dmin;       /* (SEMI_)END_BUF anchor distance */
-  OnigDistance   anchor_dmax;       /* (SEMI_)END_BUF anchor distance */
-  int            sub_anchor;        /* start-anchor for exact or map */
-  unsigned char *exact;
-  unsigned char *exact_end;
-  unsigned char  map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
-  int           *int_map;                   /* BM skip for exact_len > 255 */
-  int           *int_map_backward;          /* BM skip for backward search */
-  OnigDistance   dmin;                      /* min-distance of exact or map */
-  OnigDistance   dmax;                      /* max-distance of exact or map */
-
-  /* regex_t link chain */
-  struct re_pattern_buffer* chain;  /* escape compile-conflict */
-} OnigRegexType;
-
-typedef OnigRegexType*  OnigRegex;
-
-#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
-  typedef OnigRegexType  regex_t;
-#endif
-
-
-typedef struct {
-  int             num_of_elements;
-  OnigEncoding    pattern_enc;
-  OnigEncoding    target_enc;
-  OnigSyntaxType* syntax;
-  OnigOptionType  option;
-  OnigCaseFoldType   case_fold_flag;
-} OnigCompileInfo;
-
-/* Oniguruma Native API */
-ONIG_EXTERN
-int onig_init P_((void));
-ONIG_EXTERN
-int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...));
-ONIG_EXTERN
-void onig_set_warn_func P_((OnigWarnFunc f));
-ONIG_EXTERN
-void onig_set_verb_warn_func P_((OnigWarnFunc f));
-ONIG_EXTERN
-int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
-ONIG_EXTERN
-int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
-ONIG_EXTERN
-void onig_free P_((OnigRegex));
-ONIG_EXTERN
-int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
-ONIG_EXTERN
-int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
-ONIG_EXTERN
-int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
-ONIG_EXTERN
-int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
-ONIG_EXTERN
-OnigRegion* onig_region_new P_((void));
-ONIG_EXTERN
-void onig_region_init P_((OnigRegion* region));
-ONIG_EXTERN
-void onig_region_free P_((OnigRegion* region, int free_self));
-ONIG_EXTERN
-void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
-ONIG_EXTERN
-void onig_region_clear P_((OnigRegion* region));
-ONIG_EXTERN
-int onig_region_resize P_((OnigRegion* region, int n));
-ONIG_EXTERN
-int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
-ONIG_EXTERN
-int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
-ONIG_EXTERN
-int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
-ONIG_EXTERN
-int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg));
-ONIG_EXTERN
-int onig_number_of_names P_((OnigRegex reg));
-ONIG_EXTERN
-int onig_number_of_captures P_((OnigRegex reg));
-ONIG_EXTERN
-int onig_number_of_capture_histories P_((OnigRegex reg));
-ONIG_EXTERN
-OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
-ONIG_EXTERN
-int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg));
-ONIG_EXTERN
-int onig_noname_group_capture_is_active P_((OnigRegex reg));
-ONIG_EXTERN
-OnigEncoding onig_get_encoding P_((OnigRegex reg));
-ONIG_EXTERN
-OnigOptionType onig_get_options P_((OnigRegex reg));
-ONIG_EXTERN
-OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg));
-ONIG_EXTERN
-OnigSyntaxType* onig_get_syntax P_((OnigRegex reg));
-ONIG_EXTERN
-int onig_set_default_syntax P_((OnigSyntaxType* syntax));
-ONIG_EXTERN
-void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
-ONIG_EXTERN
-unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax));
-ONIG_EXTERN
-unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax));
-ONIG_EXTERN
-unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax));
-ONIG_EXTERN
-OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax));
-ONIG_EXTERN
-void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
-ONIG_EXTERN
-void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
-ONIG_EXTERN
-void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
-ONIG_EXTERN
-void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
-ONIG_EXTERN
-int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code));
-ONIG_EXTERN
-void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
-ONIG_EXTERN
-OnigCaseFoldType onig_get_default_case_fold_flag P_((void));
-ONIG_EXTERN
-int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag));
-ONIG_EXTERN
-unsigned int onig_get_match_stack_limit_size P_((void));
-ONIG_EXTERN
-int onig_set_match_stack_limit_size P_((unsigned int size));
-ONIG_EXTERN
-int onig_end P_((void));
-ONIG_EXTERN
-const char* onig_version P_((void));
-ONIG_EXTERN
-const char* onig_copyright P_((void));
-
-#ifdef __cplusplus
-#if 0
-{ /* satisfy cc-mode */
-#endif
-}
-#endif
-
-#endif /* ONIGURUMA_H */

Deleted: MacRuby/trunk/include/ruby/re.h
===================================================================
--- MacRuby/trunk/include/ruby/re.h	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/include/ruby/re.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,67 +0,0 @@
-/**********************************************************************
-
-  re.h -
-
-  $Author: akr $
-  created at: Thu Sep 30 14:18:32 JST 1993
-
-  Copyright (C) 1993-2007 Yukihiro Matsumoto
-
-**********************************************************************/
-
-#ifndef RUBY_RE_H
-#define RUBY_RE_H 1
-
-#if defined(__cplusplus)
-extern "C" {
-#if 0
-} /* satisfy cc-mode */
-#endif
-#endif
-
-#include <sys/types.h>
-#include <stdio.h>
-
-#include "ruby/regex.h"
-
-typedef struct re_pattern_buffer Regexp;
-
-struct rmatch_offset {
-    int beg;
-    int end;
-};
-
-struct rmatch {
-    struct re_registers regs;
-
-    int char_offset_updated;
-    int char_offset_num_allocated;
-    struct rmatch_offset *char_offset;
-};
-
-struct RMatch {
-    struct RBasic basic;
-    VALUE str;
-    struct rmatch *rmatch;
-    VALUE regexp;  /* RRegexp */
-};
-
-#define RMATCH(obj)  (R_CAST(RMatch)(obj))
-#define RMATCH_REGS(obj)  (&(R_CAST(RMatch)(obj))->rmatch->regs)
-
-VALUE rb_reg_regcomp(VALUE);
-int rb_reg_search(VALUE, VALUE, int, int);
-int rb_reg_search2(VALUE, VALUE, int, int, bool);
-VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE);
-int rb_reg_adjust_startpos(VALUE, VALUE, int, int);
-void rb_match_busy(VALUE);
-VALUE rb_reg_quote(VALUE);
-
-#if defined(__cplusplus)
-#if 0
-{ /* satisfy cc-mode */
-#endif
-}  /* extern "C" { */
-#endif
-
-#endif /* RUBY_RE_H */

Copied: MacRuby/trunk/include/ruby/re.h (from rev 3744, MacRuby/branches/icu/include/ruby/re.h)
===================================================================
--- MacRuby/trunk/include/ruby/re.h	                        (rev 0)
+++ MacRuby/trunk/include/ruby/re.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1 @@
+// This file is there for backward compatibility with MRI.

Deleted: MacRuby/trunk/include/ruby/regex.h
===================================================================
--- MacRuby/trunk/include/ruby/regex.h	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/include/ruby/regex.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,38 +0,0 @@
-/**********************************************************************
-
-  regex.h -
-
-  $Author: akr $
-
-  Copyright (C) 1993-2007 Yukihiro Matsumoto
-
-**********************************************************************/
-
-#ifndef ONIGURUMA_REGEX_H
-#define ONIGURUMA_REGEX_H 1
-
-#if defined(__cplusplus)
-extern "C" {
-#if 0
-} /* satisfy cc-mode */
-#endif
-#endif
-
-#include "oniguruma.h"
-
-#ifndef ONIG_RUBY_M17N
-
-ONIG_EXTERN OnigEncoding    OnigEncDefaultCharEncoding;
-
-#define mbclen(p,e,enc)  rb_enc_mbclen((p),(e),(enc))
-
-#endif /* ifndef ONIG_RUBY_M17N */
-
-#if defined(__cplusplus)
-#if 0
-{ /* satisfy cc-mode */
-#endif
-}  /* extern "C" { */
-#endif
-
-#endif /* ONIGURUMA_REGEX_H */

Modified: MacRuby/trunk/include/ruby/ruby.h
===================================================================
--- MacRuby/trunk/include/ruby/ruby.h	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/include/ruby/ruby.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -249,7 +249,8 @@
 #else
 // voodoo_float must be a function
 // because the parameter must be converted to float
-static inline VALUE voodoo_float(float f)
+static inline VALUE
+voodoo_float(float f)
 {
     return *(VALUE *)(&f);
 }
@@ -258,15 +259,9 @@
 #define FIXFLOAT_P(v)  (((VALUE)v & IMMEDIATE_MASK) == FIXFLOAT_FLAG)
 #define FIXFLOAT2DBL(v) coerce_ptr_to_double((VALUE)v)
 
-#if WITH_OBJC
-# define SYMBOL_P(x) (TYPE(x) == T_SYMBOL)
-# define ID2SYM(x) (rb_id2str((ID)x))
-# define SYM2ID(x) (RSYMBOL(x)->id)
-#else
-# define SYMBOL_P(x) (((VALUE)(x)&~(~(VALUE)0<<RUBY_SPECIAL_SHIFT))==SYMBOL_FLAG)
-# define ID2SYM(x) (((VALUE)(x)<<RUBY_SPECIAL_SHIFT)|SYMBOL_FLAG)
-# define SYM2ID(x) RSHIFT((unsigned long)x,RUBY_SPECIAL_SHIFT)
-#endif
+#define SYMBOL_P(x) (TYPE(x) == T_SYMBOL)
+#define ID2SYM(x) (rb_id2str((ID)x))
+#define SYM2ID(x) (rb_sym2id((VALUE)x))
 
 /* special contants - i.e. non-zero and non-fixnum constants */
 enum ruby_special_consts {
@@ -396,18 +391,6 @@
 #define StringValuePtr(v) rb_string_value_ptr(&(v))
 #define StringValueCStr(v) rb_string_value_cstr(&(v))
 
-VALUE rb_bytestring_new();
-VALUE rb_bytestring_new_with_data(const UInt8 *buf, long size);
-VALUE rb_bytestring_new_with_cfdata(CFMutableDataRef data);
-VALUE rb_bytestring_copy(VALUE str);
-CFMutableDataRef rb_bytestring_wrapped_data(VALUE);
-CFStringRef rb_bytestring_resolve_cfstring(VALUE str);
-UInt8 *rb_bytestring_byte_pointer(VALUE);
-VALUE rb_coerce_to_bytestring(VALUE);
-long rb_bytestring_length(VALUE str);
-void rb_bytestring_resize(VALUE str, long newsize);
-void rb_bytestring_append_bytes(VALUE str, const UInt8* bytes, long len);
-
 void rb_check_safe_obj(VALUE);
 void rb_check_safe_str(VALUE);
 #define SafeStringValue(v) do {\
@@ -658,13 +641,6 @@
 # define RARRAY_PTR(a) (rb_ary_ptr((VALUE)a)) 
 #endif
 
-struct RRegexp {
-    struct RBasic basic;
-    struct re_pattern_buffer *ptr;
-    long len;
-    char *str;
-};
-
 #if !WITH_OBJC
 struct RHash {
     struct RBasic basic;
@@ -795,17 +771,7 @@
 #define RFLOAT(obj)  (R_CAST(RFloat)(obj))
 #if WITH_OBJC
 # define RFIXNUM(obj) (R_CAST(RFixnum)(obj))
-# define RSYMBOL(obj) (R_CAST(RSymbol)(obj))
 #endif
-#define RSTRING(obj) (R_CAST(RString)(obj))
-#define RREGEXP(obj) (R_CAST(RRegexp)(obj))
-#if !WITH_OBJC
-# define RCLASS(obj)  (R_CAST(RClass)(obj))
-# define RMODULE(obj) RCLASS(obj)
-# define RSTRING(obj) (R_CAST(RString)(obj))
-# define RARRAY(obj)  (R_CAST(RArray)(obj))
-# define RHASH(obj)   (R_CAST(RHash)(obj))
-#endif
 #define RDATA(obj)   (R_CAST(RData)(obj))
 #define RSTRUCT(obj) (R_CAST(RStruct)(obj))
 #define RBIGNUM(obj) (R_CAST(RBignum)(obj))
@@ -939,34 +905,18 @@
 ID rb_intern2(const char*, long);
 ID rb_intern_str(VALUE str);
 ID rb_to_id(VALUE);
+ID rb_sym2id(VALUE sym);
 VALUE rb_id2str(ID);
 VALUE rb_name2sym(const char *);
-#if WITH_OBJC
-# define rb_sym2name(sym) (RSYMBOL(sym)->str)
+const char *rb_sym2name(VALUE sym);
+
 static inline
 const char *rb_id2name(ID val)
 {
     VALUE s = rb_id2str(val);
     return s == 0 ? NULL : rb_sym2name(s);
 }
-#else
-const char *rb_id2name(ID);
-#endif
 
-#ifdef __GNUC__
-/* __builtin_constant_p and statement expression is available
- * since gcc-2.7.2.3 at least. */
-#define rb_intern(str) \
-    (__builtin_constant_p(str) ? \
-        ({ \
-            static ID rb_intern_id_cache; \
-            if (!rb_intern_id_cache) \
-                rb_intern_id_cache = rb_intern(str); \
-            rb_intern_id_cache; \
-        }) : \
-        rb_intern(str))
-#endif
-
 const char *rb_class2name(VALUE);
 const char *rb_obj_classname(VALUE);
 
@@ -1120,7 +1070,6 @@
 RUBY_EXTERN VALUE rb_cSet;
 RUBY_EXTERN VALUE rb_cStat;
 RUBY_EXTERN VALUE rb_cString;
-RUBY_EXTERN VALUE rb_cByteString;
 RUBY_EXTERN VALUE rb_cStruct;
 RUBY_EXTERN VALUE rb_cSymbol;
 RUBY_EXTERN VALUE rb_cThread;
@@ -1134,9 +1083,9 @@
 RUBY_EXTERN VALUE rb_cRandom;
 
 #if WITH_OBJC
-RUBY_EXTERN VALUE rb_cCFString;
 RUBY_EXTERN VALUE rb_cNSString;
 RUBY_EXTERN VALUE rb_cNSMutableString;
+RUBY_EXTERN VALUE rb_cRubyString;
 RUBY_EXTERN VALUE rb_cNSArray;
 RUBY_EXTERN VALUE rb_cNSMutableArray;
 RUBY_EXTERN VALUE rb_cRubyArray;
@@ -1147,11 +1096,6 @@
 RUBY_EXTERN VALUE rb_cBoxed;
 RUBY_EXTERN VALUE rb_cPointer;
 RUBY_EXTERN VALUE rb_cTopLevel;
-
-bool __CFStringIsMutable(void *);
-#define RSTRING_IMMUTABLE(o) \
-    (*(VALUE *)o == rb_cCFString ? !__CFStringIsMutable((void *)o) : false)
-
 #endif
 
 RUBY_EXTERN VALUE rb_eException;
@@ -1257,7 +1201,7 @@
     }
 #if WITH_OBJC
     else if ((k = *(Class *)obj) != NULL) {
-	if (k == (Class)rb_cCFString) {
+	if (k == (Class)rb_cRubyString) {
 	    return T_STRING;
 	}
 	if (k == (Class)rb_cRubyArray) {
@@ -1413,20 +1357,20 @@
 /* locale insensitive functions */
 
 #define rb_isascii(c) ((unsigned long)(c) < 128)
-int rb_isalnum(int c);
-int rb_isalpha(int c);
-int rb_isblank(int c);
-int rb_iscntrl(int c);
-int rb_isdigit(int c);
-int rb_isgraph(int c);
-int rb_islower(int c);
-int rb_isprint(int c);
-int rb_ispunct(int c);
-int rb_isspace(int c);
-int rb_isupper(int c);
-int rb_isxdigit(int c);
-int rb_tolower(int c);
-int rb_toupper(int c);
+#define rb_isalnum(c) (rb_isascii(c) && isalnum(c))
+#define rb_isalpha(c) (rb_isascii(c) && isalpha(c))
+#define rb_isblank(c) (rb_isascii(c) && isblank(c))
+#define rb_iscntrl(c) (rb_isascii(c) && iscntrl(c))
+#define rb_isdigit(c) (rb_isascii(c) && isdigit(c))
+#define rb_isgraph(c) (rb_isascii(c) && isgraph(c))
+#define rb_islower(c) (rb_isascii(c) && islower(c))
+#define rb_isprint(c) (rb_isascii(c) && isprint(c))
+#define rb_ispunct(c) (rb_isascii(c) && ispunct(c))
+#define rb_isspace(c) (rb_isascii(c) && isspace(c))
+#define rb_isupper(c) (rb_isascii(c) && isupper(c))
+#define rb_isxdigit(c) (rb_isascii(c) && isxdigit(c))
+#define rb_tolower(c) (rb_isascii(c) && tolower(c))
+#define rb_toupper(c) (rb_isascii(c) && toupper(c))
 
 #ifndef ISPRINT
 #define ISASCII(c) rb_isascii((unsigned char)(c))

Modified: MacRuby/trunk/inits.c
===================================================================
--- MacRuby/trunk/inits.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/inits.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -36,7 +36,8 @@
 void Init_Object(void);
 void Init_pack(void);
 void Init_Precision(void);
-void Init_sym(void);
+void Init_Symbol(void);
+void Init_PreSymbol(void);
 void Init_id(void);
 void Init_process(void);
 void Init_Random(void);
@@ -63,7 +64,7 @@
 void
 rb_call_inits()
 {
-    Init_sym();
+    Init_PreSymbol();
     Init_id();
     Init_var_tables();
     Init_Object();
@@ -73,6 +74,7 @@
     Init_Enumerable();
     Init_Precision();
     Init_String();
+    Init_Symbol();
     Init_Exception();
     Init_eval();
     Init_jump();

Modified: MacRuby/trunk/io.c
===================================================================
--- MacRuby/trunk/io.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/io.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -16,6 +16,7 @@
 #include "vm.h"
 #include "objc.h"
 #include "id.h"
+#include "encoding.h"
 
 #include <errno.h>
 #include <paths.h>
@@ -407,36 +408,24 @@
  */
 
 static VALUE
-io_write(VALUE io, SEL sel, VALUE to_write)
+io_write(VALUE io, SEL sel, VALUE data)
 {
     rb_secure(4);
 
     VALUE tmp = rb_io_check_io(io);
     if (NIL_P(tmp)) {
 	// receiver is not IO, dispatch the write method on it
-	return rb_vm_call(io, selWrite, 1, &to_write, false);
+	return rb_vm_call(io, selWrite, 1, &data, false);
     }
     io = tmp;
     
     rb_io_t *io_struct = ExtractIOStruct(io);
     rb_io_assert_writable(io_struct);
-    to_write = rb_obj_as_string(to_write);
+    data = rb_obj_as_string(data);
 
-    UInt8 *buffer;
-    size_t length;
-    if (CLASS_OF(to_write) == rb_cByteString) {
-	CFMutableDataRef data = rb_bytestring_wrapped_data(to_write);
-	buffer = CFDataGetMutableBytePtr(data);
-	length = CFDataGetLength(data);
-    }
-    else {
-	buffer = (UInt8 *)RSTRING_PTR(to_write);
-	if (buffer == NULL) {
-	    rb_raise(rb_eRuntimeError,
-		    "could not extract a string from the read data.");
-	}
-	length = strlen((char *)buffer);
-    }
+    data = rb_str_bstr(data);
+    const uint8_t *buffer = rb_bstr_bytes(data);
+    const long length = rb_bstr_length(data);
 
     if (length == 0) {
         return INT2FIX(0);
@@ -860,15 +849,21 @@
 rb_io_inspect(VALUE io, SEL sel)
 {
     rb_io_t *io_struct = ExtractIOStruct(io);
-    if ((io_struct == NULL) || (io_struct->path == NULL)) {
+    if (io_struct == NULL || io_struct->path == 0) {
         return rb_any_to_s(io);
     }
-    const char *status = (rb_io_is_open(io_struct) ? "" : " (closed)");
 
-    CFStringRef s = CFStringCreateWithFormat(NULL, NULL, CFSTR("#<%s:%@%s>"),
-	    rb_obj_classname(io), io_struct->path, status);
-    CFMakeCollectable(s);
-    return (VALUE)s;
+    VALUE str = rb_str_new2("#<");
+    rb_str_cat2(str, rb_obj_classname(io));
+    rb_str_cat2(str, ":");
+    rb_str_concat(str, io_struct->path);
+    if (!rb_io_is_open(io_struct)) {
+	rb_str_cat2(str, " (closed)>");
+    }
+    else {
+	rb_str_cat2(str, ">");
+    }
+    return str;
 }
 
 /*
@@ -987,24 +982,26 @@
 }
 
 static VALUE 
-rb_io_read_all(rb_io_t *io_struct, VALUE bytestring_buffer) 
+rb_io_read_all(rb_io_t *io_struct, VALUE outbuf) 
 {
+    outbuf = rb_str_bstr(outbuf);
+
     const long BUFSIZE = 512;
-    CFMutableDataRef data = rb_bytestring_wrapped_data(bytestring_buffer);
     long bytes_read = 0;
-    const long original_position = (long)CFDataGetLength(data);
-    for (;;) {
-        CFDataIncreaseLength(data, BUFSIZE);
-        UInt8 *b = CFDataGetMutableBytePtr(data) + original_position
-	    + bytes_read;
-        const long last_read = rb_io_read_internal(io_struct, b, BUFSIZE);
+    const long original_position = rb_bstr_length(outbuf);
+
+    while (true) {
+	rb_bstr_resize(outbuf, original_position + bytes_read + BUFSIZE);
+	uint8_t *bytes = rb_bstr_bytes(outbuf) + original_position + bytes_read;
+        const long last_read = rb_io_read_internal(io_struct, bytes, BUFSIZE);
         bytes_read += last_read;
 	if (last_read == 0) {
 	    break;
 	}
     }
-    CFDataSetLength(data, original_position + bytes_read);
-    return bytestring_buffer; 
+
+    rb_bstr_set_length(outbuf, original_position + bytes_read);
+    return outbuf; 
 }
 
 long
@@ -1122,13 +1119,8 @@
     rb_io_assert_readable(io_struct);
 
     if (NIL_P(outbuf)) {
-	outbuf = rb_bytestring_new();
+	outbuf = rb_bstr_new();
     }
-    else if (CLASS_OF(outbuf) != rb_cByteString) {
-	// TODO: Get the magical pointer incantations right.
-	rb_raise(rb_eIOError,
-		"writing to non-bytestrings is not supported at this time.");
-    }
 
     if (NIL_P(len)) {
         return rb_io_read_all(io_struct, outbuf);
@@ -1146,15 +1138,15 @@
 	rb_raise(rb_eArgError, "given size `%ld' is too big", size);
     }
 
-    CFMutableDataRef data = rb_bytestring_wrapped_data(outbuf);
-    CFDataIncreaseLength(data, size);
-    UInt8 *buf = CFDataGetMutableBytePtr(data);
+    outbuf = rb_str_bstr(outbuf);
+    rb_bstr_resize(outbuf, size);
+    uint8_t *bytes = rb_bstr_bytes(outbuf);
 
-    const long data_read = rb_io_read_internal(io_struct, buf, size);
+    const long data_read = rb_io_read_internal(io_struct, bytes, size);
     if (data_read == 0) {
 	return Qnil;
     }
-    CFDataSetLength(data, data_read);
+    rb_bstr_set_length(outbuf, data_read);
 
     return outbuf;
 }
@@ -1288,24 +1280,25 @@
 	}
     }
     const long line_limit = NIL_P(limit) ? -1 : FIX2LONG(limit);
-    // now that we've got our parameters, let's get down to business.
 
-    VALUE bstr = rb_bytestring_new();
-    CFMutableDataRef data = rb_bytestring_wrapped_data(bstr);
+    VALUE bstr = rb_bstr_new();
     if (line_limit != -1) {
-	CFDataIncreaseLength(data, line_limit);
-	UInt8 *b = CFDataGetMutableBytePtr(data);
-	rb_io_read_internal(io_struct, b, line_limit);
+	rb_bstr_resize(bstr, line_limit);
+	uint8_t *bytes = rb_bstr_bytes(bstr);
+	rb_io_read_internal(io_struct, bytes, line_limit);
+#if 0 // TODO
 	CFRange r = CFStringFind((CFStringRef)bstr, (CFStringRef)sep, 0);
 	if (r.location != kCFNotFound) {
 	    CFDataSetLength(data, r.location);
 	}
+#endif
     }
     else {
 	const char *sepstr = RSTRING_PTR(sep);
 	const long seplen = RSTRING_LEN(sep);
 	assert(seplen > 0);
 
+#if 0 // TODO
 	// Pre-cache if possible.
 	rb_io_read_internal(io_struct, NULL, 0);
 	if (io_struct->buf != NULL && CFDataGetLength(io_struct->buf) > 0) {
@@ -1335,23 +1328,24 @@
 	    CFDataAppendBytes(data, cache, data_read);
 	    rb_io_read_update(io_struct, data_read);
 	}
-	else {
+	else 
+#endif
+	{
 	    // Read from IO (slow).
 	    long s = 512;
 	    long data_read = 0;
-	    CFDataSetLength(data, s);
+	    rb_bstr_resize(bstr, s);
 
-	    UInt8 *buf = CFDataGetMutableBytePtr(data);
-	    UInt8 *tmp_buf = alloca(seplen);
-
+	    uint8_t *buf = rb_bstr_bytes(bstr);
+	    uint8_t *tmp_buf = (uint8_t *)malloc(seplen);
 	    while (true) {
 		if (rb_io_read_internal(io_struct, tmp_buf, seplen) != seplen) {
 		    break;
 		}
 		if (data_read >= s) {
 		    s += s;
-		    CFDataSetLength(data, s);
-		    buf = CFDataGetMutableBytePtr(data);
+		    rb_bstr_resize(bstr, s);
+		    buf = rb_bstr_bytes(bstr);
 		}
 		memcpy(&buf[data_read], tmp_buf, seplen);
 		data_read += seplen;
@@ -1360,10 +1354,12 @@
 		    break;
 		}
 	    }
+	    free(tmp_buf);
+
 	    if (data_read == 0) {
 		return Qnil;
 	    }
-	    CFDataSetLength(data, data_read);
+	    rb_bstr_set_length(bstr, data_read);
 	}
     }
     OBJ_TAINT(bstr);
@@ -3347,11 +3343,11 @@
 	io_s->pid = -1;
     }
 
-    VALUE bstr = rb_bytestring_new();
-    rb_io_read_all(ExtractIOStruct(io), bstr);
+    VALUE outbuf = rb_bstr_new();
+    rb_io_read_all(ExtractIOStruct(io), outbuf);
     rb_io_close(io);
 
-    return bstr;
+    return outbuf;
 }
 
 /*
@@ -3843,9 +3839,9 @@
 	}
     }
 
-    CFMutableDataRef data = rb_bytestring_wrapped_data(outbuf);
-    UInt8 *buf = CFDataGetMutableBytePtr(data);
-    const long length = CFDataGetLength(data);
+    outbuf = rb_str_bstr(outbuf);
+    uint8_t *bytes = rb_bstr_bytes(outbuf);
+    const long length = rb_bstr_length(outbuf);
 
     VALUE ary = rb_ary_new();
 
@@ -3854,9 +3850,9 @@
 
 	long pos = 0;
 	void *ptr;
-	while ((ptr = memchr(&buf[pos], byte, length - pos)) != NULL) {
-	    const long s =  (long)ptr - (long)&buf[pos] + 1;
-	    rb_ary_push(ary, rb_bytestring_new_with_data(&buf[pos], s));
+	while ((ptr = memchr(&bytes[pos], byte, length - pos)) != NULL) {
+	    const long s = (long)ptr - (long)&bytes[pos] + 1;
+	    rb_ary_push(ary, rb_bstr_new_with_data(&bytes[pos], s));
 	    pos += s; 
 	}
     }
@@ -3975,7 +3971,7 @@
 rb_io_external_encoding(VALUE io, SEL sel)
 {
     // TODO
-    return Qnil;
+    return (VALUE)rb_locale_encoding();
 }
 
 /*
@@ -3990,7 +3986,7 @@
 rb_io_internal_encoding(VALUE io, SEL sel)
 {
     // TODO
-    return Qnil;
+    return (VALUE)rb_locale_encoding();
 }
 
 /*

Modified: MacRuby/trunk/lib/irb/locale.rb
===================================================================
--- MacRuby/trunk/lib/irb/locale.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/lib/irb/locale.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -12,6 +12,9 @@
   class Locale
     @RCS_ID='-$Id: locale.rb 20889 2008-12-20 02:02:48Z yugui $-'
 
+    LOCALE_NAME_RE = //
+=begin
+    # This regexp can't compile on MacRuby because of named captures.
     LOCALE_NAME_RE = %r[
       (?<language>[[:alpha:]]{2})
       (?:_
@@ -24,6 +27,7 @@
        (?<modifier>.*)
       )?
     ]x
+=end
     LOCALE_DIR = "/lc/"
 
     @@legacy_encoding_alias_map = {}.freeze
@@ -31,6 +35,7 @@
     def initialize(locale = nil)
       @lang = @territory = @encoding_name = @modifier = nil
       @locale = locale || ENV["IRB_LANG"] || ENV["LC_MESSAGES"] || ENV["LC_ALL"] || ENV["LANG"] || "C" 
+=begin
       if m = LOCALE_NAME_RE.match(@locale)
 	@lang, @territory, @encoding_name, @modifier = m[:language], m[:territory], m[:codeset], m[:modifier]
 
@@ -42,6 +47,7 @@
 	  @encoding = Encoding.find(@encoding_name) rescue nil
 	end
       end
+=end
       @encoding ||= (Encoding.find('locale') rescue Encoding::ASCII_8BIT)
     end
 

Modified: MacRuby/trunk/lib/net/telnet.rb
===================================================================
--- MacRuby/trunk/lib/net/telnet.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/lib/net/telnet.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -320,7 +320,8 @@
             hexvals = line.unpack('H*')[0]
             hexvals += ' ' * (32 - hexvals.length)
             hexvals = format("%s %s %s %s  " * 4, *hexvals.unpack('a2' * 16))
-            line = line.gsub(/[\000-\037\177-\377]/n, '.')
+            # XXX MacRuby will not parse this regexp.
+            #line = line.gsub(/[\000-\037\177-\377]/n, '.')
             printf "%s 0x%5.5x: %s%s\n", dir, addr, hexvals, line
             addr += 16
             offset += 16

Modified: MacRuby/trunk/lib/rubygems/remote_fetcher.rb
===================================================================
--- MacRuby/trunk/lib/rubygems/remote_fetcher.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/lib/rubygems/remote_fetcher.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -263,9 +263,7 @@
       if data.nil?
         raise Gem::RemoteFetcher::FetchError, "error when fetching data from #{uri}"
       end
-      string = String.__new_bytestring__(data)
-      #block.call(string) if block
-      return string
+      return String.new(data)
     end
     raise "block is dead" if block_given?
 

Modified: MacRuby/trunk/lib/stringio.rb
===================================================================
--- MacRuby/trunk/lib/stringio.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/lib/stringio.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -52,7 +52,7 @@
   #
   # Creates new StringIO instance from with _string_ and _mode_.
   #
-  def initialize(string = ByteString.new, mode = nil)
+  def initialize(string = String.new, mode = nil)
     @string = string.to_str  
     @pos = 0
     @lineno = 0
@@ -127,7 +127,7 @@
   #
   # See IO#read.
   #
-  def read(length = nil, buffer = ByteString.new)
+  def read(length = nil, buffer = String.new)
     raise IOError, "not opened for reading" unless @readable
     raise TypeError unless buffer.respond_to?(:to_str)
     buffer = buffer.to_str      
@@ -152,7 +152,7 @@
   #
   # Similar to #read, but raises +EOFError+ at end of string instead of
   # returning +nil+, as well as IO#sysread does.
-  def sysread(length = nil, buffer = ByteString.new)
+  def sysread(length = nil, buffer = String.new)
     val = read(length, buffer)
     ( buffer.clear && raise(IO::EOFError, "end of file reached")) if val == nil
     val
@@ -329,7 +329,7 @@
   #   strio.getc   -> string or nil
   #
   # Gets the next character from io.
-  # Returns nil if called at end of file
+  # Returns nil if called at end of file
   def getc
     return nil if eof?
     result = string[pos]

Modified: MacRuby/trunk/lib/strscan.rb
===================================================================
--- MacRuby/trunk/lib/strscan.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/lib/strscan.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -299,11 +299,6 @@
   #   s.getch           # => "a"
   #   s.getch           # => "b"
   #   s.getch           # => nil
-  #    
-  #   # encoding: EUC-JP
-  #   s = StringScanner.new("\244\242")
-  #   s.getch           # => "あ"
-  #   s.getch           # => nil
   #
   def getch
     scan(/./m)

Modified: MacRuby/trunk/marshal.c
===================================================================
--- MacRuby/trunk/marshal.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/marshal.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -14,6 +14,7 @@
 #include "ruby/st.h"
 #include "ruby/util.h"
 #include "ruby/encoding.h"
+#include "encoding.h"
 #include "id.h"
 
 #include <math.h>
@@ -83,7 +84,15 @@
 static ID s_dump_data, s_load_data, s_alloc;
 static ID s_getbyte, s_read, s_write, s_binmode;
 
-ID rb_id_encoding(void);
+static ID
+rb_id_encoding(void)
+{
+    static ID id = 0;
+    if (id == 0) {
+	id = rb_intern("encoding");
+    }
+    return id;
+}
 
 typedef struct {
     VALUE newclass;
@@ -210,12 +219,16 @@
 w_nbyte(const char *s, int n, struct dump_arg *arg)
 {
     VALUE buf = arg->str;
-    rb_str_buf_cat(buf, s, n);
+    rb_bstr_concat(buf, (const uint8_t *)s, n);
+#if 0 // unused
     if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
-	if (arg->taint) OBJ_TAINT(buf);
+	if (arg->taint) {
+	    OBJ_TAINT(buf);
+	}
 	rb_io_write(arg->dest, 0, buf);
 	rb_str_resize(buf, 0);
     }
+#endif
 }
 
 static void
@@ -773,8 +786,9 @@
 	  case T_REGEXP:
 	    w_uclass(obj, rb_cRegexp, arg);
 	    w_byte(TYPE_REGEXP, arg);
-	    w_bytes(RREGEXP(obj)->str, RREGEXP(obj)->len, arg);
-	    w_byte((char)rb_reg_options(obj), arg);
+	    // TODO    
+	    //w_bytes(RREGEXP(obj)->str, RREGEXP(obj)->len, arg);
+	    //w_byte((char)rb_reg_options(obj), arg);
 	    break;
 
 	  case T_ARRAY:
@@ -886,10 +900,12 @@
 dump(struct dump_call_arg *arg)
 {
     w_object(arg->obj, arg->arg, arg->limit);
+#if 0 // unused
     if (arg->arg->dest) {
 	rb_io_write(arg->arg->dest, 0, arg->arg->str);
-	rb_str_resize(arg->arg->str, 0);
+	rb_bstr_resize(arg->arg->str, 0);
     }
+#endif
     return 0;
 }
 
@@ -967,19 +983,23 @@
 	}
     }
     arg->dest = 0;
+    bool got_io = false;
     if (!NIL_P(port)) {
 	if (!rb_obj_respond_to(port, s_write, Qtrue)) {
 type_error:
 	    rb_raise(rb_eTypeError, "instance of IO needed");
 	}
-	GC_WB(&arg->str, rb_bytestring_new());
+	GC_WB(&arg->str, rb_bstr_new());
+#if 0 // unused
 	GC_WB(&arg->dest, port);
+#endif
 	if (rb_obj_respond_to(port, s_binmode, Qtrue)) {
 	    rb_funcall2(port, s_binmode, 0, 0);
 	}
+	got_io = true;
     }
     else {
-	port = rb_bytestring_new();
+	port = rb_bstr_new();
 	GC_WB(&arg->str, port);
     }
 
@@ -998,6 +1018,12 @@
 
     rb_ensure(dump, (VALUE)c_arg, dump_ensure, (VALUE)arg);
 
+    // If we got an IO object as the port, make sure to write the bytestring
+    // to it before leaving!
+    if (got_io) {
+	rb_io_write(port, 0, arg->str);	
+    }
+
     return port;
 }
 
@@ -1111,9 +1137,9 @@
     }
     if (TYPE(arg->src) == T_STRING) {
 	if (RSTRING_LEN(arg->src) - arg->offset >= len) {
-	    str = rb_bytestring_new();
-	    rb_bytestring_resize(str, len + 1);
-	    UInt8 *data = rb_bytestring_byte_pointer(str);
+	    str = rb_bstr_new();
+	    rb_bstr_resize(str, len + 1);
+	    uint8_t *data = rb_bstr_bytes(str);
 	    memcpy(data, (UInt8 *)RSTRING_PTR(arg->src) + arg->offset, len);
 	    data[len] = '\0';
 	    arg->offset += len;
@@ -1733,17 +1759,7 @@
     v = rb_check_string_type(port);
     if (!NIL_P(v)) {
 	arg->taint = OBJ_TAINTED(port); /* original taintedness */
-	if (*(VALUE *)v != rb_cByteString) {
-	    // Given string is not a ByteString, let's create one based on every
-	    // character. This sucks but this is how life is.
-	    const long n = RSTRING_LEN(v);
-	    UInt8 *bytes = alloca(n + 1);
-	    for (long i = 0; i < n; i++) {
-		UniChar c = CFStringGetCharacterAtIndex((CFStringRef)v, i);
-		bytes[i] = (char)c;
-	    }
-	    v = rb_bytestring_new_with_data(bytes, n);
-	}
+	v = rb_str_bstr(v);
 	port = v;
     }
     else if (rb_obj_respond_to(port, s_getbyte, Qtrue)

Modified: MacRuby/trunk/objc.h
===================================================================
--- MacRuby/trunk/objc.h	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/objc.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -234,6 +234,8 @@
 #define RB2OC(obj) (rb_rval_to_ocid((VALUE)obj))
 #define OC2RB(obj) (rb_ocid_to_rval((id)obj))
 
+void rb_objc_exception_raise(const char *name, const char *message);
+
 #if defined(__cplusplus)
 }
 #endif

Modified: MacRuby/trunk/objc.m
===================================================================
--- MacRuby/trunk/objc.m	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/objc.m	2010-03-12 23:56:52 UTC (rev 3746)
@@ -197,32 +197,35 @@
     NSString *res = (NSString *)FilePathValue(fname);
 
     if ([res isAbsolutePath]) {
-      NSString *tmp = [res stringByResolvingSymlinksInPath];
-      // Make sure we don't have an invalid user path.
-      if ([res hasPrefix:@"~"] && [tmp isEqualTo:res]) {
-        NSString *user = [[[res pathComponents] objectAtIndex:0] substringFromIndex:1];
-        rb_raise(rb_eArgError, "user %s doesn't exist", [user UTF8String]);
-      }
-      res = tmp;
+	NSString *tmp = [res stringByResolvingSymlinksInPath];
+	// Make sure we don't have an invalid user path.
+	if ([res hasPrefix:@"~"] && [tmp isEqualTo:res]) {
+	    NSString *user = [[[res pathComponents] objectAtIndex:0]
+		substringFromIndex:1];
+	    rb_raise(rb_eArgError, "user %s doesn't exist", [user UTF8String]);
+	}
+	res = tmp;
     }
     else {
-      NSString *dir = dname != Qnil ?
-        (NSString *)FilePathValue(dname) : [[NSFileManager defaultManager] currentDirectoryPath];
+	NSString *dir = dname != Qnil
+	    ? (NSString *)FilePathValue(dname)
+	    : [[NSFileManager defaultManager] currentDirectoryPath];
 
-      if (![dir isAbsolutePath]) {
-        dir = (NSString *)rb_file_expand_path((VALUE)dir, Qnil);
-      }
+	if (![dir isAbsolutePath]) {
+	    dir = (NSString *)rb_file_expand_path((VALUE)dir, Qnil);
+	}
 
-      // stringByStandardizingPath does not expand "/." to "/".
-      if ([res isEqualTo:@"."] && [dir isEqualTo:@"/"]) {
-        res = @"/";
-      }
-      else {
-        res = [[dir stringByAppendingPathComponent:res] stringByStandardizingPath];
-      }
+	// stringByStandardizingPath does not expand "/." to "/".
+	if ([res isEqualTo:@"."] && [dir isEqualTo:@"/"]) {
+	    res = @"/";
+	}
+	else {
+	    res = [[dir stringByAppendingPathComponent:res]
+		stringByStandardizingPath];
+	}
     }
 
-    return (VALUE)[res mutableCopy];
+    return rb_str_new2([res fileSystemRepresentation]);
 }
 
 static VALUE
@@ -605,6 +608,14 @@
     return rb_exc_new2(rb_eRuntimeError, buf);
 }
 
+void
+rb_objc_exception_raise(const char *name, const char *message)
+{
+    assert(name != NULL && message != NULL);
+    [[NSException exceptionWithName:[NSString stringWithUTF8String:name]
+	reason:[NSString stringWithUTF8String:message] userInfo:nil] raise];
+}
+
 size_t
 rb_objc_type_size(const char *type)
 {

Modified: MacRuby/trunk/object.c
===================================================================
--- MacRuby/trunk/object.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/object.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -21,6 +21,7 @@
 #include <float.h>
 #include "objc.h"
 #include "vm.h"
+#include "encoding.h"
 #include "array.h"
 #include "hash.h"
 
@@ -163,11 +164,11 @@
     while (RCLASS_SINGLETON(cl)) {
 	cl = RCLASS_SUPER(cl);
     }
-    if (cl == rb_cCFString) {
-	return rb_cNSMutableString;
-    }
     if (!RCLASS_RUBY(cl)) {
 	const long v = RCLASS_VERSION(cl);
+	if (v & RCLASS_IS_STRING_SUBCLASS) {
+	    return rb_cRubyString;
+	}
 	if (v & RCLASS_IS_HASH_SUBCLASS) {
 	    return rb_cRubyHash;
 	}
@@ -814,7 +815,7 @@
 		}
 		// fall through
 	    case T_STRING:
-		if (*(VALUE *)obj == rb_cByteString) {
+		if (rb_klass_is_rstr(*(VALUE *)obj)) {
 		    return RBASIC(obj)->flags & FL_TAINT ? Qtrue : Qfalse;
 		}
 		// fall through
@@ -867,7 +868,7 @@
 		}
 		// fall through
 	    case T_STRING:
-		if (*(VALUE *)obj == rb_cByteString) {
+		if (rb_klass_is_rstr(*(VALUE *)obj)) {
 		    RBASIC(obj)->flags |= FL_TAINT;
 		    break;
 		}
@@ -921,7 +922,7 @@
 		}	
 		// fall through
 	    case T_STRING:
-		if (*(VALUE *)obj == rb_cByteString) {
+		if (rb_klass_is_rstr(*(VALUE *)obj)) {
 		    RBASIC(obj)->flags &= ~FL_TAINT;
 		    break;
 		}
@@ -965,7 +966,7 @@
 		}
 		// fall through
 	    case T_STRING:
-		if (*(VALUE *)obj == rb_cByteString) {
+		if (rb_klass_is_rstr(*(VALUE *)obj)) {
 		    return RBASIC(obj)->flags & FL_UNTRUSTED ? Qtrue : Qfalse;
 		}
 		// fall through
@@ -1005,7 +1006,7 @@
 		}
 		// fall through
 	    case T_STRING:
-		if (*(VALUE *)obj == rb_cByteString) {
+		if (rb_klass_is_rstr(*(VALUE *)obj)) {
 		    RBASIC(obj)->flags &= ~FL_UNTRUSTED;
 		    break;
 		}
@@ -1051,7 +1052,7 @@
 		}
 		// fall through
 	    case T_STRING:
-		if (*(VALUE *)obj == rb_cByteString) {
+		if (rb_klass_is_rstr(*(VALUE *)obj)) {
 		    RBASIC(obj)->flags |= FL_UNTRUSTED;
 		    break;
 		}
@@ -1136,7 +1137,7 @@
 		    }
 		    // fall through
 		case T_STRING:
-		    if (*(VALUE *)obj == rb_cByteString) {
+		    if (rb_klass_is_rstr(*(VALUE *)obj)) {
 			RBASIC(obj)->flags |= FL_FREEZE;
 			break;
 		    }
@@ -1200,7 +1201,7 @@
 	    }
 	    // fall through
 	case T_STRING:
-	    if (*(VALUE *)obj == rb_cByteString) {
+	    if (rb_klass_is_rstr(*(VALUE *)obj)) {
 		return RBASIC(obj)->flags & FL_FREEZE ? Qtrue : Qfalse;
 	    }
 	    // fall through
@@ -3094,8 +3095,8 @@
 
     rb_objc_define_method(rb_mKernel, "__native__?", rb_obj_is_native, 0);
 
-    rb_objc_define_module_function(rb_mKernel, "sprintf", rb_f_sprintf_imp, -1); /* in sprintf.cpp */
-    rb_objc_define_module_function(rb_mKernel, "format", rb_f_sprintf_imp, -1);  /* in sprintf.cpp */
+    rb_objc_define_module_function(rb_mKernel, "sprintf", rb_f_sprintf_imp, -1); /* in sprintf.c */
+    rb_objc_define_module_function(rb_mKernel, "format", rb_f_sprintf_imp, -1);  /* in sprintf.c */
 
     rb_objc_define_module_function(rb_mKernel, "Integer", rb_f_integer, -1);
     rb_objc_define_module_function(rb_mKernel, "Float", rb_f_float, 1);

Modified: MacRuby/trunk/pack.c
===================================================================
--- MacRuby/trunk/pack.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/pack.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -9,13 +9,15 @@
 
 **********************************************************************/
 
-#include "ruby/ruby.h"
-#include "ruby/node.h"
-#include "vm.h"
 #include <sys/types.h>
 #include <ctype.h>
 #include <errno.h>
 
+#include "ruby/ruby.h"
+#include "ruby/node.h"
+#include "vm.h"
+#include "encoding.h"
+
 #define SIZE16 2
 #define SIZE32 4
 
@@ -61,16 +63,16 @@
     unsigned char *s, *t;		\
     int i;				\
 					\
-    zp = xmalloc(sizeof(xtype));	\
+    zp = malloc(sizeof(xtype));	\
     *zp = z;				\
     s = (unsigned char*)zp;		\
-    t = xmalloc(sizeof(xtype));		\
+    t = malloc(sizeof(xtype));		\
     for (i=0; i<sizeof(xtype); i++) {	\
 	t[sizeof(xtype)-i-1] = s[i];	\
     }					\
     r = *(xtype *)t;			\
-    xfree(t);				\
-    xfree(zp);				\
+    free(t);				\
+    free(zp);				\
     return r;				\
 }
 
@@ -364,8 +366,8 @@
 #endif
 static const char toofew[] = "too few arguments";
 
-static void encodes(CFMutableDataRef,const char*,long,int,int);
-static void qpencode(CFMutableDataRef,VALUE,long);
+static void encodes(VALUE,const char*,long,int,int);
+static void qpencode(VALUE,VALUE,long);
 
 static unsigned long utf8_to_uv(const char*,long*);
 
@@ -452,26 +454,11 @@
     p = RSTRING_PTR(fmt);
     pend = p + RSTRING_LEN(fmt);
 
-    VALUE bres = rb_bytestring_new();
-    CFMutableDataRef data = rb_bytestring_wrapped_data(bres);
+    VALUE data = rb_bstr_new();
 
     items = RARRAY_LEN(ary);
     idx = 0;
 
-    // Taint the ByteString accordingly.
-    if (OBJ_TAINTED(fmt)) {
-	OBJ_TAINT(bres);
-    }
-    else {
-	long i;
-	for (i = 0; i < items; i++) {
-	    if (OBJ_TAINTED(RARRAY_AT(ary, i))) {
-		OBJ_TAINT(bres);
-		break;
-	    }
-	}
-    }
-
 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
 #define THISFROM (items > 0 ? RARRAY_AT(ary, idx) : TOO_FEW)
 #define NEXTFROM (items-- > 0 ? RARRAY_AT(ary, idx++) : TOO_FEW)
@@ -543,19 +530,19 @@
 	      case 'A':         /* arbitrary binary string (ASCII space padded) */
 	      case 'Z':         /* null terminated string  */
 		if (plen >= len) {
-		    CFDataAppendBytes(data, (const UInt8 *)ptr, len);
+		    rb_bstr_concat(data, (const UInt8 *)ptr, len);
 		    if (p[-1] == '*' && type == 'Z') {
-			CFDataAppendBytes(data, (const UInt8 *)nul10, 1);
+			rb_bstr_concat(data, (const UInt8 *)nul10, 1);
 		    }
 		}
 		else {
-		    CFDataAppendBytes(data, (const UInt8 *)ptr, plen);
+		    rb_bstr_concat(data, (const UInt8 *)ptr, plen);
 		    len -= plen;
 		    while (len >= 10) {
-			CFDataAppendBytes(data, (const UInt8 *)((type == 'A')?spc10:nul10), 10);
+			rb_bstr_concat(data, (const UInt8 *)((type == 'A')?spc10:nul10), 10);
 			len -= 10;
 		    }
-		    CFDataAppendBytes(data, (const UInt8 *)((type == 'A')?spc10:nul10), len);
+		    rb_bstr_concat(data, (const UInt8 *)((type == 'A')?spc10:nul10), len);
 		}
 		break;
 
@@ -575,7 +562,7 @@
 			    byte >>= 1;
 			else {
 			    char c = byte & 0xff;
-			    CFDataAppendBytes(data, (const UInt8 *)&c, 1);
+			    rb_bstr_concat(data, (const UInt8 *)&c, 1);
 			    byte = 0;
 			}
 		    }
@@ -583,7 +570,7 @@
 			char c;
 			byte >>= 7 - (len & 7);
 			c = byte & 0xff;
-			CFDataAppendBytes(data, (const UInt8 *)&c, 1);
+			rb_bstr_concat(data, (const UInt8 *)&c, 1);
 		    }
 		    len = j;
 		    goto grow;
@@ -605,7 +592,7 @@
 			    byte <<= 1;
 			else {
 			    char c = byte & 0xff;
-			    CFDataAppendBytes(data, (const UInt8 *)&c, 1);
+			    rb_bstr_concat(data, (const UInt8 *)&c, 1);
 			    byte = 0;
 			}
 		    }
@@ -613,7 +600,7 @@
 			char c;
 			byte <<= 7 - (len & 7);
 			c = byte & 0xff;
-			CFDataAppendBytes(data, (const UInt8 *)&c, 1);
+			rb_bstr_concat(data, (const UInt8 *)&c, 1);
 		    }
 		    len = j;
 		    goto grow;
@@ -638,13 +625,13 @@
 			    byte >>= 4;
 			else {
 			    char c = byte & 0xff;
-			    CFDataAppendBytes(data, (const UInt8 *)&c, 1);
+			    rb_bstr_concat(data, (const UInt8 *)&c, 1);
 			    byte = 0;
 			}
 		    }
 		    if (len & 1) {
 			char c = byte & 0xff;
-			CFDataAppendBytes(data, (const UInt8 *)&c, 1);
+			rb_bstr_concat(data, (const UInt8 *)&c, 1);
 		    }
 		    len = j;
 		    goto grow;
@@ -669,13 +656,13 @@
 			    byte <<= 4;
 			else {
 			    char c = byte & 0xff;
-			    CFDataAppendBytes(data, (const UInt8 *)&c, 1);
+			    rb_bstr_concat(data, (const UInt8 *)&c, 1);
 			    byte = 0;
 			}
 		    }
 		    if (len & 1) {
 			char c = byte & 0xff;
-			CFDataAppendBytes(data, (const UInt8 *)&c, 1);
+			rb_bstr_concat(data, (const UInt8 *)&c, 1);
 		    }
 		    len = j;
 		    goto grow;
@@ -691,7 +678,7 @@
 
 		from = NEXTFROM;
 		c = num2i32(from);
-		CFDataAppendBytes(data, (const UInt8 *)&c, 1);
+		rb_bstr_concat(data, (const UInt8 *)&c, 1);
 	    }
 	    break;
 
@@ -702,7 +689,7 @@
 
 		from = NEXTFROM;
 		s = num2i32(from);
-		CFDataAppendBytes(data, (const UInt8 *)OFF16(&s), NATINT_LEN(short,2));
+		rb_bstr_concat(data, (const UInt8 *)OFF16(&s), NATINT_LEN(short,2));
 	    }
 	    break;
 
@@ -713,7 +700,7 @@
 
 		from = NEXTFROM;
 		i = num2i32(from);
-		CFDataAppendBytes(data, (const UInt8 *)OFF32(&i), NATINT_LEN(int,4));
+		rb_bstr_concat(data, (const UInt8 *)OFF32(&i), NATINT_LEN(int,4));
 	    }
 	    break;
 
@@ -724,7 +711,7 @@
 
 		from = NEXTFROM;
 		l = num2i32(from);
-		CFDataAppendBytes(data, (const UInt8 *)OFF32(&l), NATINT_LEN(long,4));
+		rb_bstr_concat(data, (const UInt8 *)OFF32(&l), NATINT_LEN(long,4));
 	    }
 	    break;
 
@@ -735,7 +722,7 @@
 
 		from = NEXTFROM;
 		rb_quad_pack(tmp, from);
-		CFDataAppendBytes(data, (const UInt8 *)&tmp, QUAD_SIZE);
+		rb_bstr_concat(data, (const UInt8 *)&tmp, QUAD_SIZE);
 	    }
 	    break;
 
@@ -746,7 +733,7 @@
 		from = NEXTFROM;
 		s = num2i32(from);
 		s = NATINT_HTONS(s);
-		CFDataAppendBytes(data, (const UInt8 *)OFF16(&s), NATINT_LEN(short,2));
+		rb_bstr_concat(data, (const UInt8 *)OFF16(&s), NATINT_LEN(short,2));
 	    }
 	    break;
 
@@ -757,7 +744,7 @@
 		from = NEXTFROM;
 		l = num2i32(from);
 		l = NATINT_HTONL(l);
-		CFDataAppendBytes(data, (const UInt8 *)OFF32(&l), NATINT_LEN(long,4));
+		rb_bstr_concat(data, (const UInt8 *)OFF32(&l), NATINT_LEN(long,4));
 	    }
 	    break;
 
@@ -768,7 +755,7 @@
 		from = NEXTFROM;
 		s = num2i32(from);
 		s = NATINT_HTOVS(s);
-		CFDataAppendBytes(data, (const UInt8 *)OFF16(&s), NATINT_LEN(short,2));
+		rb_bstr_concat(data, (const UInt8 *)OFF16(&s), NATINT_LEN(short,2));
 	    }
 	    break;
 
@@ -779,7 +766,7 @@
 		from = NEXTFROM;
 		l = num2i32(from);
 		l = NATINT_HTOVL(l);
-		CFDataAppendBytes(data, (const UInt8 *)OFF32(&l), NATINT_LEN(long,4));
+		rb_bstr_concat(data, (const UInt8 *)OFF32(&l), NATINT_LEN(long,4));
 	    }
 	    break;
 
@@ -790,7 +777,7 @@
 
 		from = NEXTFROM;
 		f = RFLOAT_VALUE(rb_Float(from));
-		CFDataAppendBytes(data, (const UInt8 *)&f, sizeof(float));
+		rb_bstr_concat(data, (const UInt8 *)&f, sizeof(float));
 	    }
 	    break;
 
@@ -802,7 +789,7 @@
 		from = NEXTFROM;
 		f = RFLOAT_VALUE(rb_Float(from));
 		f = HTOVF(f,ftmp);
-		CFDataAppendBytes(data, (const UInt8 *)&f, sizeof(float));
+		rb_bstr_concat(data, (const UInt8 *)&f, sizeof(float));
 	    }
 	    break;
 
@@ -814,7 +801,7 @@
 		from = NEXTFROM;
 		d = RFLOAT_VALUE(rb_Float(from));
 		d = HTOVD(d,dtmp);
-		CFDataAppendBytes(data, (const UInt8 *)&d, sizeof(double));
+		rb_bstr_concat(data, (const UInt8 *)&d, sizeof(double));
 	    }
 	    break;
 
@@ -825,7 +812,7 @@
 
 		from = NEXTFROM;
 		d = RFLOAT_VALUE(rb_Float(from));
-		CFDataAppendBytes(data, (const UInt8 *)&d, sizeof(double));
+		rb_bstr_concat(data, (const UInt8 *)&d, sizeof(double));
 	    }
 	    break;
 
@@ -837,7 +824,7 @@
 		from = NEXTFROM;
 		f = RFLOAT_VALUE(rb_Float(from));
 		f = HTONF(f,ftmp);
-		CFDataAppendBytes(data, (const UInt8 *)&f, sizeof(float));
+		rb_bstr_concat(data, (const UInt8 *)&f, sizeof(float));
 	    }
 	    break;
 
@@ -849,30 +836,30 @@
 		from = NEXTFROM;
 		d = RFLOAT_VALUE(rb_Float(from));
 		d = HTOND(d,dtmp);
-		CFDataAppendBytes(data, (const UInt8 *)&d, sizeof(double));
+		rb_bstr_concat(data, (const UInt8 *)&d, sizeof(double));
 	    }
 	    break;
 
 	  case 'x':		/* null byte */
 	  grow:
 	    while (len >= 10) {
-		CFDataAppendBytes(data, (const UInt8 *)nul10, 10);
+		rb_bstr_concat(data, (const UInt8 *)nul10, 10);
 		len -= 10;
 	    }
-	    CFDataAppendBytes(data, (const UInt8 *)nul10, len);
+	    rb_bstr_concat(data, (const UInt8 *)nul10, len);
 	    break;
 
 	  case 'X':		/* back up byte */
 	  shrink:
-	    plen = CFDataGetLength(data);
+	    plen = rb_bstr_length(data);
 	    if (plen < len) {
 		rb_raise(rb_eArgError, "X outside of string");
 	    }
-	    CFDataSetLength(data, plen - len);
+	    rb_bstr_set_length(data, plen - len);
 	    break;
 
 	  case '@':		/* null fill to absolute position */
-	    len -= CFDataGetLength(data);
+	    len -= rb_bstr_length(data);
 	    if (len > 0) {
 		goto grow;
 	    }
@@ -899,7 +886,7 @@
 		    rb_raise(rb_eRangeError, "pack(U): value out of range");
 		}
 		le = rb_uv_to_utf8(buf, l);
-		CFDataAppendBytes(data, (const UInt8 *)buf, le);
+		rb_bstr_concat(data, (const UInt8 *)buf, le);
 	    }
 	    break;
 
@@ -970,14 +957,14 @@
 		}
 		rb_ary_push(associates, from);
 		rb_obj_taint(from);
-		CFDataAppendBytes(data, (const UInt8 *)&t, sizeof(char*));
+		rb_bstr_concat(data, (const UInt8 *)&t, sizeof(char*));
 	    }
 	    break;
 
 	  case 'w':		/* BER compressed integer  */
 	    while (len-- > 0) {
 		unsigned long ul;
-		CFMutableDataRef bufdata = CFDataCreateMutable(NULL, 0);
+		VALUE bufdata = rb_bstr_new();
 		char c, *bufs, *bufe;
 
 		from = NEXTFROM;
@@ -986,7 +973,7 @@
 		    while (TYPE(from) == T_BIGNUM) {
 			from = rb_big_divmod(from, big128);
 			c = NUM2INT(RARRAY_AT(from, 1)) | 0x80; /* mod */
-			CFDataAppendBytes(bufdata, (const UInt8 *)&c, sizeof(char));
+			rb_bstr_concat(bufdata, (const UInt8 *)&c, sizeof(char));
 			from = RARRAY_AT(from, 0); /* div */
 		    }
 		}
@@ -999,28 +986,26 @@
 
 		while (ul) {
 		    c = ((ul & 0x7f) | 0x80);
-		    CFDataAppendBytes(bufdata, (const UInt8 *)&c, sizeof(char));
+		    rb_bstr_concat(bufdata, (const UInt8 *)&c, sizeof(char));
 		    ul >>=  7;
 		}
 
-		if (CFDataGetLength(bufdata) > 0) {
-		    UInt8 *buf_beg = CFDataGetMutableBytePtr(bufdata);
+		if (rb_bstr_length(bufdata) > 0) {
+		    UInt8 *buf_beg = rb_bstr_bytes(bufdata);
 		    bufs = (char *)buf_beg;
-		    bufe = bufs + CFDataGetLength(bufdata) - 1;
+		    bufe = bufs + rb_bstr_length(bufdata) - 1;
 		    *bufs &= 0x7f; /* clear continue bit */
 		    while (bufs < bufe) { /* reverse */
 			c = *bufs;
 			*bufs++ = *bufe;
 			*bufe-- = c;
 		    }
-		    CFDataAppendBytes(data, buf_beg, CFDataGetLength(bufdata));
+		    rb_bstr_concat(data, buf_beg, rb_bstr_length(bufdata));
 		}
 		else {
 		    c = 0;
-		    CFDataAppendBytes(data, (const UInt8 *)&c, sizeof(char));
+		    rb_bstr_concat(data, (const UInt8 *)&c, sizeof(char));
 		}
-
-		CFRelease(bufdata);
 	    }
 	    break;
 
@@ -1029,11 +1014,19 @@
 	}
     }
 
-    if (associates) {
-	rb_str_associate(bres, associates);
+    // Taint the ByteString accordingly.
+    if (OBJ_TAINTED(fmt)) {
+	OBJ_TAINT(data);
     }
-
-    return bres;
+    else {
+	for (long i = 0; i < items; i++) {
+	    if (OBJ_TAINTED(RARRAY_AT(ary, i))) {
+		OBJ_TAINT(data);
+		break;
+	    }
+	}
+    }
+    return data;
 }
 
 static const char uu_table[] =
@@ -1042,9 +1035,9 @@
 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 
 static void
-encodes(CFMutableDataRef data, const char *s, long len, int type, int tail_lf)
+encodes(VALUE data, const char *s, long len, int type, int tail_lf)
 {
-    char *buff = ALLOCA_N(char, len * 4 / 3 + 6);
+    char *buff = (char *)malloc(len * 4 / 3 + 6);
     long i = 0;
     const char *trans = type == 'u' ? uu_table : b64_table;
     int padding;
@@ -1066,7 +1059,7 @@
 	    len -= 3;
 	}
 	if (sizeof(buff) - i < 4) {
-	    CFDataAppendBytes(data, (const UInt8 *)buff, i);
+	    rb_bstr_concat(data, (const UInt8 *)buff, i);
 	    i = 0;
 	}
     }
@@ -1085,12 +1078,13 @@
     if (tail_lf) {
 	buff[i++] = '\n';
     }
-    CFDataAppendBytes(data, (const UInt8 *)buff, i);
+    rb_bstr_concat(data, (const UInt8 *)buff, i);
+    free(buff);
 }
 static const char hex_table[] = "0123456789ABCDEF";
 
 static void
-qpencode(CFMutableDataRef data, VALUE from, long len)
+qpencode(VALUE data, VALUE from, long len)
 {
     char buff[1024];
     long i = 0, n = 0, prev = EOF;
@@ -1128,7 +1122,7 @@
             prev = '\n';
         }
 	if (i > 1024 - 5) {
-	    CFDataAppendBytes(data, (const UInt8 *)buff, i);
+	    rb_bstr_concat(data, (const UInt8 *)buff, i);
 	    i = 0;
 	}
 	s++;
@@ -1138,7 +1132,7 @@
 	buff[i++] = '\n';
     }
     if (i > 0) {
-	CFDataAppendBytes(data, (const UInt8 *)buff, i);
+	rb_bstr_concat(data, (const UInt8 *)buff, i);
     }
 }
 
@@ -1453,10 +1447,10 @@
 		if (p[-1] == '*' || len > (send - s) * 8)
 		    len = (send - s) * 8;
 		bits = 0;
-		bitstr = rb_bytestring_new();
-		rb_bytestring_resize(bitstr, len);
+		bitstr = rb_bstr_new();
+		rb_bstr_resize(bitstr, len);
 		UNPACK_PUSH(bitstr);
-		t = (char *)rb_bytestring_byte_pointer(bitstr);
+		t = (char *)rb_bstr_bytes(bitstr);
 		for (i=0; i<len; i++) {
 		    if (i & 7) {
 			bits >>= 1;
@@ -1479,10 +1473,10 @@
 		if (p[-1] == '*' || len > (send - s) * 8)
 		    len = (send - s) * 8;
 		bits = 0;
-		bitstr = rb_bytestring_new();
-		rb_bytestring_resize(bitstr, len);
+		bitstr = rb_bstr_new();
+		rb_bstr_resize(bitstr, len);
 		UNPACK_PUSH(bitstr);
-		t = (char *)rb_bytestring_byte_pointer(bitstr);
+		t = (char *)rb_bstr_bytes(bitstr);
 		for (i=0; i<len; i++) {
 		    if (i & 7) {
 			bits <<= 1;
@@ -1505,10 +1499,10 @@
 		if (p[-1] == '*' || len > (send - s) * 2)
 		    len = (send - s) * 2;
 		bits = 0;
-		bitstr = rb_bytestring_new();
-		rb_bytestring_resize(bitstr, len);
+		bitstr = rb_bstr_new();
+		rb_bstr_resize(bitstr, len);
 		UNPACK_PUSH(bitstr);
-		t = (char *)rb_bytestring_byte_pointer(bitstr);
+		t = (char *)rb_bstr_bytes(bitstr);
 		for (i=0; i<len; i++) {
 		    if (i & 1) {
 			bits >>= 4;
@@ -1531,10 +1525,10 @@
 		if (p[-1] == '*' || len > (send - s) * 2)
 		    len = (send - s) * 2;
 		bits = 0;
-		bitstr = rb_bytestring_new();
-		rb_bytestring_resize(bitstr, len);
+		bitstr = rb_bstr_new();
+		rb_bstr_resize(bitstr, len);
 		UNPACK_PUSH(bitstr);
-		t = (char *)rb_bytestring_byte_pointer(bitstr);
+		t = (char *)rb_bstr_bytes(bitstr);
 		for (i=0; i<len; i++) {
 		    if (i & 1) {
 			bits <<= 4;
@@ -1789,11 +1783,11 @@
 
 	  case 'u':
 	    {
-		VALUE buf = rb_bytestring_new();
-		rb_bytestring_resize(buf, (send - s)*3/4);
-		char *ptr = (char *)rb_bytestring_byte_pointer(buf);
+		VALUE buf = rb_bstr_new();
+		rb_bstr_resize(buf, (send - s)*3/4);
+		char *ptr = (char *)rb_bstr_bytes(buf);
 		long total = 0;
-		const long buflen = rb_bytestring_length(buf);
+		const long buflen = rb_bstr_length(buf);
 
 		while (s < send && *s > ' ' && *s < 'a') {
 		    long a,b,c,d;
@@ -1852,16 +1846,16 @@
 		    }
 		}
 
-		rb_bytestring_resize(buf, total);
+		rb_bstr_resize(buf, total);
 		UNPACK_PUSH(buf);
 	    }
 	    break;
 
 	  case 'm':
 	    {
-		VALUE buf = rb_bytestring_new();
-		rb_bytestring_resize(buf, (send - s)*3/4);
-		char *ptr = (char *)rb_bytestring_byte_pointer(buf);
+		VALUE buf = rb_bstr_new();
+		rb_bstr_resize(buf, (send - s)*3/4);
+		char *ptr = (char *)rb_bstr_bytes(buf);
 		char *ptr_beg = ptr;
 		int a = -1,b = -1,c = 0,d;
 		static int first = 1;
@@ -1905,16 +1899,16 @@
 			*ptr++ = b << 4 | c >> 2;
 		    }
 		}
-		rb_bytestring_resize(buf, ptr - ptr_beg);
+		rb_bstr_resize(buf, ptr - ptr_beg);
 		UNPACK_PUSH(buf);
 	    }
 	    break;
 
 	  case 'M':
 	    {
-		VALUE buf = rb_bytestring_new();
-		rb_bytestring_resize(buf, send - s);
-		char *ptr = (char *)rb_bytestring_byte_pointer(buf);
+		VALUE buf = rb_bstr_new();
+		rb_bstr_resize(buf, send - s);
+		char *ptr = (char *)rb_bstr_bytes(buf);
 		char *ptr_beg = ptr;
 		int c1, c2;
 
@@ -1935,7 +1929,7 @@
 		    }
 		    s++;
 		}
-		rb_bytestring_resize(buf, ptr - ptr_beg);
+		rb_bstr_resize(buf, ptr - ptr_beg);
 		UNPACK_PUSH(buf);
 	    }
 	    break;

Modified: MacRuby/trunk/parse.y
===================================================================
--- MacRuby/trunk/parse.y	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/parse.y	2010-03-12 23:56:52 UTC (rev 3746)
@@ -21,8 +21,10 @@
 #include "ruby/node.h"
 #include "ruby/st.h"
 #include "ruby/encoding.h"
+#include "encoding.h"
+#include "symbol.h"
 #include "id.h"
-#include "regenc.h"
+#include "re.h"
 #include <stdio.h>
 #include <errno.h>
 #include <ctype.h>
@@ -32,36 +34,12 @@
 #define YYCALLOC(nelem, size)	rb_parser_calloc(parser, nelem, size)
 #define YYFREE(ptr)		rb_parser_free(parser, ptr)
 static inline void *orig_malloc(size_t l) { return malloc(l); }
+static inline void orig_free(void *ptr) { free(ptr); }
 #define malloc	YYMALLOC
 #define realloc	YYREALLOC
 #define calloc	YYCALLOC
 #define free	YYFREE
 
-#define ID_SCOPE_SHIFT 3
-#define ID_SCOPE_MASK 0x07
-#define ID_LOCAL      0x00
-#define ID_INSTANCE   0x01
-#define ID_GLOBAL     0x03
-#define ID_ATTRSET    0x04
-#define ID_CONST      0x05
-#define ID_CLASS      0x06
-#define ID_JUNK       0x07
-#define ID_INTERNAL   ID_JUNK
-
-#define is_notop_id(id) ((id)>tLAST_TOKEN)
-#define is_local_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_LOCAL)
-#define is_global_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_GLOBAL)
-#define is_instance_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_INSTANCE)
-#define is_attrset_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_ATTRSET)
-#define is_const_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CONST)
-#define is_class_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CLASS)
-#define is_junk_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_JUNK)
-
-#define is_asgn_or_id(id) ((is_notop_id(id)) && \
-	(((id)&ID_SCOPE_MASK) == ID_GLOBAL || \
-	 ((id)&ID_SCOPE_MASK) == ID_INSTANCE || \
-	 ((id)&ID_SCOPE_MASK) == ID_CLASS))
-
 enum lex_state_e {
     EXPR_BEG,			/* ignore newline, +/- is a sign. */
     EXPR_END,			/* newline significant, +/- is a operator. */
@@ -289,23 +267,10 @@
 };
 
 #if WITH_OBJC
+// TODO: we should probably mimic what 1.9 does here and use the right/given
+// encoding instead of always UTF8.
 # define UTF8_ENC() (NULL)
-static inline VALUE
-__new_tmp_str(const char *ptr, const size_t len)
-{
-    if (ptr != NULL) {
-	CFStringRef str = CFStringCreateWithBytes(NULL, (UInt8 *)ptr, len,
-		kCFStringEncodingUTF8, false);
-	if (str != NULL) {
-	    CFMutableStringRef str2 =
-		CFStringCreateMutableCopy(NULL, 0, str);
-	    assert(str2 != NULL);
-	    CFRelease(str);
-	    return (VALUE)CFMakeCollectable(str2);
-	}
-    }
-    return rb_usascii_str_new(ptr, len);
-}
+# define __new_tmp_str(p, n) (rb_str_new(p, n))
 # define STR_NEW(p,n) __new_tmp_str(p, n)
 # define STR_NEW0() __new_tmp_str(0, 0)
 # define STR_NEW2(p) __new_tmp_str(p, strlen(p))
@@ -319,7 +284,7 @@
 # define STR_NEW3(p,n,e,func) parser_str_new((p),(n),(e),(func),parser->enc)
 #endif
 #if WITH_OBJC
-# define STR_ENC(m) (parser->enc)
+# define STR_ENC(m) (NULL)
 # define ENC_SINGLE(cr) (1)
 #else
 # define STR_ENC(m) ((m)?parser->enc:rb_usascii_encoding())
@@ -510,7 +475,6 @@
 #define reg_fragment_check(str,options) reg_fragment_check_gen(parser, str, options)
 static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match);
 #define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,regexp,match)
-int rb_enc_symname2_p(const char *, int, rb_encoding *);
 #else
 #define remove_begin(node) (node)
 #endif /* !RIPPER */
@@ -4911,10 +4875,6 @@
 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
 #endif
 
-#if !WITH_OBJC
-#define parser_mbclen()  mbclen((lex_p-1),lex_pend,parser->enc)
-#define parser_precise_mbclen()  rb_enc_precise_mbclen((lex_p-1),lex_pend,parser->enc)
-#endif
 #define is_identchar(p,e,enc) (rb_enc_isalnum(*p,enc) || (*p) == '_' || !ISASCII(*p))
 #define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),lex_pend,parser->enc))
 
@@ -5066,62 +5026,34 @@
 }
 #endif /* !RIPPER */
 
-#ifndef RIPPER
-static VALUE
-lex_get_bstr(struct parser_params *parser, VALUE s)
-{
-    long beg = 0; 
-    const long n = rb_bytestring_length(s);
-    if (lex_gets_ptr > 0) {
-	if (n == lex_gets_ptr) {
-	    return Qnil;
-	}
-	beg += lex_gets_ptr;
-    }
+struct lex_get_str_context {
+    VALUE str;
+    UChar *chars;
+    long chars_len;
+};
 
-    const UInt8 *data = rb_bytestring_byte_pointer(s);
-    UInt8 *pos = memchr(data + beg, '\n', n - beg);
-    if (pos != NULL) {
-	lex_gets_ptr = pos - data + 1;
-    }
-    else {
-	lex_gets_ptr = n;
-    }
-
-    return rb_bytestring_new_with_data(data + beg, lex_gets_ptr - beg);
-}
-#endif
-
 static VALUE
 lex_get_str(struct parser_params *parser, VALUE s)
 {
-    long beg = 0, len; 
-    const long n = CFStringGetLength((CFStringRef)s);
+    struct lex_get_str_context *ctx = (struct lex_get_str_context *)s;
+
+    long beg = 0;
     if (lex_gets_ptr > 0) {
-	if (n == lex_gets_ptr) {
+	if (ctx->chars_len == lex_gets_ptr) {
 	    return Qnil;
 	}
 	beg += lex_gets_ptr;
     }
 
-    CFRange search_range;  
-    if (CFStringFindCharacterFromSet((CFStringRef)s, 
-		CFCharacterSetGetPredefined(kCFCharacterSetNewline),
-		CFRangeMake(beg, n - beg),
-		0,
-		&search_range)) {
-	lex_gets_ptr = search_range.location + 1;
-	len = search_range.location - beg;
+    lex_gets_ptr = ctx->chars_len;
+    for (long i = beg; i < ctx->chars_len; i++) {
+	if (ctx->chars[i] == '\n') {
+	    lex_gets_ptr = i + 1;
+	    break;
+	}
     }
-    else {
-	lex_gets_ptr = n;
-	len = lex_gets_ptr - beg;	
-    }
 
-    CFStringRef subs = CFStringCreateWithSubstring(NULL, (CFStringRef)s, 
-	    CFRangeMake(beg, lex_gets_ptr - beg));
-    CFMakeCollectable(subs);
-    return (VALUE)subs;
+    return rb_unicode_str_new(&ctx->chars[beg], lex_gets_ptr - beg);
 }
 
 static VALUE
@@ -5143,19 +5075,37 @@
     return rb_parser_compile_string(rb_parser_new(), f, s, line);
 }
 
-NODE*
+NODE *
 rb_parser_compile_string(VALUE vparser, const char *f, VALUE s, int line)
 {
     struct parser_params *parser;
     Data_Get_Struct(vparser, struct parser_params, parser);
 
-    lex_gets = CLASS_OF(s) == rb_cByteString ? lex_get_bstr : lex_get_str;
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(s, &chars, &chars_len, &need_free);
+
+    struct lex_get_str_context *ctx = (struct lex_get_str_context *)
+	xmalloc(sizeof(struct lex_get_str_context));
+    GC_WB(&ctx->str, s);
+    ctx->chars = chars;
+    ctx->chars_len = chars_len;
+
+    lex_gets = lex_get_str;
     lex_gets_ptr = 0;
-    GC_WB(&lex_input, s);
+    GC_WB(&lex_input, ctx);
     lex_pbeg = lex_p = lex_pend = 0;
     compile_for_eval = rb_parse_in_eval();
 
-    return yycompile(parser, f, line);
+    NODE *node = yycompile(parser, f, line);
+
+    if (need_free && chars != NULL) {
+	orig_free(chars);
+	chars = NULL;
+    }
+
+    return node;
 }
 
 NODE*
@@ -5652,23 +5602,19 @@
     return 0;
 }
 
-extern int rb_char_to_option_kcode(int c, int *option, int *kcode);
-
 static int
 parser_regx_options(struct parser_params *parser)
 {
-    int kcode = 0;
     int options = 0;
-    int c, opt, kc;
+    int c, opt;
 
     newtok();
     while (c = nextc(), ISALPHA(c)) {
         if (c == 'o') {
             options |= RE_OPTION_ONCE;
         }
-        else if (rb_char_to_option_kcode(c, &opt, &kc)) {
+	else if (rb_char_to_icu_option(c, &opt)) {
             options |= opt;
-	    if (kc >= 0) kcode = c;
         }
         else {
 	    tokadd(c);
@@ -5680,7 +5626,7 @@
 	compile_error(PARSER_ARG "unknown regexp option%s - %s",
 		      toklen() > 1 ? "s" : "", tok());
     }
-    return options | RE_OPTION_ENCODING(kcode);
+    return options;
 }
 
 static void
@@ -6126,6 +6072,7 @@
 static int
 parser_encode_length(struct parser_params *parser, const char *name, int len)
 {
+#if 0
     int nlen;
 
     if (len > 5 && name[nlen = len - 5] == '-') {
@@ -6138,6 +6085,7 @@
 	if (rb_memcicmp(name + nlen + 1, "mac", 3) == 0)
 	    return nlen;
     }
+#endif
     return len;
 }
 
@@ -8182,14 +8130,6 @@
     }
 }
 
-ID
-rb_id_attrset(ID id)
-{
-    id &= ~ID_SCOPE_MASK;
-    id |= ID_ATTRSET;
-    return id;
-}
-
 static NODE *
 attrset_gen(struct parser_params *parser, NODE *recv, ID id)
 {
@@ -9034,9 +8974,6 @@
 	    vtable_included(lvtbl->vars, id));
 }
 
-VALUE rb_reg_compile(VALUE str, int options);
-VALUE rb_reg_check_preprocess(VALUE);
-
 static void
 reg_fragment_setenc_gen(struct parser_params* parser, VALUE str, int options)
 {
@@ -9093,6 +9030,7 @@
     }
 }
 
+#if 0
 typedef struct {
     struct parser_params* parser;
     rb_encoding *enc;
@@ -9139,10 +9077,14 @@
         newline_node(node_assign(assignable(var,0), NEW_LIT(Qnil))));
     return ST_CONTINUE;
 }
+#endif
 
 static NODE *
 reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match)
 {
+    // TODO
+    return match;
+#if 0
     reg_named_capture_assign_t arg;
 
     arg.parser = parser;
@@ -9170,6 +9112,7 @@
                     newline_node(arg.fail_block),
                     newline_node(
                         NEW_LIT(Qnil)))));
+#endif
 }
 
 static VALUE
@@ -9274,534 +9217,53 @@
     return scope;
 }
 
-static const struct {
-    ID token;
-    const char *name;
-} op_tbl[] = {
-    {tDOT2,	".."},
-    {tDOT3,	"..."},
-    {'+',	"+"},
-    {'-',	"-"},
-    {'+',	"+(binary)"},
-    {'-',	"-(binary)"},
-    {'*',	"*"},
-    {'/',	"/"},
-    {'%',	"%"},
-    {tPOW,	"**"},
-    {tUPLUS,	"+@"},
-    {tUMINUS,	"-@"},
-    {'|',	"|"},
-    {'^',	"^"},
-    {'&',	"&"},
-    {'!',	"!"},
-    {tCMP,	"<=>"},
-    {'>',	">"},
-    {tGEQ,	">="},
-    {'<',	"<"},
-    {tLEQ,	"<="},
-    {tEQ,	"=="},
-    {tEQQ,	"==="},
-    {tNEQ,	"!="},
-    {tMATCH,	"=~"},
-    {tNMATCH,	"!~"},
-    {'~',	"~"},
-    {'!',	"!"},
-    {tAREF,	"[]"},
-    {tASET,	"[]="},
-    {tLSHFT,	"<<"},
-    {tRSHFT,	">>"},
-    {tCOLON2,	"::"},
-    {'`',	"`"},
-    {0,	0}
-};
-
-static struct symbols {
-    ID last_id;
-#if WITH_OBJC
-    CFMutableDictionaryRef sym_id;
-    CFMutableDictionaryRef id_str;
-#else
-    st_table *sym_id;
-    st_table *id_str;
-#endif
-    VALUE op_sym[tLAST_TOKEN];
-} global_symbols = {tLAST_TOKEN >> ID_SCOPE_SHIFT};
-
-static const struct st_hash_type symhash = {
-    rb_str_hash_cmp,
-    rb_str_hash,
-};
-
-struct ivar2_key {
-    ID id;
-    VALUE klass;
-};
-
-static int
-ivar2_cmp(struct ivar2_key *key1, struct ivar2_key *key2)
-{
-    if (key1->id == key2->id && key1->klass == key2->klass) {
-	return 0;
-    }
-    return 1;
-}
-
-static int
-ivar2_hash(struct ivar2_key *key)
-{
-    return (key->id << 8) ^ (key->klass >> 2);
-}
-
-static const struct st_hash_type ivar2_hash_type = {
-    ivar2_cmp,
-    ivar2_hash,
-};
-
-void
-Init_sym(void)
-{
-#if WITH_OBJC
-    global_symbols.sym_id = CFDictionaryCreateMutable(NULL,
-	0, NULL, NULL);
-    GC_ROOT(&global_symbols.sym_id);
-    global_symbols.id_str = CFDictionaryCreateMutable(NULL,
-	0, NULL, NULL);
-    GC_ROOT(&global_symbols.id_str);
-    rb_cSymbol = rb_objc_create_class("Symbol", (VALUE)objc_getClass("NSString"));
-#else
-    global_symbols.sym_id = st_init_table_with_size(&symhash, 1000);
-    global_symbols.id_str = st_init_numtable_with_size(1000);
-#endif
-    rb_intern2("", 0);
-}
-
-#if !WITH_OBJC 
-void
-rb_gc_mark_symbols(void)
-{
-    rb_mark_tbl(global_symbols.id_str);
-    rb_gc_mark_locations(global_symbols.op_sym,
-			 global_symbols.op_sym + tLAST_TOKEN);
-}
-#endif
-
 // XXX not thread-safe
 static long internal_count = 0;
 
 static ID
 internal_id_gen(struct parser_params *parser)
 {
-#if 1
     char buf[100];
     snprintf(buf, sizeof buf, "__internal_id_tmp_%ld__", internal_count++);
     return rb_intern(buf);
-#else
-    ID id = (ID)vtable_size(lvtbl->args) + (ID)vtable_size(lvtbl->vars);
-    id += ((tLAST_TOKEN - ID_INTERNAL) >> ID_SCOPE_SHIFT) + 1;
-    return ID_INTERNAL | (id << ID_SCOPE_SHIFT);
-#endif
 }
 
-static int
-is_special_global_name(const char *m, const char *e, rb_encoding *enc)
-{
-    int mb = 0;
+struct rb_op_tbl_entry rb_op_tbl[] = {
+    {'+',       "+"},
+    {'-',       "-"},
+    {'*',       "*"},
+    {'/',       "/"},
+    {'%',       "%"},
+    {'|',       "|"},
+    {'^',       "^"},
+    {'&',       "&"},
+    {'!',       "!"},
+    {'>',       ">"},
+    {'<',       "<"},
+    {'~',       "~"},
+    {'!',       "!"},
+    {'`',       "`"},
+    {tDOT2,     ".."},
+    {tDOT3,     "..."},
+    {tPOW,      "**"},
+    {tUPLUS,    "+@"},
+    {tUMINUS,   "-@"},
+    {tCMP,      "<=>"},
+    {tGEQ,      ">="},
+    {tLEQ,      "<="},
+    {tEQ,       "=="},
+    {tEQQ,      "==="},
+    {tNEQ,      "!="},
+    {tMATCH,    "=~"},
+    {tNMATCH,   "!~"},
+    {tAREF,     "[]"},
+    {tASET,     "[]="},
+    {tLSHFT,    "<<"},
+    {tRSHFT,    ">>"},
+    {tCOLON2,   "::"},
+    {0,         NULL}
+};
 
-    if (m >= e) return 0;
-    switch (*m) {
-      case '~': case '*': case '$': case '?': case '!': case '@':
-      case '/': case '\\': case ';': case ',': case '.': case '=':
-      case ':': case '<': case '>': case '\"':
-      case '&': case '`': case '\'': case '+':
-      case '0':
-	++m;
-	break;
-      case '-':
-	++m;
-	if (m < e && is_identchar(m, e, enc)) {
-	    if (!ISASCII(*m)) mb = 1;
-#if WITH_OBJC
-	    m += 1;
-#else
-	    m += rb_enc_mbclen(m, e, enc);
-#endif
-	}
-	break;
-      default:
-	if (!rb_enc_isdigit(*m, enc)) return 0;
-	do {
-	    if (!ISASCII(*m)) mb = 1;
-	    ++m;
-	} while (rb_enc_isdigit(*m, enc));
-    }
-    return m == e ? mb + 1 : 0;
-}
-
-int
-rb_symname_p(const char *name)
-{
-#if WITH_OBJC
-    return rb_enc_symname_p(name, NULL);
-#else
-    return rb_enc_symname_p(name, rb_ascii8bit_encoding());
-#endif
-}
-
-int
-rb_enc_symname_p(const char *name, rb_encoding *enc)
-{
-    return rb_enc_symname2_p(name, strlen(name), enc);
-}
-
-int
-rb_enc_symname2_p(const char *name, int len, rb_encoding *enc)
-{
-    const char *m = name;
-    const char *e = m + len;
-    int localid = Qfalse;
-
-    if (!m) return Qfalse;
-    switch (*m) {
-      case '\0':
-	return Qfalse;
-
-      case '$':
-	if (is_special_global_name(++m, e, enc)) return Qtrue;
-	goto id;
-
-      case '@':
-	if (*++m == '@') ++m;
-	goto id;
-
-      case '<':
-	switch (*++m) {
-	  case '<': ++m; break;
-	  case '=': if (*++m == '>') ++m; break;
-	  default: break;
-	}
-	break;
-
-      case '>':
-	switch (*++m) {
-	  case '>': case '=': ++m; break;
-	}
-	break;
-
-      case '=':
-	switch (*++m) {
-	  case '~': ++m; break;
-	  case '=': if (*++m == '=') ++m; break;
-	  default: return Qfalse;
-	}
-	break;
-
-      case '*':
-	if (*++m == '*') ++m;
-	break;
-
-      case '+': case '-':
-	if (*++m == '@') ++m;
-	break;
-
-      case '|': case '^': case '&': case '/': case '%': case '~': case '`':
-	++m;
-	break;
-
-      case '[':
-	if (*++m != ']') return Qfalse;
-	if (*++m == '=') ++m;
-	break;
-
-      case '!':
-	switch (*++m) {
-	  case '\0': return Qtrue;
-	  case '=': case '~': ++m; break;
-	  default: return Qfalse;
-	}
-	break;
-	    
-      default:
-	localid = !rb_enc_isupper(*m, enc);
-      id:
-	if (m >= e || (*m != '_' && !rb_enc_isalpha(*m, enc) && ISASCII(*m)))
-	    return Qfalse;
-#if WITH_OBJC
-	while (m < e && is_identchar(m, e, enc)) m += 1;
-#else
-	while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
-#endif
-	if (localid) {
-	    switch (*m) {
-	      case '!': case '?': case '=': ++m;
-	    }
-	}
-	break;
-    }
-    return *m ? Qfalse : Qtrue;
-}
-
-#if WITH_OBJC
-static inline VALUE
-rsymbol_new(const char *name, const int len, ID id)
-{
-    VALUE sym;
-
-    sym = (VALUE)orig_malloc(sizeof(struct RSymbol));
-    RSYMBOL(sym)->str = orig_malloc(len + 1);
-    RSYMBOL(sym)->klass = rb_cSymbol;
-    strcpy(RSYMBOL(sym)->str, name);
-    RSYMBOL(sym)->len = len;
-    RSYMBOL(sym)->id = id;
-    
-    return sym;
-}
-#endif
-
-ID
-rb_intern3(const char *name, long len, rb_encoding *enc)
-{
-    const char *m = name;
-    const char *e = m + len;
-    VALUE str;
-    ID id;
-    int last;
-    int mb;
-#if !WITH_OBJC
-    struct RString fake_str;
-    fake_str.basic.isa = NULL;
-    fake_str.basic.flags = T_STRING|RSTRING_NOEMBED|FL_FREEZE;
-    fake_str.basic.klass = rb_cString;
-    fake_str.as.heap.len = len;
-    fake_str.as.heap.ptr = (char *)name;
-    fake_str.as.heap.aux.capa = len;
-    str = (VALUE)&fake_str;
-    rb_enc_associate(str, enc);
-
-    if (st_lookup(global_symbols.sym_id, str, (st_data_t *)&id))
-	return id;
-#else
-    long sname = strlen(name);
-    assert(len <= sname);
-    if (sname != len) {
-	char *tmp = (char *)alloca(len + 1);
-	memcpy(tmp, name, len);
-	tmp[len] = '\0';
-	m = name = tmp;
-	e = m + len;
-    }
-    SEL name_hash = sel_registerName(name);
-    if (name_hash == sel_ignored) {
-	if (strcmp(name, "retain") == 0) {
-	    name_hash = (SEL)0x1000;
-	}
-	else if (strcmp(name, "release") == 0) {
-	    name_hash = (SEL)0x2000;
-	}
-	else if (strcmp(name, "retainCount") == 0) {
-	    name_hash = (SEL)0x3000;
-	}
-	else if (strcmp(name, "autorelease") == 0) {
-	    name_hash = (SEL)0x4000;
-	}
-	else if (strcmp(name, "dealloc") == 0) {
-	    name_hash = (SEL)0x5000;
-	}
-	else {
-	    printf("unrecognized ignored sel %s\n", name);
-	    abort();
-	}
-    }
-    id = (ID)CFDictionaryGetValue((CFDictionaryRef)global_symbols.sym_id, 
-	(const void *)name_hash);
-    if (id != 0)
-	return id; 
-#endif
-
-    last = len-1;
-    id = 0;
-    switch (*m) {
-      case '$':
-	id |= ID_GLOBAL;
-	if ((mb = is_special_global_name(++m, e, enc)) != 0) {
-	    if (!--mb) {
-#if WITH_OBJC
-		enc = NULL;
-#else
-		enc = rb_ascii8bit_encoding();
-#endif
-	    }
-	    goto new_id;
-	}
-	break;
-      case '@':
-	if (m[1] == '@') {
-	    m++;
-	    id |= ID_CLASS;
-	}
-	else {
-	    id |= ID_INSTANCE;
-	}
-	m++;
-	break;
-      default:
-	if (m[0] != '_' && rb_enc_isascii((unsigned char)m[0], enc)
-	    && !rb_enc_isalnum(m[0], enc)) {
-	    /* operators */
-	    int i;
-
-	    for (i=0; op_tbl[i].token; i++) {
-		if (*op_tbl[i].name == *m &&
-		    strcmp(op_tbl[i].name, m) == 0) {
-		    id = op_tbl[i].token;
-		    goto id_register;
-		}
-	    }
-	}
-
-	if (m[last] == '=') {
-	    /* attribute assignment */
-	    id = rb_intern3(name, last, enc);
-	    if (id > tLAST_TOKEN && !is_attrset_id(id)) {
-		enc = rb_enc_get(rb_id2str(id));
-		id = rb_id_attrset(id);
-		goto id_register;
-	    }
-	    id = ID_ATTRSET;
-	}
-	else if (rb_enc_isupper(m[0], enc)) {
-	    id = ID_CONST;
-        }
-	else {
-	    id = ID_LOCAL;
-	}
-	break;
-    }
-    mb = 0;
-    if (!rb_enc_isdigit(*m, enc)) {
-	while (m <= name + last && is_identchar(m, e, enc)) {
-	    if (ISASCII(*m)) {
-		m++;
-	    }
-	    else {
-		mb = 1;
-#if WITH_OBJC
-		m += 1;
-#else
-		m += rb_enc_mbclen(m, e, enc);
-#endif
-	    }
-	}
-    }
-    if (m - name < len) id = ID_JUNK;
-#if !WITH_OBJC
-    if (enc != rb_usascii_encoding()) {
-	/*
-	 * this clause makes sense only when called from other than
-	 * rb_intern_str() taking care of code-range.
-	 */
-	if (!mb) {
-	    for (; m <= name + len; ++m) {
-		if (!ISASCII(*m)) goto mbstr;
-	    }
-	    enc = rb_usascii_encoding();
-	}
-      mbstr:;
-    }
-#endif
-  new_id:
-    id |= ++global_symbols.last_id << ID_SCOPE_SHIFT;
-  id_register:
-    str = rsymbol_new(name, len, id);
-#if WITH_OBJC
-    CFDictionarySetValue(global_symbols.sym_id, (const void *)name_hash, 
-	(const void *)id);
-    CFDictionarySetValue(global_symbols.id_str, (const void *)id,
-	(const void *)str);
-#else
-    st_add_direct(global_symbols.sym_id, (st_data_t)str, id);
-    st_add_direct(global_symbols.id_str, id, (st_data_t)str);
-#endif
-    return id;
-}
-
-ID
-rb_intern2(const char *name, long len)
-{
-#if WITH_OBJC
-    return rb_intern3(name, len, NULL);
-#else
-    return rb_intern3(name, len, rb_usascii_encoding());
-#endif
-}
-
-#undef rb_intern
-ID
-rb_intern(const char *name)
-{
-    return rb_intern2(name, strlen(name));
-}
-
-ID
-rb_intern_str(VALUE str)
-{
-    const char *s = RSTRING_PTR(str);
-    return rb_intern3(s, strlen(s), NULL);
-}
-
-VALUE
-rb_id2str(ID id)
-{
-    st_data_t data;
-
-    if (id < tLAST_TOKEN) {
-	int i = 0;
-
-	for (i=0; op_tbl[i].token; i++) {
-	    if (op_tbl[i].token == id) {
-		VALUE str = global_symbols.op_sym[i];
-		if (!str) {
-		    str = rsymbol_new(op_tbl[i].name, strlen(op_tbl[i].name), op_tbl[i].token);
-		    global_symbols.op_sym[i] = str;
-		}
-		return str;
-	    }
-	}
-    }
-
-    data = (VALUE)CFDictionaryGetValue(
-	(CFDictionaryRef)global_symbols.id_str,
-	(const void *)id);
-    if (data != 0) {
-	return data;
-    }
-
-    if (is_attrset_id(id)) {
-	ID id2 = (id & ~ID_SCOPE_MASK) | ID_LOCAL;
-	VALUE str;
-
-	while (!(str = rb_id2str(id2))) {
-	    if (!is_local_id(id2)) return 0;
-	    id2 = (id & ~ID_SCOPE_MASK) | ID_CONST;
-	}
-	str = rb_str_dup(str);
-	rb_str_cat(str, "=", 1);
-	rb_intern_str(str);
-	data = (VALUE)CFDictionaryGetValue(
-	    (CFDictionaryRef)global_symbols.id_str,
-	    (const void *)id);
-	if (data != 0) {
-	    return data;
-	}
-    }
-    return 0;
-}
-
-VALUE
-rb_name2sym(const char *name)
-{
-    return rb_id2str(rb_intern(name));
-}
-
 const char *
 ruby_node_name(int node)
 {
@@ -9812,41 +9274,7 @@
 	    return 0;
     }
 }
- 
-/*
- *  call-seq:
- *     Symbol.all_symbols    => array
- *
- *  Returns an array of all the symbols currently in Ruby's symbol
- *  table.
- *
- *     Symbol.all_symbols.size    #=> 903
- *     Symbol.all_symbols[1,20]   #=> [:floor, :ARGV, :Binding, :symlink,
- *                                     :chown, :EOFError, :$;, :String,
- *                                     :LOCK_SH, :"setuid?", :$<,
- *                                     :default_proc, :compact, :extend,
- *                                     :Tms, :getwd, :$=, :ThreadGroup,
- *                                     :wait2, :$>]
- */
 
-VALUE
-rb_sym_all_symbols(void)
-{
-    const void **values;
-    long count;
-    VALUE ary;
-
-    ary = rb_ary_new();
-    count = CFDictionaryGetCount(global_symbols.id_str);
-    if (count == 0) {
-	return ary;
-    }
-    values = alloca(sizeof(void *) * count);
-    CFDictionaryGetKeysAndValues(global_symbols.id_str, NULL, values);
-    CFArrayReplaceValues((CFMutableArrayRef)ary, CFRangeMake(0, 0), values, count);   
-    return ary;
-}
-
 int
 rb_is_const_id(ID id)
 {
@@ -9882,6 +9310,14 @@
     return Qfalse;
 }
 
+ID
+rb_id_attrset(ID id)
+{
+    id &= ~ID_SCOPE_MASK;
+    id |= ID_ATTRSET;
+    return id;
+}
+
 #endif /* !RIPPER */
 
 static void

Modified: MacRuby/trunk/rakelib/builder/builder.rb
===================================================================
--- MacRuby/trunk/rakelib/builder/builder.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/rakelib/builder/builder.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -3,15 +3,10 @@
 OBJS = %w{
   array bignum class compar complex enum enumerator error eval file load proc 
   gc hash env inits io math numeric object pack parse prec dir process
-  random range rational re onig/regcomp onig/regext onig/regposix onig/regenc
-  onig/reggnu onig/regsyntax onig/regerror onig/regparse onig/regtrav
-  onig/regexec onig/regposerr onig/regversion onig/enc/ascii onig/enc/unicode
-  onig/enc/utf8 onig/enc/euc_jp onig/enc/sjis onig/enc/iso8859_1
-  onig/enc/utf16_be onig/enc/utf16_le onig/enc/utf32_be onig/enc/utf32_le
-  ruby signal sprintf st string struct time transcode util variable version
-  thread id objc bs encoding main dln dmyext marshal gcd
-  vm_eval prelude miniprelude gc-stub bridgesupport compiler dispatcher vm
-  debugger MacRuby MacRubyDebuggerConnector NSArray NSDictionary
+  random range rational re ruby signal sprintf st string struct time transcode
+  util variable version thread id objc bs ucnv encoding main dln dmyext marshal
+  gcd vm_eval prelude miniprelude gc-stub bridgesupport compiler dispatcher vm
+  symbol debugger MacRuby MacRubyDebuggerConnector NSArray NSDictionary
 }
 
 EXTENSIONS = %w{

Modified: MacRuby/trunk/rakelib/builder/options.rb
===================================================================
--- MacRuby/trunk/rakelib/builder/options.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/rakelib/builder/options.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -32,7 +32,8 @@
 if `sw_vers -productVersion`.strip.to_f >= 10.7 and File.exist?('/AppleInternal')
   $stderr.puts "Welcome bleeding-edge adventurer!"
   llvm_default_path = '/Developer/usr/local'
-  ENV['LLVM_TOT'] = '1'
+  #ENV['LLVM_TOT'] = '1'
+  ENV['LLVM_PRE_TOT'] = '1'
 end
 
 RUBY_INSTALL_NAME       = b.option('ruby_install_name', 'macruby')
@@ -100,27 +101,36 @@
 ARCHFLAGS = ARCHS.map { |a| '-arch ' + a }.join(' ')
 LLVM_MODULES = "core jit nativecodegen bitwriter"
 
-CC = '/usr/bin/gcc'
-CXX = '/usr/bin/g++'
-CFLAGS = "-I. -I./include -I./onig -I/usr/include/libxml2 #{ARCHFLAGS} -fno-common -pipe -O3 -g -Wall -fexceptions"
+CC = '/usr/bin/gcc-4.2'
+CXX = '/usr/bin/g++-4.2'
+CFLAGS = "-I. -I./include -I/usr/include/libxml2 #{ARCHFLAGS} -fno-common -pipe -O3 -g -Wall -fexceptions"
 CFLAGS << " -Wno-parentheses -Wno-deprecated-declarations -Werror" if NO_WARN_BUILD
 OBJC_CFLAGS = CFLAGS + " -fobjc-gc-only"
 CXXFLAGS = `#{LLVM_CONFIG} --cxxflags #{LLVM_MODULES}`.sub(/-DNDEBUG/, '').sub(/-fno-exceptions/, '').strip
 CXXFLAGS << " -I. -I./include -g -Wall #{ARCHFLAGS}"
 CXXFLAGS << " -Wno-parentheses -Wno-deprecated-declarations -Werror" if NO_WARN_BUILD
 CXXFLAGS << " -DLLVM_TOT" if ENV['LLVM_TOT']
+CXXFLAGS << " -DLLVM_PRE_TOT" if ENV['LLVM_PRE_TOT']
 LDFLAGS = `#{LLVM_CONFIG} --ldflags --libs #{LLVM_MODULES}`.strip.gsub(/\n/, '')
-LDFLAGS << " -lpthread -ldl -lxml2 -lobjc -lauto -framework Foundation"
+LDFLAGS << " -lpthread -ldl -lxml2 -lobjc -lauto -licucore -framework Foundation"
 DLDFLAGS = "-dynamiclib -undefined suppress -flat_namespace -install_name #{INSTALL_NAME} -current_version #{MACRUBY_VERSION} -compatibility_version #{MACRUBY_VERSION}"
 DLDFLAGS << " -unexported_symbols_list #{UNEXPORTED_SYMBOLS_LIST}" if UNEXPORTED_SYMBOLS_LIST
 CFLAGS << " -std=c99" # we add this one later to not conflict with C++ flags
 OBJC_CFLAGS << " -std=c99"
 
+if `sw_vers -productVersion`.to_f <= 10.6
+  CFLAGS << " -I./icu-1060"
+  CXXFLAGS << " -I./icu-1060"
+else
+  if !File.exist?('/usr/local/include/unicode')
+    $stderr.puts "Cannot locate ICU headers for this version of Mac OS X."
+    exit 1
+  end
+end
+
 OBJS_CFLAGS = {
   # Make sure everything gets inlined properly + compile as Objective-C++.
   'dispatcher' => '--param inline-unit-growth=10000 --param large-function-growth=10000 -x objective-c++',
-  # Disable optimizations to work around a silly bug.
-  're' => '-O0'
 }
 
 # We monkey-patch the method that Rake uses to display the tasks so we can add

Modified: MacRuby/trunk/random.c
===================================================================
--- MacRuby/trunk/random.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/random.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -11,6 +11,7 @@
 #include "ruby/node.h"
 #include "vm.h"
 #include "id.h"
+#include "encoding.h"
 
 #include <unistd.h>
 #include <time.h>
@@ -563,7 +564,7 @@
 	ptr[i] = (char)r;
 	r >>= CHAR_BIT;
     }
-    VALUE bytes = rb_bytestring_new_with_data(ptr, n);
+    VALUE bytes = rb_bstr_new_with_data(ptr, n);
     free(ptr);
     return bytes;
 }

Modified: MacRuby/trunk/rational.c
===================================================================
--- MacRuby/trunk/rational.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/rational.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -8,7 +8,6 @@
 #include "ruby.h"
 #include <math.h>
 #include <float.h>
-#include "ruby/re.h"
 
 #ifdef HAVE_IEEEFP_H
 #include <ieeefp.h>

Deleted: MacRuby/trunk/re.c
===================================================================
--- MacRuby/trunk/re.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/re.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,3794 +0,0 @@
-/**********************************************************************
-
-  re.c -
-
-  $Author: mame $
-  created at: Mon Aug  9 18:24:49 JST 1993
-
-  Copyright (C) 1993-2007 Yukihiro Matsumoto
-
-**********************************************************************/
-
-#include "oniguruma.h"
-#include "ruby/ruby.h"
-#include "ruby/re.h"
-#include "ruby/encoding.h"
-#include "ruby/util.h"
-#include "regint.h"
-#include "objc.h"
-#include <ctype.h>
-
-VALUE rb_eRegexpError;
-
-typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
-
-#define BEG(no) regs->beg[no]
-#define END(no) regs->end[no]
-
-#if 'a' == 97   /* it's ascii */
-static const char casetable[] = {
-        '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
-        '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
-        '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
-        '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
-        /* ' '     '!'     '"'     '#'     '$'     '%'     '&'     ''' */
-        '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
-        /* '('     ')'     '*'     '+'     ','     '-'     '.'     '/' */
-        '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
-        /* '0'     '1'     '2'     '3'     '4'     '5'     '6'     '7' */
-        '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
-        /* '8'     '9'     ':'     ';'     '<'     '='     '>'     '?' */
-        '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
-        /* '@'     'A'     'B'     'C'     'D'     'E'     'F'     'G' */
-        '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
-        /* 'H'     'I'     'J'     'K'     'L'     'M'     'N'     'O' */
-        '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
-        /* 'P'     'Q'     'R'     'S'     'T'     'U'     'V'     'W' */
-        '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
-        /* 'X'     'Y'     'Z'     '['     '\'     ']'     '^'     '_' */
-        '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
-        /* '`'     'a'     'b'     'c'     'd'     'e'     'f'     'g' */
-        '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
-        /* 'h'     'i'     'j'     'k'     'l'     'm'     'n'     'o' */
-        '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
-        /* 'p'     'q'     'r'     's'     't'     'u'     'v'     'w' */
-        '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
-        /* 'x'     'y'     'z'     '{'     '|'     '}'     '~' */
-        '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
-        '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
-        '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
-        '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
-        '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
-        '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
-        '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
-        '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
-        '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
-        '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
-        '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
-        '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
-        '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
-        '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
-        '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
-        '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
-        '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
-};
-#else
-# error >>> "You lose. You will need a translation table for your character set." <<<
-#endif
-
-int
-rb_memcicmp(const void *x, const void *y, long len)
-{
-    const unsigned char *p1 = x, *p2 = y;
-    int tmp;
-
-    while (len--) {
-	if ((tmp = casetable[(unsigned)*p1++] - casetable[(unsigned)*p2++]))
-	    return tmp;
-    }
-    return 0;
-}
-
-#undef rb_memcmp
-
-int
-rb_memcmp(const void *p1, const void *p2, long len)
-{
-    return memcmp(p1, p2, len);
-}
-
-static inline long
-rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
-{
-    const unsigned char *x = xs, *xe = xs + m;
-    const unsigned char *y = ys, *ye = ys + n;
-#ifndef VALUE_MAX
-# if SIZEOF_VALUE == 8
-#  define VALUE_MAX 0xFFFFFFFFFFFFFFFFULL
-# elif SIZEOF_VALUE == 4
-#  define VALUE_MAX 0xFFFFFFFFUL
-# endif
-#endif
-    VALUE hx, hy, mask = VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT);
-
-    if (m > SIZEOF_VALUE)
-	rb_bug("!!too long pattern string!!");
-
-    	/* Prepare hash value */
-    for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
-	hx <<= CHAR_BIT;
-	hy <<= CHAR_BIT;
-	hx |= *x;
-	hy |= *y;
-    }
-    /* Searching */
-    while (hx != hy) {
-	if (y == ye)
-	    return -1;
-	hy <<= CHAR_BIT;
-	hy |= *y;
-	hy &= mask;
-	y++;
-    }
-    return y - ys - m;
-}
-
-static inline long
-rb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
-{
-    const unsigned char *x = xs, *xe = xs + m;
-    const unsigned char *y = ys;
-    VALUE i, qstable[256];
-
-    /* Preprocessing */
-    for (i = 0; i < 256; ++i)
-	qstable[i] = m + 1;
-    for (; x < xe; ++x)
-	qstable[*x] = xe - x;
-    /* Searching */
-    for (; y + m <= ys + n; y += *(qstable + y[m])) {
-	if (*xs == *y && memcmp(xs, y, m) == 0)
-	    return y - ys;
-    }
-    return -1;
-}
-
-static inline unsigned int
-rb_memsearch_qs_utf8_hash(const unsigned char *x)
-{
-    register const unsigned int mix = 8353;
-    register unsigned int h = *x;
-    if (h < 0xC0) {
-	return h + 256;
-    }
-    else if (h < 0xE0) {
-	h *= mix;
-	h += x[1];
-    }
-    else if (h < 0xF0) {
-	h *= mix;
-	h += x[1];
-	h *= mix;
-	h += x[2];
-    }
-    else if (h < 0xF5) {
-	h *= mix;
-	h += x[1];
-	h *= mix;
-	h += x[2];
-	h *= mix;
-	h += x[3];
-    }
-    else {
-	return h + 256;
-    }
-    return (unsigned char)h;
-}
-
-static inline long
-rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n)
-{
-    const unsigned char *x = xs, *xe = xs + m;
-    const unsigned char *y = ys;
-    VALUE i, qstable[512];
-
-    /* Preprocessing */
-    for (i = 0; i < 512; ++i) {
-	qstable[i] = m + 1;
-    }
-    for (; x < xe; ++x) {
-	qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
-    }
-    /* Searching */
-    for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
-	if (*xs == *y && memcmp(xs, y, m) == 0)
-	    return y - ys;
-    }
-    return -1;
-}
-
-long
-rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
-{
-    const unsigned char *x = x0, *y = y0;
-
-    if (m > n) return -1;
-    else if (m == n) {
-	return memcmp(x0, y0, m) == 0 ? 0 : -1;
-    }
-    else if (m < 1) {
-	return 0;
-    }
-    else if (m == 1) {
-	const unsigned char *ys = y, *ye = ys + n;
-	for (; y < ye; ++y) {
-	    if (*x == *y)
-		return y - ys;
-	}
-	return -1;
-    }
-    else if (m <= SIZEOF_VALUE) {
-	return rb_memsearch_ss(x0, m, y0, n);
-    }
-#if !WITH_OBJC
-    else if (enc == rb_utf8_encoding()){
-	return rb_memsearch_qs_utf8(x0, m, y0, n);
-    }
-#endif
-    else {
-	return rb_memsearch_qs(x0, m, y0, n);
-    }
-}
-
-#define REG_LITERAL FL_USER5
-#define REG_ENCODING_NONE FL_USER6
-#define REG_BUSY FL_USER7
-
-#define KCODE_FIXED FL_USER4
-
-#define ARG_REG_OPTION_MASK \
-    (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
-#define ARG_ENCODING_FIXED    16
-#define ARG_ENCODING_NONE     32
-
-static int
-char_to_option(int c)
-{
-    int val;
-
-    switch (c) {
-      case 'i':
-	val = ONIG_OPTION_IGNORECASE;
-	break;
-      case 'x':
-	val = ONIG_OPTION_EXTEND;
-	break;
-      case 'm':
-	val = ONIG_OPTION_MULTILINE;
-	break;
-      default:
-	val = 0;
-	break;
-    }
-    return val;
-}
-
-static char *
-option_to_str(char str[4], int options)
-{
-    char *p = str;
-    if (options & ONIG_OPTION_MULTILINE) *p++ = 'm';
-    if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i';
-    if (options & ONIG_OPTION_EXTEND) *p++ = 'x';
-    *p = 0;
-    return str;
-}
-
-extern int
-rb_char_to_option_kcode(int c, int *option, int *kcode)
-{
-    *option = 0;
-
-    switch (c) {
-#if !WITH_OBJC
-      case 'e':
-	*kcode = rb_enc_find_index("EUC-JP");
-	break;
-      case 's':
-	*kcode = rb_enc_find_index("Windows-31J");
-	break;
-      case 'u':
-	*kcode = rb_enc_find_index("UTF-8");
-	break;
-#else
-      case 'e':
-      case 's':
-      case 'u':
-#endif
-      case 'n':
-        *kcode = -1;
-        return (*option = ARG_ENCODING_NONE);
-      default:
-	*kcode = -1;
-	return (*option = char_to_option(c));
-    }
-    *option = ARG_ENCODING_FIXED;
-    return 1;
-}
-
-static void
-rb_reg_check(VALUE re)
-{
-    if (!RREGEXP(re)->ptr || !RREGEXP(re)->str) {
-	rb_raise(rb_eTypeError, "uninitialized Regexp");
-    }
-}
-
-static void
-rb_reg_expr_str(VALUE str, const char *s, long len)
-{
-#if !WITH_OBJC
-    rb_encoding *enc = rb_enc_get(str);
-#endif
-    const char *p, *pend;
-    int need_escape = 0;
-    int c, clen;
-
-    p = s; pend = p + len;
-    while (p<pend) {
-#if WITH_OBJC
-	c = *p;
-	clen = 1;
-	if (0) {}
-#else
-        c = rb_enc_ascget(p, pend, &clen, enc);
-        if (c == -1) {
-            p += mbclen(p, pend, enc);
-        }
-#endif
-        else if (c != '/' && rb_enc_isprint(c, enc)) {
-            p += clen;
-        }
-        else {
-	    need_escape = 1;
-	    break;
-        }
-    }
-    if (!need_escape) {
-	rb_str_buf_cat(str, s, len);
-    }
-    else {
-	p = s;
-	while (p<pend) {
-#if WITH_OBJC
-	    c = *p;
-	    clen = 1;
-#else
-            c = rb_enc_ascget(p, pend, &clen, enc);
-#endif
-	    if (c == '\\' && p+clen < pend) {
-#if WITH_OBJC
-		int n = clen + (pend - (p+clen));
-#else
-		int n = clen + mbclen(p+clen, pend, enc);
-#endif
-		rb_str_buf_cat(str, p, n);
-		p += n;
-		continue;
-	    }
-	    else if (c == '/') {
-		char c = '\\';
-		rb_str_buf_cat(str, &c, 1);
-		rb_str_buf_cat(str, p, clen);
-	    }
-#if !WITH_OBJC
-	    else if (c == -1) {
-                int l = mbclen(p, pend, enc);
-	    	rb_str_buf_cat(str, p, l);
-		p += l;
-		continue;
-	    }
-#endif
-	    else if (rb_enc_isprint(c, enc)) {
-		rb_str_buf_cat(str, p, clen);
-	    }
-	    else if (!rb_enc_isspace(c, enc)) {
-		char b[8];
-
-		sprintf(b, "\\x%02X", (unsigned char)c);
-		rb_str_buf_cat(str, b, 4);
-	    }
-	    else {
-		rb_str_buf_cat(str, p, clen);
-	    }
-	    p += clen;
-	}
-    }
-}
-
-static VALUE
-rb_reg_desc(const char *s, long len, VALUE re)
-{
-    VALUE str = rb_str_buf_new2("/");
-
-#if !WITH_OBJC
-    rb_enc_copy(str, re);
-#endif
-    rb_reg_expr_str(str, s, len);
-    rb_str_buf_cat2(str, "/");
-    if (re) {
-	char opts[4];
-	rb_reg_check(re);
-	if (*option_to_str(opts, RREGEXP(re)->ptr->options))
-	    rb_str_buf_cat2(str, opts);
-    }
-    OBJ_INFECT(str, re);
-    return str;
-}
-
-
-/*
- *  call-seq:
- *      rxp.source   => str
- *
- *  Returns the original string of the pattern.
- *
- *      /ab+c/ix.source #=> "ab+c"
- *
- *  Note that escape sequences are retained as is.
- *
- *     /\x20\+/.source  #=> "\\x20\\+"
- *
- */
-
-static VALUE
-rb_reg_source(VALUE re, SEL sel)
-{
-    VALUE str;
-    const char *cstr;
-    long clen;
-
-    rb_reg_check(re);
-    cstr = RREGEXP(re)->str;
-    clen = RREGEXP(re)->len;
-    if (clen == 0) {
-	cstr = NULL;
-    }
-    str = rb_enc_str_new(cstr, clen, rb_enc_get(re));
-    if (OBJ_TAINTED(re)) {
-	OBJ_TAINT(str);
-    }
-    return str;
-}
-
-/*
- * call-seq:
- *    rxp.inspect   => string
- *
- * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly,
- * <code>#inspect</code> actually produces the more natural version of
- * the string than <code>#to_s</code>.
- *
- *      /ab+c/ix.inspect        #=> "/ab+c/ix"
- *
- */
-
-static VALUE
-rb_reg_inspect(VALUE re, SEL sel)
-{
-    rb_reg_check(re);
-    return rb_reg_desc(RREGEXP(re)->str, RREGEXP(re)->len, re);
-}
-
-
-/*
- *  call-seq:
- *     rxp.to_s   => str
- *
- *  Returns a string containing the regular expression and its options (using the
- *  <code>(?opts:source)</code> notation. This string can be fed back in to
- *  <code>Regexp::new</code> to a regular expression with the same semantics as
- *  the original. (However, <code>Regexp#==</code> may not return true when
- *  comparing the two, as the source of the regular expression itself may
- *  differ, as the example shows).  <code>Regexp#inspect</code> produces a
- *  generally more readable version of <i>rxp</i>.
- *
- *      r1 = /ab+c/ix           #=> /ab+c/ix
- *      s1 = r1.to_s            #=> "(?ix-m:ab+c)"
- *      r2 = Regexp.new(s1)     #=> /(?ix-m:ab+c)/
- *      r1 == r2                #=> false
- *      r1.source               #=> "ab+c"
- *      r2.source               #=> "(?ix-m:ab+c)"
- */
-
-static VALUE
-rb_reg_to_s(VALUE re, SEL sel)
-{
-    int options, opt;
-    const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
-    long len;
-    const UChar* ptr;
-    VALUE str = rb_str_buf_new2("(?");
-    char optbuf[5];
-
-    rb_reg_check(re);
-
-#if !WITH_OBJC
-    rb_enc_copy(str, re);
-#endif
-    options = RREGEXP(re)->ptr->options;
-    ptr = (UChar*)RREGEXP(re)->str;
-    len = RREGEXP(re)->len;
-  again:
-    if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
-	int err = 1;
-	ptr += 2;
-	if ((len -= 2) > 0) {
-	    do {
-                opt = char_to_option((int )*ptr);
-                if (opt != 0) {
-                    options |= opt;
-                }
-                else {
-                    break;
-                }
-		++ptr;
-	    } while (--len > 0);
-	}
-	if (len > 1 && *ptr == '-') {
-	    ++ptr;
-	    --len;
-	    do {
-                opt = char_to_option((int )*ptr);
-                if (opt != 0) {
-                    options &= ~opt;
-                }
-                else {
-                    break;
-                }
-		++ptr;
-	    } while (--len > 0);
-	}
-	if (*ptr == ')') {
-	    --len;
-	    ++ptr;
-	    goto again;
-	}
-	if (*ptr == ':' && ptr[len-1] == ')') {
-	    int r;
-	    Regexp *rp;
-	    OnigEncoding oenc;
-	    
-#if WITH_OBJC
-	    oenc = ONIG_ENCODING_ASCII;
-#else
-	    oenc = rb_enc_get(re);
-#endif
-
-            r = onig_alloc_init(&rp, ONIG_OPTION_DEFAULT,
-                                ONIGENC_CASE_FOLD_DEFAULT,
-                                oenc,
-                                OnigDefaultSyntax);
-	    if (r == 0) {
-		 ++ptr;
- 		 len -= 2;
-		 err = (onig_compile(rp, ptr, ptr + len, NULL) != 0);
-	    }
-	    onig_free(rp);
-	}
-	if (err) {
-	    options = RREGEXP(re)->ptr->options;
-	    ptr = (UChar*)RREGEXP(re)->str;
-	    len = RREGEXP(re)->len;
-	}
-    }
-
-    if (*option_to_str(optbuf, options)) rb_str_buf_cat2(str, optbuf);
-
-    if ((options & embeddable) != embeddable) {
-	optbuf[0] = '-';
-	option_to_str(optbuf + 1, ~options);
-	rb_str_buf_cat2(str, optbuf);
-    }
-
-    rb_str_buf_cat2(str, ":");
-    rb_reg_expr_str(str, (char*)ptr, len);
-    rb_str_buf_cat2(str, ")");
-#if !WITH_OBJC
-    rb_enc_copy(str, re);
-#endif
-
-    OBJ_INFECT(str, re);
-    return str;
-}
-
-static void
-rb_reg_raise(const char *s, long len, const char *err, VALUE re)
-{
-    VALUE desc = rb_reg_desc(s, len, re);
-
-    rb_raise(rb_eRegexpError, "%s: %s", err, RSTRING_PTR(desc));
-}
-
-static VALUE
-rb_enc_reg_error_desc(const char *s, long len, rb_encoding *enc, int options, const char *err)
-{
-    char opts[6];
-    VALUE desc = rb_str_buf_new2(err);
-
-#if !WITH_OBJC
-    rb_enc_associate(desc, enc);
-#endif
-    rb_str_buf_cat2(desc, ": /");
-    rb_reg_expr_str(desc, s, len);
-    opts[0] = '/';
-    option_to_str(opts + 1, options);
-    rb_str_buf_cat2(desc, opts);
-    return rb_exc_new3(rb_eRegexpError, desc);
-}
-
-static void
-rb_enc_reg_raise(const char *s, long len, rb_encoding *enc, int options, const char *err)
-{
-    rb_exc_raise(rb_enc_reg_error_desc(s, len, enc, options, err));
-}
-
-static VALUE
-rb_reg_error_desc(VALUE str, int options, const char *err)
-{
-    return rb_enc_reg_error_desc(RSTRING_PTR(str), RSTRING_LEN(str),
-				 rb_enc_get(str), options, err);
-}
-
-static void
-rb_reg_raise_str(VALUE str, int options, const char *err)
-{
-    rb_exc_raise(rb_reg_error_desc(str, options, err));
-}
-
-
-/*
- *  call-seq:
- *     rxp.casefold?   => true or false
- *
- *  Returns the value of the case-insensitive flag.
- *
- *      /a/.casefold?           #=> false
- *      /a/i.casefold?          #=> true
- *      /(?i:a)/.casefold?      #=> false
- */
-
-static VALUE
-rb_reg_casefold_p(VALUE re, SEL sel)
-{
-    rb_reg_check(re);
-    if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) return Qtrue;
-    return Qfalse;
-}
-
-
-/*
- *  call-seq:
- *     rxp.options   => fixnum
- *
- *  Returns the set of bits corresponding to the options used when creating this
- *  Regexp (see <code>Regexp::new</code> for details. Note that additional bits
- *  may be set in the returned options: these are used internally by the regular
- *  expression code. These extra bits are ignored if the options are passed to
- *  <code>Regexp::new</code>.
- *
- *     Regexp::IGNORECASE                  #=> 1
- *     Regexp::EXTENDED                    #=> 2
- *     Regexp::MULTILINE                   #=> 4
- *
- *     /cat/.options                       #=> 0
- *     /cat/ix.options                     #=> 3
- *     Regexp.new('cat', true).options     #=> 1
- *     /\xa1\xa2/e.options                 #=> 16
- *
- *     r = /cat/ix
- *     Regexp.new(r.source, r.options)     #=> /cat/ix
- */
-
-static VALUE
-rb_reg_options_m(VALUE re, SEL sel)
-{
-    int options = rb_reg_options(re);
-    return INT2NUM(options);
-}
-
-static int
-reg_names_iter(const OnigUChar *name, const OnigUChar *name_end,
-          int back_num, int *back_refs, OnigRegex regex, void *arg)
-{
-    VALUE ary = (VALUE)arg;
-    rb_ary_push(ary, rb_str_new((const char *)name, name_end-name));
-    return 0;
-}
-
-/*
- * call-seq:
- *    rxp.names   => [name1, name2, ...]
- *
- * Returns a list of names of captures as an array of strings.
- *
- *     /(?<foo>.)(?<bar>.)(?<baz>.)/.names
- *     #=> ["foo", "bar", "baz"]
- *
- *     /(?<foo>.)(?<foo>.)/.names
- *     #=> ["foo"]
- *
- *     /(.)(.)/.names
- *     #=> []
- */
-
-static VALUE
-rb_reg_names(VALUE re, SEL sel)
-{
-    VALUE ary = rb_ary_new();
-    onig_foreach_name(RREGEXP(re)->ptr, reg_names_iter, (void*)ary);
-    return ary;
-}
-
-static int
-reg_named_captures_iter(const OnigUChar *name, const OnigUChar *name_end,
-          int back_num, int *back_refs, OnigRegex regex, void *arg)
-{
-    VALUE hash = (VALUE)arg;
-    VALUE ary = rb_ary_new2(back_num);
-    int i;
-
-    for(i = 0; i < back_num; i++)
-        rb_ary_store(ary, i, INT2NUM(back_refs[i]));
-
-    rb_hash_aset(hash, rb_str_new((const char*)name, name_end-name),ary);
-
-    return 0;
-}
-
-/*
- * call-seq:
- *    rxp.named_captures  => hash
- *
- * Returns a hash representing information about named captures of <i>rxp</i>.
- *
- * A key of the hash is a name of the named captures.
- * A value of the hash is an array which is list of indexes of corresponding
- * named captures.
- * 
- *    /(?<foo>.)(?<bar>.)/.named_captures
- *    #=> {"foo"=>[1], "bar"=>[2]}
- *
- *    /(?<foo>.)(?<foo>.)/.named_captures
- *    #=> {"foo"=>[1, 2]}
- *
- * If there are no named captures, an empty hash is returned.
- *
- *    /(.)(.)/.named_captures
- *    #=> {}
- */
-
-static VALUE
-rb_reg_named_captures(VALUE re, SEL sel)
-{
-    VALUE hash = rb_hash_new();
-    onig_foreach_name(RREGEXP(re)->ptr, reg_named_captures_iter, (void*)hash);
-    return hash;
-}
-
-static Regexp*
-make_regexp(const char *s, long len, rb_encoding *enc, int flags,
-	onig_errmsg_buffer err)
-{
-    Regexp *rp;
-    int r;
-    OnigErrorInfo einfo;
-    OnigEncoding oenc;
-
-    /* Handle escaped characters first. */
-
-    /* Build a copy of the string (in dest) with the
-       escaped characters translated,  and generate the regex
-       from that.
-    */
-
-#if WITH_OBJC
-    oenc = enc == 0 ? ONIG_ENCODING_ASCII : (OnigEncoding)enc;
-#else
-    oenc = enc;
-#endif
-
-    r = onig_alloc_init(&rp, flags, ONIGENC_CASE_FOLD_DEFAULT,
-                        oenc, OnigDefaultSyntax);
-    if (r) {
-	onig_error_code_to_str((UChar*)err, r);
-	return 0;
-    }
-
-    r = onig_compile(rp, (UChar*)s, (UChar*)(s + len), &einfo);
-
-    if (r != 0) {
-	onig_free(rp);
-	(void )onig_error_code_to_str((UChar*)err, r, &einfo);
-	return 0;
-    }
-    return rp;
-}
-
-
-/*
- *  Document-class: MatchData
- *
- *  <code>MatchData</code> is the type of the special variable <code>$~</code>,
- *  and is the type of the object returned by <code>Regexp#match</code> and
- *  <code>Regexp.last_match</code>. It encapsulates all the results of a pattern
- *  match, results normally accessed through the special variables
- *  <code>$&</code>, <code>$'</code>, <code>$`</code>, <code>$1</code>,
- *  <code>$2</code>, and so on.
- *
- */
-
-VALUE rb_cMatch;
-
-static VALUE
-match_alloc(VALUE klass, SEL sel)
-{
-    NEWOBJ(match, struct RMatch);
-    OBJSETUP(match, klass, T_MATCH);
-
-    match->str = 0;
-    match->rmatch = 0;
-    match->regexp = 0;
-    GC_WB(&match->rmatch, ALLOC(struct rmatch));
-    MEMZERO(match->rmatch, struct rmatch, 1);
-
-    return (VALUE)match;
-}
-
-static IMP rb_objc_match_finalize_super = NULL; 
-
-static void
-rb_objc_match_finalize(void *rcv, SEL sel)
-{
-    onig_region_free(RMATCH_REGS(rcv), 0);
-    if (rb_objc_match_finalize_super != NULL) {
-	((void(*)(void *, SEL))rb_objc_match_finalize_super)(rcv, sel);
-    }
-}
-
-typedef struct {
-    int byte_pos;
-    int char_pos;
-} pair_t;
-
-static int
-pair_byte_cmp(const void *pair1, const void *pair2)
-{
-    return ((pair_t*)pair1)->byte_pos - ((pair_t*)pair2)->byte_pos;
-}
-
-static void
-update_char_offset(VALUE match)
-{
-    struct rmatch *rm = RMATCH(match)->rmatch;
-    struct re_registers *regs;
-    int num_regs;
-    int i, num_pos, c;
-    const char *s, *p, *q, *e;
-    rb_encoding *enc;
-    pair_t *pairs;
-
-    if (rm->char_offset_updated)
-        return;
-
-    regs = &rm->regs;
-    num_regs = rm->regs.num_regs;
-
-    if (rm->char_offset_num_allocated < num_regs) {
-        REALLOC_N(rm->char_offset, struct rmatch_offset, num_regs);
-        rm->char_offset_num_allocated = num_regs;
-    }
-
-    enc = rb_enc_get(RMATCH(match)->str);
-    if (rb_enc_mbmaxlen(enc) == 1) {
-        for (i = 0; i < num_regs; i++) {
-            rm->char_offset[i].beg = BEG(i);
-            rm->char_offset[i].end = END(i);
-        }
-        rm->char_offset_updated = 1;
-        return;
-    }
-
-    pairs = ALLOCA_N(pair_t, num_regs*2);
-    num_pos = 0;
-    for (i = 0; i < num_regs; i++) {
-        if (BEG(i) < 0)
-            continue;
-        pairs[num_pos++].byte_pos = BEG(i);
-        pairs[num_pos++].byte_pos = END(i);
-    }
-    qsort(pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
-
-    s = p = RSTRING_PTR(RMATCH(match)->str);
-    e = s + RSTRING_LEN(RMATCH(match)->str);
-    c = 0;
-    for (i = 0; i < num_pos; i++) {
-        q = s + pairs[i].byte_pos;
-#if WITH_OBJC
-	c += q-p;
-#else
-        c += rb_enc_strlen(p, q, enc);
-#endif
-        pairs[i].char_pos = c;
-        p = q;
-    }
-
-    for (i = 0; i < num_regs; i++) {
-        pair_t key, *found;
-        if (BEG(i) < 0) {
-            rm->char_offset[i].beg = -1;
-            rm->char_offset[i].end = -1;
-            continue;
-        }
-
-        key.byte_pos = BEG(i);
-        found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
-        rm->char_offset[i].beg = found->char_pos;
-
-        key.byte_pos = END(i);
-        found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
-        rm->char_offset[i].end = found->char_pos;
-    }
-
-    rm->char_offset_updated = 1;
-}
-
-static void
-match_check(VALUE match)
-{
-    if (!RMATCH(match)->regexp) {
-	rb_raise(rb_eTypeError, "uninitialized Match");
-    }
-}
-
-/* :nodoc: */
-static VALUE
-match_init_copy(VALUE obj, SEL sel, VALUE orig)
-{
-    struct rmatch *rm;
-
-    if (obj == orig) return obj;
-
-    if (!rb_obj_is_instance_of(orig, rb_obj_class(obj))) {
-	rb_raise(rb_eTypeError, "wrong argument class");
-    }
-    RMATCH(obj)->str = RMATCH(orig)->str;
-    RMATCH(obj)->regexp = RMATCH(orig)->regexp;
-
-    rm = RMATCH(obj)->rmatch;
-    onig_region_copy(&rm->regs, RMATCH_REGS(orig));
-
-    if (!RMATCH(orig)->rmatch->char_offset_updated) {
-        rm->char_offset_updated = 0;
-    }
-    else {
-        if (rm->char_offset_num_allocated < rm->regs.num_regs) {
-            REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
-            rm->char_offset_num_allocated = rm->regs.num_regs;
-        }
-        MEMCPY(rm->char_offset, RMATCH(orig)->rmatch->char_offset,
-               struct rmatch_offset, rm->regs.num_regs);
-        rm->char_offset_updated = 1;
-    }
-
-    return obj;
-}
-
-
-/*
- * call-seq:
- *    mtch.regexp   => regexp
- *
- * Returns the regexp.
- *
- *     m = /a.*b/.match("abc")
- *     m.regexp #=> /a.*b/
- */
-
-static VALUE
-match_regexp(VALUE match, SEL sel)
-{
-    match_check(match);
-    return RMATCH(match)->regexp;
-}
-
-/*
- * call-seq:
- *    mtch.names   => [name1, name2, ...]
- *
- * Returns a list of names of captures as an array of strings.
- * It is same as mtch.regexp.names.
- *
- *     /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
- *     #=> ["foo", "bar", "baz"]
- *
- *     m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
- *     m.names                          #=> ["x", "y"]
- */
-
-static VALUE
-match_names(VALUE match, SEL sel)
-{
-    match_check(match);
-    return rb_reg_names(RMATCH(match)->regexp, 0);
-}
-
-/*
- *  call-seq:
- *     mtch.length   => integer
- *     mtch.size     => integer
- *
- *  Returns the number of elements in the match array.
- *
- *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- *     m.length   #=> 5
- *     m.size     #=> 5
- */
-
-static VALUE
-match_size(VALUE match, SEL sel)
-{
-    match_check(match);
-    return INT2FIX(RMATCH_REGS(match)->num_regs);
-}
-
-static int
-match_backref_number(VALUE match, VALUE backref)
-{
-    const char *name;
-    int num;
-
-    struct re_registers *regs = RMATCH_REGS(match);
-    VALUE regexp = RMATCH(match)->regexp;
-
-    match_check(match);
-    switch(TYPE(backref)) {
-      default:
-        return NUM2INT(backref);
-
-      case T_SYMBOL:
-        name = rb_sym2name(backref);
-        break;
-
-      case T_STRING:
-        name = StringValueCStr(backref);
-        break;
-    }
-
-    num = onig_name_to_backref_number(RREGEXP(regexp)->ptr,
-              (const unsigned char*)name,
-              (const unsigned char*)name + strlen(name),
-              regs);
-
-    if (num < 1) {
-        rb_raise(rb_eIndexError, "undefined group name reference: %s", name);
-    }
-
-    return num;
-}
-
-
-/*
- *  call-seq:
- *     mtch.offset(n)   => array
- *
- *  Returns a two-element array containing the beginning and ending offsets of
- *  the <em>n</em>th match.
- *  <em>n</em> can be a string or symbol to reference a named capture.
- *
- *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- *     m.offset(0)      #=> [1, 7]
- *     m.offset(4)      #=> [6, 7]
- *
- *     m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
- *     p m.offset(:foo) #=> [0, 1]
- *     p m.offset(:bar) #=> [2, 3]
- *
- */
-
-static VALUE
-match_offset(VALUE match, SEL sel, VALUE n)
-{
-    int i = match_backref_number(match, n);
-    struct re_registers *regs = RMATCH_REGS(match);
-
-    match_check(match);
-    if (i < 0 || regs->num_regs <= i)
-	rb_raise(rb_eIndexError, "index %d out of matches", i);
-
-    if (BEG(i) < 0)
-	return rb_assoc_new(Qnil, Qnil);
-
-    update_char_offset(match);
-    return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg),
-			INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
-}
-
-
-/*
- *  call-seq:
- *     mtch.begin(n)   => integer
- *
- *  Returns the offset of the start of the <em>n</em>th element of the match
- *  array in the string.
- *  <em>n</em> can be a string or symbol to reference a named capture.
- *
- *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- *     m.begin(0)       #=> 1
- *     m.begin(2)       #=> 2
- *
- *     m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
- *     p m.begin(:foo)  #=> 0
- *     p m.begin(:bar)  #=> 2
- */
-
-static VALUE
-match_begin(VALUE match, SEL sel, VALUE n)
-{
-    int i = match_backref_number(match, n);
-    struct re_registers *regs = RMATCH_REGS(match);
-
-    match_check(match);
-    if (i < 0 || regs->num_regs <= i)
-	rb_raise(rb_eIndexError, "index %d out of matches", i);
-
-    if (BEG(i) < 0)
-	return Qnil;
-
-    update_char_offset(match);
-    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg);
-}
-
-
-/*
- *  call-seq:
- *     mtch.end(n)   => integer
- *
- *  Returns the offset of the character immediately following the end of the
- *  <em>n</em>th element of the match array in the string.
- *  <em>n</em> can be a string or symbol to reference a named capture.
- *
- *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- *     m.end(0)         #=> 7
- *     m.end(2)         #=> 3
- *
- *     m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
- *     p m.end(:foo)    #=> 1
- *     p m.end(:bar)    #=> 3
- */
-
-static VALUE
-match_end(VALUE match, SEL sel, VALUE n)
-{
-    int i = match_backref_number(match, n);
-    struct re_registers *regs = RMATCH_REGS(match);
-
-    match_check(match);
-    if (i < 0 || regs->num_regs <= i)
-	rb_raise(rb_eIndexError, "index %d out of matches", i);
-
-    if (BEG(i) < 0)
-	return Qnil;
-
-    update_char_offset(match);
-    return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end);
-}
-
-#define MATCH_BUSY FL_USER2
-
-void
-rb_match_busy(VALUE match)
-{
-    FL_SET(match, MATCH_BUSY);
-}
-
-/*
- *  call-seq:
- *     rxp.fixed_encoding?   => true or false
- *
- *  Returns false if rxp is applicable to
- *  a string with any ASCII compatible encoding.
- *  Returns true otherwise.
- *
- *      r = /a/
- *      r.fixed_encoding?                               #=> false
- *      r =~ "\u{6666} a"                               #=> 2
- *      r =~ "\xa1\xa2 a".force_encoding("euc-jp")      #=> 2
- *      r =~ "abc".force_encoding("euc-jp")             #=> 0
- *
- *      r = /a/u
- *      r.fixed_encoding?                               #=> true
- *      r.encoding                                      #=> #<Encoding:UTF-8>
- *      r =~ "\u{6666} a"                               #=> 2
- *      r =~ "\xa1\xa2".force_encoding("euc-jp")        #=> ArgumentError
- *      r =~ "abc".force_encoding("euc-jp")             #=> 0
- *
- *      r = /\u{6666}/
- *      r.fixed_encoding?                               #=> true
- *      r.encoding                                      #=> #<Encoding:UTF-8>
- *      r =~ "\u{6666} a"                               #=> 0
- *      r =~ "\xa1\xa2".force_encoding("euc-jp")        #=> ArgumentError
- *      r =~ "abc".force_encoding("euc-jp")             #=> nil
- */
-
-static VALUE
-rb_reg_fixed_encoding_p(VALUE re, SEL sel)
-{
-    if (FL_TEST(re, KCODE_FIXED))
-        return Qtrue;
-    else
-        return Qfalse;
-}
-
-static VALUE
-rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc,
-        rb_encoding **fixed_enc, onig_errmsg_buffer err);
-
-#if !WITH_OBJC
-static void
-reg_enc_error(VALUE re, VALUE str)
-{
-    rb_raise(rb_eArgError,
-	     "incompatible encoding regexp match (%s regexp with %s string)",
-	     rb_enc_name(rb_enc_get(re)),
-	     rb_enc_name(rb_enc_get(str)));
-}
-#endif
-
-static void
-get_cstring(VALUE str, CFStringEncoding enc, char **pcstr, size_t *pcharsize,
-	bool *should_free)
-{
-    if (pcstr != NULL && pcharsize != NULL && should_free != NULL) {
-	char *p = (char *)CFStringGetCStringPtr((CFStringRef)str, enc);
-	if (p != NULL) {
-	    *should_free = false;
-	}
-	else {
-	    const size_t s = CFStringGetMaximumSizeForEncoding(
-		    CFStringGetLength((CFStringRef)str), enc);
-	    p = (char *)malloc(s + 1);
-	    assert(CFStringGetCString((CFStringRef)str, p, s + 1, enc));
-	    *should_free = true;
-	}
-	*pcstr = p;
-	*pcharsize = sizeof(char);
-    }
-}
-
-static void
-get_unistring(VALUE str, CFStringEncoding enc, char **pcstr, size_t *pcharsize,
-	bool *should_free)
-{
-    if (pcstr != NULL && pcharsize != NULL && should_free != NULL) {
-	UniChar *p = (UniChar *)CFStringGetCharactersPtr((CFStringRef)str);
-	const size_t str_len = CFStringGetLength((CFStringRef)str);
-	if (p != NULL) {
-	    *should_free = false;
-	}
-	else {
-	    const size_t s = CFStringGetMaximumSizeForEncoding(
-		    str_len, enc);
-	    p = (UniChar *)malloc(s);
-	    CFStringGetCharacters((CFStringRef)str,
-		    CFRangeMake(0, str_len),
-		    p);
-	    *should_free = true;
-	}
-	*pcstr = (char *)p;
-	*pcharsize = sizeof(UniChar);
-    }
-}
-
-static inline bool
-multibyte_encoding(rb_encoding *enc)
-{
-    return enc == (rb_encoding *)ONIG_ENCODING_UTF16_BE
-	|| enc == (rb_encoding *)ONIG_ENCODING_UTF16_LE
-	|| enc == (rb_encoding *)ONIG_ENCODING_UTF32_BE
-	|| enc == (rb_encoding *)ONIG_ENCODING_UTF32_LE;
-}
-
-static rb_encoding*
-rb_reg_prepare_enc(VALUE re, VALUE str, char **pcstr, size_t *pcharsize,
-	bool *should_free)
-{
-    if (*(VALUE *)str == rb_cByteString) {
-	*pcstr = (char *)rb_bytestring_byte_pointer(str);
-	*pcharsize = 1;
-	*should_free = false;
-	return (rb_encoding *)ONIG_ENCODING_ASCII;
-    }
-    CFStringEncoding enc = CFStringGetSmallestEncoding((CFStringRef)str);
-    switch (enc) {
-	default:
-	    // The user probably has the __CF_USER_TEXT_ENCODING environment
-	    // variable set to some exotic encoding, let's assume it's a
-	    // 8 bits one & fall through.
-	case kCFStringEncodingMacRoman:
-	case kCFStringEncodingWindowsLatin1:
-	case kCFStringEncodingISOLatin1:
-	case kCFStringEncodingNextStepLatin:
-	case kCFStringEncodingASCII:
-	case kCFStringEncodingNonLossyASCII:
-	    get_cstring(str, enc, pcstr, pcharsize, should_free);
-	    return (rb_encoding *)ONIG_ENCODING_ASCII;
-
-	case kCFStringEncodingUTF8:
-	case kCFStringEncodingUTF16:
-	case kCFStringEncodingUTF16BE:
-	case kCFStringEncodingUTF16LE:
-	case kCFStringEncodingUTF32:
-	case kCFStringEncodingUTF32BE:
-	case kCFStringEncodingUTF32LE:
-	    get_unistring(str, enc, pcstr, pcharsize, should_free);
-	    return (rb_encoding *)ONIG_ENCODING_UTF16_LE;
-    }
-
-#if 0
-    // Never reached.
-    CFStringRef enc_name = CFStringConvertEncodingToIANACharSetName(enc);
-    rb_raise(rb_eArgError,
-	    "given string `%s' has unrecognized encoding `%s' (%ld)",
-	    RSTRING_PTR(rb_inspect(str)),
-	    enc_name == NULL ? "unknown" : RSTRING_PTR(enc_name),
-	    (long)enc);
-#endif
-#if 0
-    rb_encoding *enc = 0;
-
-#if !WITH_OBJC
-    if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) {
-        rb_raise(rb_eArgError,
-            "broken %s string",
-            rb_enc_name(rb_enc_get(str)));
-    }
-#endif
-
-    rb_reg_check(re);
-    enc = rb_enc_get(str);
-#if !WITH_OBJC
-    if (!rb_enc_str_asciicompat_p(str)) {
-        if (RREGEXP(re)->ptr->enc != enc) {
-	    reg_enc_error(re, str);
-	}
-    }
-    else if (rb_reg_fixed_encoding_p(re, 0)) {
-        if (RREGEXP(re)->ptr->enc != enc &&
-	    (!rb_enc_asciicompat(RREGEXP(re)->ptr->enc) ||
-	     rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT)) {
-	    reg_enc_error(re, str);
-	}
-	enc = RREGEXP(re)->ptr->enc;
-    }
-    if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) &&
-	enc != rb_ascii8bit_encoding() &&
-	rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
-	rb_warn("regexp match /.../n against to %s string",
-		rb_enc_name(enc));
-    }
-#endif
-    return enc;
-#endif
-}
-
-static regex_t *
-rb_reg_prepare_re(VALUE re, VALUE str, char **pcstr, size_t *pcharsize,
-	bool *should_free)
-{
-    regex_t *reg = RREGEXP(re)->ptr;
-    onig_errmsg_buffer err = "";
-    int r;
-    OnigErrorInfo einfo;
-    const char *pattern;
-    VALUE unescaped;
-    rb_encoding *fixed_enc = 0;
-    rb_encoding *enc = rb_reg_prepare_enc(re, str, pcstr, pcharsize,
-	    should_free);
-
-    if ((rb_encoding *)reg->enc == enc) {
-	return reg;
-    }
-
-    rb_reg_check(re);
-    reg = RREGEXP(re)->ptr;
-    pattern = RREGEXP(re)->str;
-
-    unescaped = rb_reg_preprocess(pattern, pattern + RREGEXP(re)->len, enc,
-	&fixed_enc, err);
-
-    if (unescaped == Qnil) {
-	rb_raise(rb_eArgError, "regexp preprocess failed: %s", err);
-    }
-
-    UChar *begin, *end;
-    if (multibyte_encoding(enc)) {
-	UniChar *chars = (UniChar *)CFStringGetCharactersPtr(
-		(CFStringRef)unescaped);
-	const long len = RSTRING_LEN(unescaped);
-	if (chars == NULL) {
-	    chars = (UniChar *)alloca(sizeof(UniChar) * len);
-	    CFStringGetCharacters((CFStringRef)unescaped,
-		    CFRangeMake(0, len), chars);
-	}
-	begin = (UChar *)chars;
-	end = (UChar *)chars + (sizeof(UniChar) * len);
-    }
-    else {
-	begin = (UChar *)RSTRING_PTR(unescaped);
-	end = begin + RSTRING_LEN(unescaped);
-    }
-
-    r = onig_new(&reg, begin, end, reg->options, (OnigEncoding)enc,
-	    OnigDefaultSyntax, &einfo);
-    if (r != 0) {
-	onig_error_code_to_str((UChar*)err, r, &einfo);
-	rb_reg_raise(pattern, RREGEXP(re)->len, err, re);
-    }
-
-    return reg;
-}
-
-int
-rb_reg_adjust_startpos(VALUE re, VALUE str, int pos, int reverse)
-{
-    int range;
-    rb_encoding *enc;
-#if !WITH_OBJC
-    UChar *p, *string;
-#endif
-
-    enc = rb_reg_prepare_enc(re, str, NULL, NULL, NULL);
-
-    if (reverse) {
-	range = -pos;
-    }
-    else {
-	range = RSTRING_LEN(str) - pos;
-    }
-
-#if !WITH_OBJC
-    if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING_LEN(str)) {
-	 string = (UChar*)RSTRING_PTR(str);
-
-	 if (range > 0) {
-	      p = onigenc_get_right_adjust_char_head(enc, string, string + pos);
-	 }
-	 else {
-	      p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, string, string + pos);
-	 }
-	 return p - string;
-    }
-#endif
-
-    return pos;
-}
-
-int
-rb_reg_search2(VALUE re, VALUE str, int pos, int reverse, bool need_match_str)
-{
-    regex_t *reg0 = RREGEXP(re)->ptr;
-    int busy = FL_TEST(re, REG_BUSY);
-
-    static struct re_registers *regs = NULL;
-    if (regs == NULL) {
-	regs = xmalloc(sizeof(struct re_registers));
-	rb_objc_root(&regs);
-    }
-    struct re_registers *pregs = regs;
-
-    const size_t clen = RSTRING_LEN(str);
-    if (pos > clen || pos < 0) {
-	rb_backref_set(Qnil);
-	return -1;
-    }
-
-    char *cstr = NULL;
-    size_t charsize = 0;
-    bool should_free = false;
-    regex_t *reg = rb_reg_prepare_re(re, str, &cstr, &charsize, &should_free);
-
-    char *range = cstr;
-    FL_SET(re, REG_BUSY);
-    if (!reverse) {
-	range += (clen * charsize);
-    }
-    MEMZERO(pregs, struct re_registers, 1);
-    int result = onig_search(reg,
-	    (UChar*)cstr,
-	    ((UChar*)cstr + (clen * charsize)),
-	    ((UChar*)cstr + (pos * charsize)),
-	    ((UChar*)range),
-	    pregs, ONIG_OPTION_NONE);
-
-    if (should_free) {
-	free(cstr);
-	cstr = NULL;
-    }
-
-    if (RREGEXP(re)->ptr != reg) {
-	if (busy) {
-	    onig_free(reg);
-	}
-	else {
-	    onig_free(reg0);
-	    RREGEXP(re)->ptr = reg;
-	}
-    }
-    if (!busy) {
-	FL_UNSET(re, REG_BUSY);
-    }
-    if (result < 0) {
-	onig_region_free(pregs, 0);
-	if (result == ONIG_MISMATCH) {
-	    rb_backref_set(Qnil);
-	    return result;
-	}
-	else {
-	    onig_errmsg_buffer err = "";
-	    onig_error_code_to_str((UChar*)err, result);
-	    rb_reg_raise(RREGEXP(re)->str, RREGEXP(re)->len, err, 0);
-	}
-    }
-
-    if (charsize > 1) {
-	int i;
-	for (i = 0; i < pregs->num_regs; i++) {
-	    if (pregs->beg[i] > 0) {
-		assert((pregs->beg[i] % charsize) == 0);
-		pregs->beg[i] /= charsize;
-	    }
-	    if (pregs->end[i] > 0) {
-		assert((pregs->end[i] % charsize) == 0);
-		pregs->end[i] /= charsize;
-	    }
-	}
-	assert((result % charsize) == 0);
-	result /= charsize;
-    }
-
-    VALUE match = rb_backref_get();
-    if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
-	match = match_alloc(rb_cMatch, 0);
-	rb_backref_set(match);
-    }
-    else {
-	if (rb_safe_level() >= 3) {
-	    OBJ_TAINT(match);
-	}
-	else {
-	    FL_UNSET(match, FL_TAINT);
-	}
-    }
-
-    onig_region_copy(RMATCH_REGS(match), pregs);
-    onig_region_free(pregs, 0);
-    if (need_match_str) {
-	if (RMATCH(match)->str == 0
-		|| !CFEqual((CFTypeRef)RMATCH(match)->str, (CFTypeRef)str)) {
-	    GC_WB(&RMATCH(match)->str, rb_str_new4(str));
-	}
-    }
-    else {
-	RMATCH(match)->str = 0;
-    }
-    if (RMATCH(match)->regexp != re) {
-	GC_WB(&RMATCH(match)->regexp, re);
-    }
-    RMATCH(match)->rmatch->char_offset_updated = 0;
-
-    OBJ_INFECT(match, re);
-    OBJ_INFECT(match, str);
-
-    return result;
-}
-
-int
-rb_reg_search(VALUE re, VALUE str, int pos, int reverse)
-{
-    return rb_reg_search2(re, str, pos, reverse, true);
-}
-
-VALUE
-rb_reg_nth_defined(int nth, VALUE match)
-{
-    struct re_registers *regs;
-    if (NIL_P(match)) return Qnil;
-    match_check(match);
-    regs = RMATCH_REGS(match);
-    if (nth >= regs->num_regs) {
-	return Qnil;
-    }
-    if (nth < 0) {
-	nth += regs->num_regs;
-	if (nth <= 0) return Qnil;
-    }
-    if (BEG(nth) == -1) return Qfalse;
-    return Qtrue;
-}
-
-VALUE
-rb_reg_nth_match(int nth, VALUE match)
-{
-    VALUE str;
-    long start, end, len;
-    struct re_registers *regs;
-
-    if (NIL_P(match)) {
-	return Qnil;
-    }
-    match_check(match);
-    regs = RMATCH_REGS(match);
-    if (nth >= regs->num_regs) {
-	return Qnil;
-    }
-    if (nth < 0) {
-	nth += regs->num_regs;
-	if (nth <= 0) {
-	    return Qnil;
-	}
-    }
-    start = BEG(nth);
-    if (start == -1) {
-	return Qnil;
-    }
-    end = END(nth);
-    len = end - start;
-    str = rb_str_subseq(RMATCH(match)->str, start, len);
-    return str;
-}
-
-VALUE
-rb_reg_last_match(VALUE match)
-{
-    return rb_reg_nth_match(0, match);
-}
-
-
-/*
- *  call-seq:
- *     mtch.pre_match   => str
- *
- *  Returns the portion of the original string before the current match.
- *  Equivalent to the special variable <code>$`</code>.
- *
- *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- *     m.pre_match   #=> "T"
- */
-
-VALUE
-rb_reg_match_pre(VALUE match, SEL sel)
-{
-    VALUE str;
-    struct re_registers *regs;
-
-    if (NIL_P(match)) return Qnil;
-    match_check(match);
-    regs = RMATCH_REGS(match);
-    if (BEG(0) == -1) return Qnil;
-    str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
-    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
-    return str;
-}
-
-
-/*
- *  call-seq:
- *     mtch.post_match   => str
- *
- *  Returns the portion of the original string after the current match.
- *  Equivalent to the special variable <code>$'</code>.
- *
- *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
- *     m.post_match   #=> ": The Movie"
- */
-
-VALUE
-rb_reg_match_post(VALUE match, SEL sel)
-{
-    VALUE str;
-    long pos;
-    struct re_registers *regs;
-
-    if (NIL_P(match)) return Qnil;
-    match_check(match);
-    regs = RMATCH_REGS(match);
-    if (BEG(0) == -1) return Qnil;
-    str = RMATCH(match)->str;
-    pos = END(0);
-    str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
-    if (OBJ_TAINTED(match)) OBJ_TAINT(str);
-    return str;
-}
-
-VALUE
-rb_reg_match_last(VALUE match)
-{
-    int i;
-    struct re_registers *regs;
-
-    if (NIL_P(match)) return Qnil;
-    match_check(match);
-    regs = RMATCH_REGS(match);
-    if (BEG(0) == -1) return Qnil;
-
-    for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--)
-	;
-    if (i == 0) return Qnil;
-    return rb_reg_nth_match(i, match);
-}
-
-static VALUE
-last_match_getter(void)
-{
-    return rb_reg_last_match(rb_backref_get());
-}
-
-static VALUE
-prematch_getter(void)
-{
-    return rb_reg_match_pre(rb_backref_get(), 0);
-}
-
-static VALUE
-postmatch_getter(void)
-{
-    return rb_reg_match_post(rb_backref_get(), 0);
-}
-
-static VALUE
-last_paren_match_getter(void)
-{
-    return rb_reg_match_last(rb_backref_get());
-}
-
-static VALUE
-match_array(VALUE match, int start)
-{
-    struct re_registers *regs = RMATCH_REGS(match);
-    VALUE ary = rb_ary_new2(regs->num_regs);
-    VALUE target = RMATCH(match)->str;
-    int i;
-    int taint = OBJ_TAINTED(match);
-
-    match_check(match);
-    for (i=start; i<regs->num_regs; i++) {
-	if (regs->beg[i] == -1) {
-	    rb_ary_push(ary, Qnil);
-	}
-	else {
-	    VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]);
-	    if (taint) OBJ_TAINT(str);
-	    rb_ary_push(ary, str);
-	}
-    }
-    return ary;
-}
-
-
-/* [MG]:FIXME: I put parens around the /.../.match() in the first line of the
-   second example to prevent the '*' followed by a '/' from ending the
-   comment. */
-
-/*
- *  call-seq:
- *     mtch.to_a   => anArray
- *
- *  Returns the array of matches.
- *
- *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- *     m.to_a   #=> ["HX1138", "H", "X", "113", "8"]
- *
- *  Because <code>to_a</code> is called when expanding
- *  <code>*</code><em>variable</em>, there's a useful assignment
- *  shortcut for extracting matched fields. This is slightly slower than
- *  accessing the fields directly (as an intermediate array is
- *  generated).
- *
- *     all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
- *     all   #=> "HX1138"
- *     f1    #=> "H"
- *     f2    #=> "X"
- *     f3    #=> "113"
- */
-
-static VALUE
-match_to_a(VALUE match, SEL sel)
-{
-    return match_array(match, 0);
-}
-
-
-/*
- *  call-seq:
- *     mtch.captures   => array
- *
- *  Returns the array of captures; equivalent to <code>mtch.to_a[1..-1]</code>.
- *
- *     f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
- *     f1    #=> "H"
- *     f2    #=> "X"
- *     f3    #=> "113"
- *     f4    #=> "8"
- */
-static VALUE
-match_captures(VALUE match, SEL sel)
-{
-    return match_array(match, 1);
-}
-
-static int
-name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end)
-{
-    int num;
-
-    num = onig_name_to_backref_number(RREGEXP(regexp)->ptr,
-	(const unsigned char* )name, (const unsigned char* )name_end, regs);
-    if (num >= 1) {
-	return num;
-    }
-    else {
-	VALUE s = rb_str_new(name, (long )(name_end - name));
-	rb_raise(rb_eIndexError, "undefined group name reference: %s",
-				 StringValuePtr(s));
-    }
-}
-
-/*
- *  call-seq:
- *     mtch[i]               => str or nil
- *     mtch[start, length]   => array
- *     mtch[range]           => array
- *     mtch[name]            => str or nil
- *
- *  Match Reference---<code>MatchData</code> acts as an array, and may be
- *  accessed using the normal array indexing techniques.  <i>mtch</i>[0] is
- *  equivalent to the special variable <code>$&</code>, and returns the entire
- *  matched string.  <i>mtch</i>[1], <i>mtch</i>[2], and so on return the values
- *  of the matched backreferences (portions of the pattern between parentheses).
- *
- *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- *     m          #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
- *     m[0]       #=> "HX1138"
- *     m[1, 2]    #=> ["H", "X"]
- *     m[1..3]    #=> ["H", "X", "113"]
- *     m[-3, 2]   #=> ["X", "113"]
- *
- *     m = /(?<foo>a+)b/.match("ccaaab")
- *     m          #=> #<MatchData "aaab" foo:"aaa">
- *     m["foo"]   #=> "aaa"
- *     m[:foo]    #=> "aaa"
- */
-
-static VALUE
-match_aref(VALUE match, SEL sel, int argc, VALUE *argv)
-{
-    VALUE idx, rest;
-
-    match_check(match);
-    rb_scan_args(argc, argv, "11", &idx, &rest);
-
-    if (NIL_P(rest)) {
-      if (FIXNUM_P(idx)) {
-        if (FIX2INT(idx) >= 0) {
-          return rb_reg_nth_match(FIX2INT(idx), match);
-        }
-      }
-      else {
-        const char *p;
-        int num;
-
-        switch (TYPE(idx)) {
-          case T_SYMBOL:
-            p = rb_sym2name(idx);
-            goto name_to_backref;
-            break;
-          case T_STRING:
-            p = StringValuePtr(idx);
-
-          name_to_backref:
-            num = name_to_backref_number(RMATCH_REGS(match),
-                       RMATCH(match)->regexp, p, p + strlen(p));
-            return rb_reg_nth_match(num, match);
-            break;
-
-          default:
-            break;
-        }
-      }
-    }
-
-    return rb_ary_aref(match_to_a(match, 0), 0, argc, argv);
-}
-
-static VALUE
-match_entry(VALUE match, long n)
-{
-    return rb_reg_nth_match(n, match);
-}
-
-
-/*
- *  call-seq:
- *
- *     mtch.values_at([index]*)   => array
- *
- *  Uses each <i>index</i> to access the matching values, returning an array of
- *  the corresponding matches.
- *
- *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
- *     m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
- *     m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]
- */
-
-static VALUE
-match_values_at(VALUE match, SEL sel, int argc, VALUE *argv)
-{
-    struct re_registers *regs = RMATCH_REGS(match);
-    match_check(match);
-    return rb_get_values_at(match, regs->num_regs, argc, argv, match_entry);
-}
-
-/*
- *  call-seq:
- *     mtch.to_s   => str
- *
- *  Returns the entire matched string.
- *
- *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- *     m.to_s   #=> "HX1138"
- */
-
-static VALUE
-match_to_s(VALUE match, SEL sel)
-{
-    VALUE str = rb_reg_last_match(match);
-
-    match_check(match);
-    if (NIL_P(str)) {
-	str = rb_str_new(0,0);
-    }
-    if (OBJ_TAINTED(match)) {
-	OBJ_TAINT(str);
-    }
-    else if (OBJ_TAINTED(RMATCH(match)->str)) {
-	OBJ_TAINT(str);
-    }
-    return str;
-}
-
-
-/*
- *  call-seq:
- *     mtch.string   => str
- *
- *  Returns a frozen copy of the string passed in to <code>match</code>.
- *
- *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- *     m.string   #=> "THX1138."
- */
-
-static VALUE
-match_string(VALUE match, SEL sel)
-{
-    match_check(match);
-    return RMATCH(match)->str;	/* str is frozen */
-}
-
-struct backref_name_tag {
-    const UChar *name;
-    long len;
-};
-
-static int
-match_inspect_name_iter(const OnigUChar *name, const OnigUChar *name_end,
-          int back_num, int *back_refs, OnigRegex regex, void *arg0)
-{
-    struct backref_name_tag *arg = (struct backref_name_tag *)arg0;
-    int i;
-
-    for (i = 0; i < back_num; i++) {
-        arg[back_refs[i]].name = name;
-        arg[back_refs[i]].len = name_end - name;
-    }
-    return 0;
-}
-
-/*
- * call-seq:
- *    mtch.inspect   => str
- *
- * Returns a printable version of <i>mtch</i>.
- *
- *     puts /.$/.match("foo").inspect
- *     #=> #<MatchData "o">
- *
- *     puts /(.)(.)(.)/.match("foo").inspect
- *     #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
- *
- *     puts /(.)(.)?(.)/.match("fo").inspect
- *     #=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
- *
- *     puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
- *     #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
- *
- */
-
-VALUE rb_str_inspect(VALUE, SEL);
-
-static VALUE
-match_inspect(VALUE match, SEL sel)
-{
-    const char *cname = rb_obj_classname(match);
-    VALUE str;
-    int i;
-    struct re_registers *regs = RMATCH_REGS(match);
-    int num_regs = regs->num_regs;
-    struct backref_name_tag *names;
-    VALUE regexp = RMATCH(match)->regexp;
-
-    if (regexp == 0) {
-        return rb_sprintf("#<%s:%p>", cname, (void*)match);
-    }
-
-    names = ALLOCA_N(struct backref_name_tag, num_regs);
-    MEMZERO(names, struct backref_name_tag, num_regs);
-
-    onig_foreach_name(RREGEXP(regexp)->ptr,
-            match_inspect_name_iter, names);
-
-    str = rb_str_buf_new2("#<");
-    rb_str_buf_cat2(str, cname);
-
-    for (i = 0; i < num_regs; i++) {
-        VALUE v;
-        rb_str_buf_cat2(str, " ");
-        if (i > 0) {
-            if (names[i].name) { 
-                rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
-	    }
-            else {
-                char buf[sizeof(i)*3+1];
-                snprintf(buf, sizeof(buf), "%d", i);
-                rb_str_buf_cat2(str, buf);
-            }
-            rb_str_buf_cat2(str, ":");
-        }
-        v = rb_reg_nth_match(i, match);
-        if (v == Qnil) {
-            rb_str_buf_cat2(str, "nil");
-	}
-        else {
-            rb_str_buf_append(str, rb_str_inspect(v, 0));
-	}
-    }
-    rb_str_buf_cat2(str, ">");
-
-    return str;
-}
-
-VALUE rb_cRegexp;
-
-static int
-read_escaped_byte(const char **pp, const char *end, onig_errmsg_buffer err)
-{
-    const char *p = *pp;
-    int code;
-    int meta_prefix = 0, ctrl_prefix = 0;
-    int len;
-    int retbyte;
-
-    retbyte = -1;
-    if (p == end || *p++ != '\\') {
-        strcpy(err, "too short escaped multibyte character");
-        return -1;
-    }
-
-again:
-    if (p == end) {
-        strcpy(err, "too short escape sequence");
-        return -1;
-    }
-    switch (*p++) {
-      case '\\': code = '\\'; break;
-      case 'n': code = '\n'; break;
-      case 't': code = '\t'; break;
-      case 'r': code = '\r'; break;
-      case 'f': code = '\f'; break;
-      case 'v': code = '\013'; break;
-      case 'a': code = '\007'; break;
-      case 'e': code = '\033'; break;
-
-      /* \OOO */
-      case '0': case '1': case '2': case '3':
-      case '4': case '5': case '6': case '7':
-        p--;
-        code = ruby_scan_oct(p, end < p+3 ? end-p : 3, &len);
-        p += len;
-        break;
-
-      case 'x': /* \xHH */
-        code = ruby_scan_hex(p, end < p+2 ? end-p : 2, &len);
-        if (len < 1) {
-            strcpy(err, "invalid hex escape");
-            return -1;
-        }
-        p += len;
-        break;
-
-      case 'M': /* \M-X, \M-\C-X, \M-\cX */
-        if (meta_prefix) {
-            strcpy(err, "duplicate meta escape");
-            return -1;
-        }
-        meta_prefix = 1;
-        if (p+1 < end && *p++ == '-' && (*p & 0x80) == 0) {
-            if (*p == '\\') {
-                p++;
-                goto again;
-            }
-            else {
-                code = *p++;
-                break;
-            }
-        }
-        strcpy(err, "too short meta escape");
-        return -1;
-
-      case 'C': /* \C-X, \C-\M-X */
-        if (p == end || *p++ != '-') {
-            strcpy(err, "too short control escape");
-            return -1;
-        }
-      case 'c': /* \cX, \c\M-X */
-        if (ctrl_prefix) {
-            strcpy(err, "duplicate control escape");
-            return -1;
-        }
-        ctrl_prefix = 1;
-        if (p < end && (*p & 0x80) == 0) {
-            if (*p == '\\') {
-                p++;
-                goto again;
-            }
-            else {
-                code = *p++;
-                break;
-            }
-        }
-        strcpy(err, "too short control escape");
-        return -1;
-
-      default:
-        strcpy(err, "unexpected escape sequence");
-        return -1;
-    }
-    if (code < 0 || 0xff < code) {
-        strcpy(err, "invalid escape code");
-        return -1;
-    }
-
-    if (ctrl_prefix)
-        code &= 0x1f;
-    if (meta_prefix)
-        code |= 0x80;
-
-    *pp = p;
-    return code;
-}
-
-__attribute__((noinline))
-static int
-unescape_escaped_nonascii(const char **pp, const char *end, rb_encoding *enc,
-        VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
-{
-    const char *p = *pp;
-    int chmaxlen = rb_enc_mbmaxlen(enc);
-    char chbuf[5];
-    int chlen = 0;
-    int byte;
-#if !WITH_OBJC
-    int l;
-#endif
-
-    assert(chmaxlen < sizeof(chbuf));
-    memset(chbuf, 0, sizeof(chbuf));
-
-    byte = read_escaped_byte(&p, end, err);
-    if (byte == -1) {
-        return -1;
-    }
-
-    chbuf[chlen++] = byte;
-    while (chlen < chmaxlen 
-#if WITH_OBJC
-	&& 1) {
-#else
-	&& MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
-#endif
-        byte = read_escaped_byte(&p, end, err);
-        if (byte == -1) {
-            return -1;
-        }
-        chbuf[chlen++] = byte;
-    }
-
-#if !WITH_OBJC
-    l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
-    if (MBCLEN_INVALID_P(l)) {
-        strcpy(err, "invalid multibyte escape");
-        return -1;
-    }
-#endif
-    if (1 < chlen || (chbuf[0] & 0x80)) {
-        rb_str_buf_cat(buf, chbuf, chlen);
-
-        if (*encp == 0)
-            *encp = enc;
-        else if (*encp != enc) {
-            strcpy(err, "escaped non ASCII character in UTF-8 regexp");
-            return -1;
-        }
-    }
-    else {
-        char escbuf[5];
-        snprintf(escbuf, sizeof(escbuf), "\\x%02X", chbuf[0]&0xff);
-        rb_str_buf_cat(buf, escbuf, 4);
-    }
-    *pp = p;
-    return 0;
-}
-
-static int
-check_unicode_range(unsigned long code, onig_errmsg_buffer err)
-{
-    if ((0xd800 <= code && code <= 0xdfff) || /* Surrogates */
-        0x10ffff < code) {
-        strcpy(err, "invalid Unicode range");
-        return -1;
-    }
-    return 0;
-}
-
-static int
-append_utf8(unsigned long uv,
-        VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
-{
-    if (check_unicode_range(uv, err) != 0)
-        return -1;
-    if (uv < 0x80) {
-        char escbuf[5];
-        snprintf(escbuf, sizeof(escbuf), "\\x%02X", (int)uv);
-        rb_str_buf_cat(buf, escbuf, 4);
-    }
-    else {
-        int len;
-        char utf8buf[6];
-        len = rb_uv_to_utf8(utf8buf, uv);
-        rb_str_buf_cat(buf, utf8buf, len);
-
-#if !WITH_OBJC
-        if (*encp == 0)
-            *encp = rb_utf8_encoding();
-        else if (*encp != rb_utf8_encoding()) {
-            strcpy(err, "UTF-8 character in non UTF-8 regexp");
-            return -1;
-        }
-#endif
-    }
-    return 0;
-}
-
-static int
-unescape_unicode_list(const char **pp, const char *end,
-        VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
-{
-    const char *p = *pp;
-    int has_unicode = 0;
-    unsigned long code;
-    int len;
-
-    while (p < end && ISSPACE(*p)) p++;
-
-    while (1) {
-        code = ruby_scan_hex(p, end-p, &len);
-        if (len == 0)
-            break;
-        if (6 < len) { /* max 10FFFF */
-            strcpy(err, "invalid Unicode range");
-            return -1;
-        }
-        p += len;
-        if (append_utf8(code, buf, encp, err) != 0)
-            return -1;
-        has_unicode = 1;
-
-        while (p < end && ISSPACE(*p)) p++;
-    }
-
-    if (has_unicode == 0) {
-        strcpy(err, "invalid Unicode list");
-        return -1;
-    }
-
-    *pp = p;
-
-    return 0;
-}
-
-static int
-unescape_unicode_bmp(const char **pp, const char *end,
-        VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
-{
-    const char *p = *pp;
-    int len;
-    unsigned long code;
-
-    if (end < p+4) {
-        strcpy(err, "invalid Unicode escape");
-        return -1;
-    }
-    code = ruby_scan_hex(p, 4, &len);
-    if (len != 4) {
-        strcpy(err, "invalid Unicode escape");
-        return -1;
-    }
-    if (append_utf8(code, buf, encp, err) != 0)
-        return -1;
-    *pp = p + 4;
-    return 0;
-}
-
-static int
-unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
-        VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
-{
-    char c;
-    char smallbuf[2];
-
-    while (p < end) {
-#if WITH_OBJC
-	int chlen = 1;
-#else
-        int chlen = rb_enc_precise_mbclen(p, end, enc);
-        if (!MBCLEN_CHARFOUND_P(chlen)) {
-            strcpy(err, "invalid multibyte character");
-            return -1;
-        }
-        chlen = MBCLEN_CHARFOUND_LEN(chlen);
-#endif
-        if (1 < chlen || (*p & 0x80)) {
-            rb_str_buf_cat(buf, p, chlen);
-            p += chlen;
-            if (*encp == 0)
-                *encp = enc;
-            else if (*encp != enc) {
-                strcpy(err, "non ASCII character in UTF-8 regexp");
-                return -1;
-            }
-            continue;
-        }
-
-        switch (c = *p++) {
-          case '\\':
-            if (p == end) {
-                strcpy(err, "too short escape sequence");
-                return -1;
-            }
-            switch (c = *p++) {
-              case '1': case '2': case '3':
-              case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */
-                {
-                    int octlen;
-                    if (ruby_scan_oct(p-1, end-(p-1), &octlen) <= 0177) {
-                        /* backref or 7bit octal.
-                           no need to unescape anyway.
-                           re-escaping may break backref */
-                        goto escape_asis;
-                    }
-                }
-                /* xxx: How about more than 199 subexpressions? */ 
-
-              case '0': /* \0, \0O, \0OO */
-
-              case 'x': /* \xHH */
-              case 'c': /* \cX, \c\M-X */
-              case 'C': /* \C-X, \C-\M-X */
-              case 'M': /* \M-X, \M-\C-X, \M-\cX */
-                p = p-2;
-                if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0)
-                    return -1;
-                break;
-
-              case 'u':
-                if (p == end) {
-                    strcpy(err, "too short escape sequence");
-                    return -1;
-                }
-                if (*p == '{') {
-                    /* \u{H HH HHH HHHH HHHHH HHHHHH ...} */
-                    p++;
-                    if (unescape_unicode_list(&p, end, buf, encp, err) != 0)
-                        return -1;
-                    if (p == end || *p++ != '}') {
-                        strcpy(err, "invalid Unicode list");
-                        return -1;
-                    }
-                    break;
-                }
-                else {
-                    /* \uHHHH */
-                    if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0)
-                        return -1;
-                    break;
-                }
-
-              default: /* \n, \\, \d, \9, etc. */
-escape_asis:
-                smallbuf[0] = '\\';
-                smallbuf[1] = c;
-                rb_str_buf_cat(buf, smallbuf, 2);
-                break;
-            }
-            break;
-
-          default:
-            rb_str_buf_cat(buf, &c, 1);
-            break;
-        }
-    }
-
-    return 0;
-}
-
-static VALUE
-rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc,
-        rb_encoding **fixed_enc, onig_errmsg_buffer err)
-{
-    VALUE buf;
-
-    buf = rb_str_buf_new(0);
-
-    *fixed_enc = 0;
-    if (rb_enc_asciicompat(enc))
-        *fixed_enc = 0;
-    else {
-        *fixed_enc = enc;
-#if !WITH_OBJC
-        rb_enc_associate(buf, enc);
-#endif
-    }
-
-    if (unescape_nonascii(p, end, enc, buf, fixed_enc, err) != 0)
-        return Qnil;
-
-    if (*fixed_enc) {
-#if !WITH_OBJC
-        rb_enc_associate(buf, *fixed_enc);
-#endif
-    }
-
-    return buf;
-}
-
-VALUE
-rb_reg_check_preprocess(VALUE str)
-{
-    rb_encoding *fixed_enc = 0;
-    onig_errmsg_buffer err = "";
-    VALUE buf;
-    const char *p, *end;
-    rb_encoding *enc;
-
-    StringValue(str);
-    p = RSTRING_PTR(str);
-    end = p + RSTRING_LEN(str);
-    enc = rb_enc_get(str);
-
-    buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err);
-
-    if (buf == Qnil) {
-	return rb_reg_error_desc(str, 0, err);
-    }
-    return Qnil;
-}
-
-static VALUE
-rb_reg_preprocess_dregexp(VALUE ary)
-{
-    rb_encoding *fixed_enc = 0;
-    rb_encoding *regexp_enc = 0;
-    onig_errmsg_buffer err = "";
-    int i;
-    VALUE result = 0;
-    int argc = RARRAY_LEN(ary);
-#if !WITH_OBJC
-    VALUE *argv = RARRAY_PTR(ary);
-#endif
-
-    if (argc == 0) {
-        rb_raise(rb_eArgError, "no arguments given");
-    }
-
-    for (i = 0; i < argc; i++) {
-#if WITH_OBJC
-        VALUE str = RARRAY_AT(ary, i);
-#else
-        VALUE str = argv[i];
-#endif
-        VALUE buf;
-        const char *p, *end;
-        rb_encoding *src_enc;
-
-        StringValue(str);
-        p = RSTRING_PTR(str);
-        end = p + RSTRING_LEN(str);
-        src_enc = rb_enc_get(str);
-
-        buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err);
-
-        if (buf == Qnil)
-            rb_raise(rb_eArgError, "%s", err);
-
-        if (fixed_enc != 0) {
-            if (regexp_enc != 0 && regexp_enc != fixed_enc) {
-                rb_raise(rb_eArgError, "encoding mismatch in dynamic regexp : %s and %s",
-                         rb_enc_name(regexp_enc), rb_enc_name(fixed_enc));
-            }
-            regexp_enc = fixed_enc;
-        }
-
-        if (!result)
-            result = rb_str_new3(str);
-        else
-            rb_str_buf_append(result, str);
-    }
-    if (regexp_enc) {
-#if !WITH_OBJC
-        rb_enc_associate(result, regexp_enc);
-#endif
-    }
-
-    return result;
-}
-
-static int
-rb_reg_initialize(VALUE obj, const char *s, int len, rb_encoding *enc,
-		  int options, onig_errmsg_buffer err)
-{
-    struct RRegexp *re = RREGEXP(obj);
-    VALUE unescaped;
-    rb_encoding *fixed_enc = 0;
-#if !WITH_OBJC
-    rb_encoding *a_enc = rb_ascii8bit_encoding();
-#endif
-
-    if (!OBJ_TAINTED(obj) && rb_safe_level() >= 4) {
-	rb_raise(rb_eSecurityError, "Insecure: can't modify regexp");
-    }
-    rb_check_frozen(obj);
-    if (FL_TEST(obj, REG_LITERAL)) {
-	rb_raise(rb_eSecurityError, "can't modify literal regexp");
-    }
-    if (re->ptr != NULL) {
-	onig_free(re->ptr);
-    }
-    if (re->str != NULL) {
-	xfree(re->str);
-    }
-    re->ptr = NULL;
-    re->str = NULL;
-
-    unescaped = rb_reg_preprocess(s, s+len, enc, &fixed_enc, err);
-    if (unescaped == Qnil)
-        return -1;
-
-#if !WITH_OBJC
-    if (fixed_enc) {
-	if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
-            (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
-	    strcpy(err, "incompatible character encoding");
-	    return -1;
-	}
-        if (fixed_enc != a_enc) {
-	    options |= ARG_ENCODING_FIXED;
-	    enc = fixed_enc;
-	}
-    }
-#endif
-#if !WITH_OBJC
-    else if (!(options & ARG_ENCODING_FIXED)) {
-       enc = rb_usascii_encoding();
-    }
-#endif
-
-#if !WITH_OBJC
-    rb_enc_associate((VALUE)re, enc);
-#endif
-    if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
-	re->basic.flags |= KCODE_FIXED;
-    }
-    if (options & ARG_ENCODING_NONE) {
-        re->basic.flags |= REG_ENCODING_NONE;
-    }
-   
-    Regexp *reg = make_regexp(RSTRING_PTR(unescaped), 
-	    RSTRING_LEN(unescaped), enc,
-	    options & ARG_REG_OPTION_MASK, err);
-    if (reg == NULL) {
-	return -1;
-    }
-    GC_WB(&re->ptr, reg);
-    GC_WB(&re->str, ALLOC_N(char, len+1));
-    memcpy(re->str, s, len);
-    re->str[len] = '\0';
-    re->len = len;
-    return 0;
-}
-
-static int
-rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err)
-{
-    rb_encoding *enc = rb_enc_get(str);
-    if (options & ARG_ENCODING_NONE) {
-#if !WITH_OBJC
-	/* TODO */
-        rb_encoding *ascii8bit = rb_ascii8bit_encoding();
-        if (enc != ascii8bit) {
-            if (rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
-                strcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
-                return -1;
-            }
-            enc = ascii8bit;
-        }
-#endif
-    }
-
-    char *cstr = NULL;
-    size_t charsize = 0;
-    bool should_free = false;
-
-    enc = rb_reg_prepare_enc(0, str, &cstr, &charsize, &should_free);
-
-    const size_t clen = charsize * RSTRING_LEN(str);
-
-    VALUE code = rb_reg_initialize(obj, cstr, clen, enc, options, err);
-
-//printf("init re %p cstr %p orig str %p charsize %ld enc %p should_free %d\n", (void *)obj, cstr, (void *)str, charsize, enc, should_free);
-
-    if (should_free && cstr != NULL) {
-	free(cstr);
-    }
-
-    return code;
-}
-
-static VALUE
-rb_reg_s_alloc(VALUE klass, SEL sel)
-{
-    NEWOBJ(re, struct RRegexp);
-    OBJSETUP(re, klass, T_REGEXP);
-
-    re->ptr = 0;
-    re->len = 0;
-    re->str = 0;
-
-    return (VALUE)re;
-}
-
-static IMP rb_objc_reg_finalize_super = NULL; 
-
-static void
-rb_objc_reg_finalize(void *rcv, SEL sel)
-{
-    struct RRegexp *re = RREGEXP(rcv);
-    onig_free(re->ptr);
-    if (rb_objc_reg_finalize_super != NULL) {
-	((void(*)(void *, SEL))rb_objc_reg_finalize_super)(rcv, sel);
-    }
-}
-
-VALUE
-rb_reg_new_str(VALUE s, int options)
-{
-    VALUE re = rb_reg_s_alloc(rb_cRegexp, 0);
-    onig_errmsg_buffer err = "";
-
-    if (rb_reg_initialize_str(re, s, options, err) != 0) {
-	rb_reg_raise_str(s, options, err);
-    }
-
-    return re;
-}
-
-VALUE
-rb_reg_new_ary(VALUE ary, int opt)
-{
-    return rb_reg_new_str(rb_reg_preprocess_dregexp(ary), opt);
-}
-
-VALUE
-rb_enc_reg_new(const char *s, long len, rb_encoding *enc, int options)
-{
-    VALUE re = rb_reg_s_alloc(rb_cRegexp, 0);
-    onig_errmsg_buffer err = "";
-
-    if (rb_reg_initialize(re, s, len, enc, options, err) != 0) {
-	rb_enc_reg_raise(s, len, enc, options, err);
-    }
-
-    return re;
-}
-
-VALUE
-rb_reg_new(const char *s, long len, int options)
-{
-#if WITH_OBJC
-    if (s == NULL && len == 0) {
-	s = "";
-    }
-    return rb_enc_reg_new(s, len, NULL, options);
-#else
-    return rb_enc_reg_new(s, len, rb_ascii8bit_encoding(), options);
-#endif
-}
-
-VALUE
-rb_reg_new_retained(const char *s, long len, int options)
-{
-    VALUE re = rb_reg_new(s, len, options);
-    GC_RETAIN(re);
-    return re;
-}
-
-VALUE
-rb_reg_compile(VALUE str, int options)
-{
-    VALUE re = rb_reg_s_alloc(rb_cRegexp, 0);
-    onig_errmsg_buffer err = "";
-
-    if (!str) str = rb_str_new(0,0);
-    if (rb_reg_initialize_str(re, str, options, err) != 0) {
-	rb_set_errinfo(rb_reg_error_desc(str, options, err));
-	return Qnil;
-    }
-    FL_SET(re, REG_LITERAL);
-    return re;
-}
-
-static VALUE reg_cache = 0;
-
-VALUE
-rb_reg_regcomp(VALUE str)
-{
-    volatile VALUE save_str = str;
-    if (reg_cache && RREGEXP(reg_cache)->len == RSTRING_LEN(str)
-#if WITH_OBJC
-	&& rb_enc_get(reg_cache) == rb_enc_get(str)
-#else
-	&& ENCODING_GET(reg_cache) == ENCODING_GET(str)
-#endif
-	&& memcmp(RREGEXP(reg_cache)->str, RSTRING_PTR(str), RSTRING_LEN(str)) == 0) {
-	return reg_cache;
-    }
-
-    if (reg_cache != 0) {
-	GC_RELEASE(reg_cache);
-    }
-    reg_cache = rb_reg_new_str(save_str, 0);
-    GC_RETAIN(reg_cache);
-    return reg_cache;
-}
-
-/*
- * call-seq:
- *   rxp.hash   => fixnum
- *
- * Produce a hash based on the text and options of this regular expression.
- */
-
-static VALUE
-rb_reg_hash(VALUE re, SEL sel)
-{
-    int hashval, len;
-    char *p;
-
-    rb_reg_check(re);
-    hashval = RREGEXP(re)->ptr->options;
-    len = RREGEXP(re)->len;
-    p  = RREGEXP(re)->str;
-    while (len--) {
-	hashval = hashval * 33 + *p++;
-    }
-    hashval = hashval + (hashval>>5);
-
-    return INT2FIX(hashval);
-}
-
-
-/*
- *  call-seq:
- *     rxp == other_rxp      => true or false
- *     rxp.eql?(other_rxp)   => true or false
- *
- *  Equality---Two regexps are equal if their patterns are identical, they have
- *  the same character set code, and their <code>casefold?</code> values are the
- *  same.
- *
- *     /abc/  == /abc/x   #=> false
- *     /abc/  == /abc/i   #=> false
- *     /abc/  == /abc/n   #=> false
- *     /abc/u == /abc/n   #=> false
- */
-
-static VALUE
-rb_reg_equal(VALUE re1, SEL sel, VALUE re2)
-{
-    if (re1 == re2) return Qtrue;
-    if (TYPE(re2) != T_REGEXP) return Qfalse;
-    rb_reg_check(re1); rb_reg_check(re2);
-    if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse;
-    if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) return Qfalse;
-    if (RREGEXP(re1)->len != RREGEXP(re2)->len) return Qfalse;
-#if WITH_OBJC
-    if (rb_enc_get(re1) != rb_enc_get(re2)) return Qfalse;
-#else
-    if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse;
-#endif
-    if (memcmp(RREGEXP(re1)->str, RREGEXP(re2)->str, RREGEXP(re1)->len) == 0) {
-	return Qtrue;
-    }
-    return Qfalse;
-}
-
-static VALUE
-reg_operand(VALUE s, int check)
-{
-    if (SYMBOL_P(s)) {
-	return rb_sym_to_s(s);
-    }
-    else {
-	VALUE tmp = rb_check_string_type(s);
-	if (check && NIL_P(tmp)) {
-	    rb_raise(rb_eTypeError, "can't convert %s to String",
-		     rb_obj_classname(s));
-	}
-	return tmp;
-    }
-}
-
-static long
-reg_match_pos(VALUE re, VALUE *strp, long pos)
-{
-    VALUE str = *strp;
-
-    if (NIL_P(str)) {
-	rb_backref_set(Qnil);
-	return -1;
-    }
-    *strp = str = reg_operand(str, Qtrue);
-    if (pos != 0) {
-	if (pos < 0) {
-	    VALUE l = rb_str_length(str);
-	    pos += NUM2INT(l);
-	    if (pos < 0) {
-		return pos;
-	    }
-	}
-	pos = rb_reg_adjust_startpos(re, str, pos, 0);
-    }
-    return rb_reg_search(re, str, pos, 0);
-}
-
-/*
- *  call-seq:
- *     rxp =~ str    => integer or nil
- *
- *  Match---Matches <i>rxp</i> against <i>str</i>.
- *
- *     /at/ =~ "input data"   #=> 7
- *     /ax/ =~ "input data"   #=> nil
- *
- *  If <code>=~</code> is used with a regexp literal with named captures,
- *  captured strings (or nil) is assigned to local variables named by
- *  the capture names.
- *
- *     /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "  x = y  "
- *     p lhs    #=> "x"
- *     p rhs    #=> "y"
- *
- *  If it is not matched, nil is assigned for the variables.
- *
- *     /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "  x = "   
- *     p lhs    #=> nil
- *     p rhs    #=> nil
- *
- *  This assignment is implemented in the Ruby parser.
- *  So a regexp literal is required for the assignment. 
- *  The assignment is not occur if the regexp is not a literal.
- *
- *     re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
- *     re =~ "  x = "
- *     p lhs    # undefined local variable
- *     p rhs    # undefined local variable
- *
- *  A regexp interpolation, <code>#{}</code>, also disables
- *  the assignment.
- *
- *     rhs_pat = /(?<rhs>\w+)/
- *     /(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y"
- *     p lhs    # undefined local variable
- *
- */
-
-static VALUE
-rb_reg_match_imp(VALUE re, SEL sel, VALUE str)
-{
-    long pos = reg_match_pos(re, &str, 0);
-    if (pos < 0) {
-	return Qnil;
-    }
-    pos = rb_str_sublen(str, pos);
-    return LONG2FIX(pos);
-}
-
-VALUE
-rb_reg_match(VALUE re, VALUE str)
-{
-    return rb_reg_match_imp(re, 0, str);
-}
-
-/*
- *  call-seq:
- *     rxp === str   => true or false
- *
- *  Case Equality---Synonym for <code>Regexp#=~</code> used in case statements.
- *
- *     a = "HELLO"
- *     case a
- *     when /^[a-z]*$/; print "Lower case\n"
- *     when /^[A-Z]*$/; print "Upper case\n"
- *     else;            print "Mixed case\n"
- *     end
- *
- *  <em>produces:</em>
- *
- *     Upper case
- */
-
-VALUE
-rb_reg_eqq(VALUE re, SEL sel, VALUE str)
-{
-    long start;
-
-    str = reg_operand(str, Qfalse);
-    if (NIL_P(str)) {
-	rb_backref_set(Qnil);
-	return Qfalse;
-    }
-    start = rb_reg_search(re, str, 0, 0);
-    if (start < 0) {
-	return Qfalse;
-    }
-    return Qtrue;
-}
-
-
-/*
- *  call-seq:
- *     ~ rxp   => integer or nil
- *
- *  Match---Matches <i>rxp</i> against the contents of <code>$_</code>.
- *  Equivalent to <code><i>rxp</i> =~ $_</code>.
- *
- *     $_ = "input data"
- *     ~ /at/   #=> 7
- */
-
-static VALUE
-rb_reg_match2(VALUE re, SEL sel)
-{
-    long start;
-    VALUE line = rb_lastline_get();
-
-    if (TYPE(line) != T_STRING) {
-	rb_backref_set(Qnil);
-	return Qnil;
-    }
-
-    start = rb_reg_search(re, line, 0, 0);
-    if (start < 0) {
-	return Qnil;
-    }
-    start = rb_str_sublen(line, start);
-    return LONG2FIX(start);
-}
-
-
-/*
- *  call-seq:
- *     rxp.match(str)       => matchdata or nil
- *     rxp.match(str,pos)   => matchdata or nil
- *
- *  Returns a <code>MatchData</code> object describing the match, or
- *  <code>nil</code> if there was no match. This is equivalent to retrieving the
- *  value of the special variable <code>$~</code> following a normal match.
- *  If the second parameter is present, it specifies the position in the string
- *  to begin the search.
- *
- *     /(.)(.)(.)/.match("abc")[2]   #=> "b"
- *     /(.)(.)/.match("abc", 1)[2]   #=> "c"
- *     
- *  If a block is given, invoke the block with MatchData if match succeed, so
- *  that you can write
- *     
- *     pat.match(str) {|m| ...}
- *     
- *  instead of
- *      
- *     if m = pat.match(str)
- *       ...
- *     end
- *      
- *  The return value is a value from block execution in this case.
- */
-
-VALUE
-rb_reg_match_m(VALUE re, SEL sel, int argc, VALUE *argv)
-{
-    VALUE result, str, initpos;
-    long pos;
-
-    if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
-	pos = NUM2LONG(initpos);
-    }
-    else {
-	pos = 0;
-    }
-
-    pos = reg_match_pos(re, &str, pos);
-    if (pos < 0) {
-	rb_backref_set(Qnil);
-	return Qnil;
-    }
-    result = rb_backref_get();
-    rb_match_busy(result);
-    if (!NIL_P(result) && rb_block_given_p()) {
-	return rb_yield(result);
-    }
-    return result;
-}
-
-/*
- * Document-method: compile
- *
- * Synonym for <code>Regexp.new</code>
- */
-
-
-/*
- *  call-seq:
- *     Regexp.new(string [, options])                => regexp
- *     Regexp.new(regexp)                            => regexp
- *     Regexp.compile(string [, options])            => regexp
- *     Regexp.compile(regexp)                        => regexp
- *
- *  Constructs a new regular expression from <i>pattern</i>, which can be either
- *  a <code>String</code> or a <code>Regexp</code> (in which case that regexp's
- *  options are propagated, and new options may not be specified (a change as of
- *  Ruby 1.8). If <i>options</i> is a <code>Fixnum</code>, it should be one or
- *  more of the constants <code>Regexp::EXTENDED</code>,
- *  <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>,
- *  <em>or</em>-ed together. Otherwise, if <i>options</i> is not
- *  <code>nil</code>, the regexp will be case insensitive.
- *
- *     r1 = Regexp.new('^a-z+:\\s+\w+')           #=> /^a-z+:\s+\w+/
- *     r2 = Regexp.new('cat', true)               #=> /cat/i
- *     r3 = Regexp.new('dog', Regexp::EXTENDED)   #=> /dog/x
- *     r4 = Regexp.new(r2)                        #=> /cat/i
- */
-
-static VALUE
-rb_reg_initialize_m(VALUE self, SEL sel, int argc, VALUE *argv)
-{
-    onig_errmsg_buffer err = "";
-    int flags = 0;
-    VALUE str;
-    rb_encoding *enc;
-    const char *ptr;
-    long len;
-
-    if (argc == 0 || argc > 3) {
-	rb_raise(rb_eArgError, "wrong number of arguments");
-    }
-    if (TYPE(argv[0]) == T_REGEXP) {
-	VALUE re = argv[0];
-
-	if (argc > 1) {
-	    rb_warn("flags ignored");
-	}
-	rb_reg_check(re);
-	flags = rb_reg_options(re);
-	ptr = RREGEXP(re)->str;
-	len = RREGEXP(re)->len;
-	enc = rb_enc_get(re);
-	if (rb_reg_initialize(self, ptr, len, enc, flags, err)) {
-	    str = rb_enc_str_new(ptr, len, enc);
-	    rb_reg_raise_str(str, flags, err);
-	}
-    }
-    else {
-	if (argc >= 2) {
-	    if (FIXNUM_P(argv[1])) flags = FIX2INT(argv[1]);
-	    else if (RTEST(argv[1])) flags = ONIG_OPTION_IGNORECASE;
-	}
-	enc = 0;
-	if (argc == 3 && !NIL_P(argv[2])) {
-	    char *kcode = StringValuePtr(argv[2]);
-	    if (kcode[0] == 'n' || kcode[1] == 'N') {
-#if !WITH_OBJC
-		enc = rb_ascii8bit_encoding();
-#endif
-		flags |= ARG_ENCODING_FIXED;
-	    }
-	    else {
-		rb_warning("encoding option is obsolete - %s", kcode);
-	    }
-	}
-	str = argv[0];
-	ptr = StringValuePtr(str);
-	if (enc
-	    ? rb_reg_initialize(self, ptr, RSTRING_LEN(str), enc, flags, err)
-	    : rb_reg_initialize_str(self, str, flags, err)) {
-	    rb_reg_raise_str(str, flags, err);
-	}
-    }
-    return self;
-}
-
-VALUE
-rb_reg_quote(VALUE str)
-{
-    const char *cstr = RSTRING_PTR(str);;
-    if (cstr == NULL) {
-	return str;
-    }
-    const char *s = cstr;
-    const char *send = s + RSTRING_LEN(str);
-    while (s < send) {
-	switch (*s) {
-	    case '[': case ']': case '{': case '}':
-	    case '(': case ')': case '|': case '-':
-	    case '*': case '.': case '\\':
-	    case '?': case '+': case '^': case '$':
-	    case ' ': case '#':
-	    case '\t': case '\f': case '\v': case '\n': case '\r':
-		goto meta_found;
-	}
-        s++;
-    }
-    return rb_str_new3(str);
-
-    char *t_beg;
-
-  meta_found:
-    t_beg = (char *)alloca((RSTRING_LEN(str) * 2) + 1);
-
-    char *t = t_beg;
-    /* copy upto metacharacter */
-    memcpy(t, cstr, s - cstr);
-    t += s - cstr;
-
-    while (s < send) {
-	const char c = *s;
-        s++;
-	switch (c) {
-	    case '[': case ']': case '{': case '}':
-	    case '(': case ')': case '|': case '-':
-	    case '*': case '.': case '\\':
-	    case '?': case '+': case '^': case '$':
-	    case '#':
-		*t++ = '\\';
-		break;
-	    case ' ':
-		*t++ = '\\';
-		*t++ = ' ';
-		continue;
-	    case '\t':
-		*t++ = '\\';
-		*t++ = 't';
-		continue;
-	    case '\n':
-		*t++ = '\\';
-		*t++ = 'n';
-		continue;
-	    case '\r':
-		*t++ = '\\';
-		*t++ = 'r';
-		continue;
-	    case '\f':
-		*t++ = '\\';
-		*t++ = 'f';
-		continue;
-	    case '\v':
-		*t++ = '\\';
-		*t++ = 'v';
-		continue;
-	}
-	*t++ = c;
-    }
-    // rb_str_new wants a NULL-terminated string
-    *t = 0;
-
-    return rb_str_new(t_beg, t - t_beg);
-}
-
-
-/*
- *  call-seq:
- *     Regexp.escape(str)   => string
- *     Regexp.quote(str)    => string
- *
- *  Escapes any characters that would have special meaning in a regular
- *  expression. Returns a new escaped string, or self if no characters are
- *  escaped.  For any string,
- *  <code>Regexp.new(Regexp.escape(<i>str</i>))=~<i>str</i></code> will be true.
- *
- *     Regexp.escape('\*?{}.')   #=> \\\*\?\{\}\.
- *
- */
-
-static VALUE
-rb_reg_s_quote(VALUE c, SEL sel ,VALUE str)
-{
-    return rb_reg_quote(reg_operand(str, Qtrue));
-}
-
-int
-rb_reg_options(VALUE re)
-{
-    int options;
-
-    rb_reg_check(re);
-    options = RREGEXP(re)->ptr->options & ARG_REG_OPTION_MASK;
-    if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
-    if (RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
-    return options;
-}
-
-VALUE
-rb_check_regexp_type(VALUE re)
-{
-    return rb_check_convert_type(re, T_REGEXP, "Regexp", "to_regexp");
-}
-
-/*
- *  call-seq:
- *     Regexp.try_convert(obj) -> re or nil
- *
- *  Try to convert <i>obj</i> into a Regexp, using to_regexp method.
- *  Returns converted regexp or nil if <i>obj</i> cannot be converted
- *  for any reason.
- *
- *     Regexp.try_convert(/re/)         #=> /re/
- *     Regexp.try_convert("re")         #=> nil
- *
- *     o = Object.new
- *     Regexp.try_convert(o)            #=> nil
- *     def o.to_regexp() /foo/ end
- *     Regexp.try_convert(o)            #=> /foo/
- *
- */
-static VALUE
-rb_reg_s_try_convert(VALUE dummy, SEL sel, VALUE re)
-{
-    return rb_check_regexp_type(re);
-}
-
-static VALUE
-rb_reg_s_union(VALUE self, VALUE args0)
-{
-    long argc = RARRAY_LEN(args0);
-
-    if (argc == 0) {
-        VALUE args[1];
-        args[0] = rb_str_new2("(?!)");
-        return rb_class_new_instance(1, args, rb_cRegexp);
-    }
-    else if (argc == 1) {
-        VALUE arg = rb_ary_entry(args0, 0);
-        VALUE re = rb_check_regexp_type(arg);
-        if (!NIL_P(re))
-            return re;
-        else {
-            VALUE quoted;
-            quoted = rb_reg_s_quote(Qnil, 0, arg);
-            return rb_reg_new_str(quoted, 0);
-        }
-    }
-    else {
-	int i;
-	VALUE source = rb_str_buf_new(0);
-#if !WITH_OBJC
-	rb_encoding *result_enc;
-#endif
-
-        int has_asciionly = 0;
-        rb_encoding *has_ascii_compat_fixed = 0;
-        rb_encoding *has_ascii_incompat = 0;
-
-	for (i = 0; i < argc; i++) {
-	    volatile VALUE v;
-	    VALUE e = rb_ary_entry(args0, i);
-
-	    if (0 < i)
-		rb_str_buf_cat_ascii(source, "|");
-
-	    v = rb_check_regexp_type(e);
-	    if (!NIL_P(v)) {
-                rb_encoding *enc = rb_enc_get(v);
-                if (!rb_enc_asciicompat(enc)) {
-                    if (!has_ascii_incompat)
-                        has_ascii_incompat = enc;
-                    else if (has_ascii_incompat != enc)
-                        rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
-                            rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
-                }
-                else if (rb_reg_fixed_encoding_p(v, 0)) {
-                    if (!has_ascii_compat_fixed)
-                        has_ascii_compat_fixed = enc;
-                    else if (has_ascii_compat_fixed != enc)
-                        rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
-                            rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
-                }
-                else {
-                    has_asciionly = 1;
-                }
-		v = rb_reg_to_s(v, 0);
-	    }
-	    else {
-                rb_encoding *enc = rb_enc_get(e);
-                StringValue(e);
-                enc = rb_enc_get(e);
-                if (!rb_enc_str_asciicompat_p(e)) {
-                    if (!has_ascii_incompat)
-                        has_ascii_incompat = enc;
-                    else if (has_ascii_incompat != enc)
-                        rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
-                            rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
-                }
-#if !WITH_OBJC
-                else if (rb_enc_str_asciionly_p(e)) {
-                    has_asciionly = 1;
-                }
-#endif
-                else {
-                    if (!has_ascii_compat_fixed)
-                        has_ascii_compat_fixed = enc;
-                    else if (has_ascii_compat_fixed != enc)
-                        rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
-                            rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
-                }
-		v = rb_reg_s_quote(Qnil, 0, e);
-	    }
-#if !WITH_OBJC
-            if (has_ascii_incompat) {
-                if (has_asciionly) {
-                    rb_raise(rb_eArgError, "ASCII incompatible encoding: %s",
-                        rb_enc_name(has_ascii_incompat));
-                }
-                if (has_ascii_compat_fixed) {
-                    rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
-                        rb_enc_name(has_ascii_incompat), rb_enc_name(has_ascii_compat_fixed));
-                }
-            }
-#endif
-
-#if !WITH_OBJC
-            if (i == 0) {
-                rb_enc_copy(source, v);
-            }
-#endif
-	    rb_str_append(source, v);
-	}
-
-#if !WITH_OBJC
-        if (has_ascii_incompat) {
-            result_enc = has_ascii_incompat;
-        }
-        else if (has_ascii_compat_fixed) {
-            result_enc = has_ascii_compat_fixed;
-        }
-        else {
-            result_enc = rb_ascii8bit_encoding();
-        }
-
-        rb_enc_associate(source, result_enc);
-#endif
-        return rb_class_new_instance(1, &source, rb_cRegexp);
-    }
-}
-
-/*
- *  call-seq:
- *     Regexp.union(pat1, pat2, ...)            => new_regexp
- *     Regexp.union(pats_ary)                   => new_regexp
- *
- *  Return a <code>Regexp</code> object that is the union of the given
- *  <em>pattern</em>s, i.e., will match any of its parts. The <em>pattern</em>s
- *  can be Regexp objects, in which case their options will be preserved, or
- *  Strings. If no patterns are given, returns <code>/(?!)/</code>.
- *
- *     Regexp.union                         #=> /(?!)/
- *     Regexp.union("penzance")             #=> /penzance/
- *     Regexp.union("a+b*c")                #=> /a\+b\*c/
- *     Regexp.union("skiing", "sledding")   #=> /skiing|sledding/
- *     Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/
- *     Regexp.union(/dogs/, /cats/i)        #=> /(?-mix:dogs)|(?i-mx:cats)/
- */
-static VALUE
-rb_reg_s_union_m(VALUE self, SEL sel, VALUE args)
-{
-    VALUE v;
-    if (RARRAY_LEN(args) == 1 &&
-        !NIL_P(v = rb_check_array_type(rb_ary_entry(args, 0)))) {
-        return rb_reg_s_union(self, v);
-    }
-    return rb_reg_s_union(self, args);
-}
-
-/* :nodoc: */
-static VALUE
-rb_reg_init_copy(VALUE copy, SEL sel, VALUE re)
-{
-    onig_errmsg_buffer err = "";
-    const char *s;
-    long len;
-
-    if (copy == re) return copy;
-    rb_check_frozen(copy);
-    /* need better argument type check */
-    if (!rb_obj_is_instance_of(re, rb_obj_class(copy))) {
-	rb_raise(rb_eTypeError, "wrong argument type");
-    }
-    rb_reg_check(re);
-    s = RREGEXP(re)->str;
-    len = RREGEXP(re)->len;
-    if (rb_reg_initialize(copy, s, len, rb_enc_get(re), rb_reg_options(re), err) != 0) {
-	rb_reg_raise(s, len, err, re);
-    }
-    return copy;
-}
-
-VALUE
-rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
-{
-    VALUE val = 0;
-    const char *p, *s, *e;
-    int no, clen;
-    rb_encoding *str_enc = rb_enc_get(str);
-    rb_encoding *src_enc = rb_enc_get(src);
-
-#if !WITH_OBJC
-    rb_enc_check(str, src);
-#endif
-    p = s = RSTRING_PTR(str);
-    e = s + RSTRING_LEN(str);
-
-    while (s < e) {
-	const char *ss;
-#if WITH_OBJC
-	int c = *s;
-	clen = 1;
-#else
-        int c = rb_enc_ascget(s, e, &clen, str_enc);
-	if (c == -1) {
-	    s += mbclen(s, e, str_enc);
-	    continue;
-	}
-#endif
-	ss = s;
-        s += clen;
-
-	if (c != '\\' || s == e) continue;
-
-	if (!val) {
-	    val = rb_str_buf_new(ss-p);
-	}
-        rb_enc_str_buf_cat(val, p, ss-p, str_enc);
-
-#if WITH_OBJC
-	c = *s;
-	clen = 1;
-#else
-        c = rb_enc_ascget(s, e, &clen, str_enc);
-        if (c == -1) {
-            s += mbclen(s, e, str_enc);
-	    rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
-            p = s;
-	    continue;
-        }
-#endif
-        s += clen;
-
-	p = s;
-	switch (c) {
-	  case '1': case '2': case '3': case '4':
-	  case '5': case '6': case '7': case '8': case '9':
-            if (onig_noname_group_capture_is_active(RREGEXP(regexp)->ptr)) {
-                no = c - '0';
-            }
-            else {
-                continue;
-            }
-	    break;
-
-          case 'k':
-#if WITH_OBJC
-	    clen = 1;
-	    if (s < e && *s == '<') {
-#else
-            if (s < e && rb_enc_ascget(s, e, &clen, str_enc) == '<') {
-#endif
-                const char *name, *name_end;
-               
-                name_end = name = s + clen;
-                while (name_end < e) {
-#if WITH_OBJC
-		    c = *name_end;
-		    clen = 1;
-                    if (c == '>') break;
-                    name_end += clen;
-#else
-                    c = rb_enc_ascget(name_end, e, &clen, str_enc);
-                    if (c == '>') break;
-                    name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
-#endif
-                }
-                if (name_end < e) {
-                    no = name_to_backref_number(regs, regexp, name, name_end);
-                    p = s = name_end + clen;
-                    break;
-                }
-                else {
-                    rb_raise(rb_eRuntimeError, "invalid group name reference format");
-                }
-            }
-
-            rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
-            continue;
-
-          case '0':
-	  case '&':
-	    no = 0;
-	    break;
-
-	  case '`':
-	    rb_enc_str_buf_cat(val, RSTRING_PTR(src), BEG(0), src_enc);
-	    continue;
-
-	  case '\'':
-	    rb_enc_str_buf_cat(val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc);
-	    continue;
-
-	  case '+':
-	    no = regs->num_regs-1;
-	    while (BEG(no) == -1 && no > 0) no--;
-	    if (no == 0) continue;
-	    break;
-
-	  case '\\':
-	    rb_enc_str_buf_cat(val, s-clen, clen, str_enc);
-	    continue;
-
-	  default:
-	    rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
-	    continue;
-	}
-
-	if (no >= 0) {
-	    if (no >= regs->num_regs) continue;
-	    if (BEG(no) == -1) continue;
-	    rb_enc_str_buf_cat(val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc);
-	}
-    }
-
-    if (!val) return str;
-    if (p < e) {
-        rb_enc_str_buf_cat(val, p, e-p, str_enc);
-    }
-
-    return val;
-}
-
-static VALUE
-kcode_getter(void)
-{
-    rb_warn("variable $KCODE is no longer effective");
-    return Qnil;
-}
-
-static void
-kcode_setter(VALUE val, ID id)
-{
-    rb_warn("variable $KCODE is no longer effective; ignored");
-}
-
-static VALUE
-ignorecase_getter(void)
-{
-    rb_warn("variable $= is no longer effective");
-    return Qfalse;
-}
-
-static void
-ignorecase_setter(VALUE val, ID id)
-{
-    rb_warn("variable $= is no longer effective; ignored");
-}
-
-static VALUE
-match_getter(void)
-{
-    VALUE match = rb_backref_get();
-
-    if (NIL_P(match)) return Qnil;
-    rb_match_busy(match);
-    return match;
-}
-
-static void
-match_setter(VALUE val)
-{
-    if (!NIL_P(val)) {
-	Check_Type(val, T_MATCH);
-    }
-    rb_backref_set(val);
-}
-
-/*
- *  call-seq:
- *     Regexp.last_match           => matchdata
- *     Regexp.last_match(n)        => str
- *
- *  The first form returns the <code>MatchData</code> object generated by the
- *  last successful pattern match. Equivalent to reading the global variable
- *  <code>$~</code>. The second form returns the <i>n</i>th field in this
- *  <code>MatchData</code> object.
- *  <em>n</em> can be a string or symbol to reference a named capture.
- *
- *     /c(.)t/ =~ 'cat'        #=> 0
- *     Regexp.last_match       #=> #<MatchData "cat" 1:"a">
- *     Regexp.last_match(0)    #=> "cat"
- *     Regexp.last_match(1)    #=> "a"
- *     Regexp.last_match(2)    #=> nil
- *
- *     /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "var = val"
- *     Regexp.last_match       #=> #<MatchData "var = val" lhs:"var" rhs:"val">
- *     Regexp.last_match(:lhs) #=> "var"
- *     Regexp.last_match(:rhs) #=> "val"
- */
-
-static VALUE
-rb_reg_s_last_match(VALUE rcv, SEL sel, int argc, VALUE *argv)
-{
-    VALUE nth;
-
-    if (argc > 0 && rb_scan_args(argc, argv, "01", &nth) == 1) {
-        VALUE match = rb_backref_get();
-        int n;
-        if (NIL_P(match)) return Qnil;
-        n = match_backref_number(match, nth);
-	return rb_reg_nth_match(n, match);
-    }
-    return match_getter();
-}
-
-static void
-re_warn(const char *s)
-{
-    rb_warn("%s", s);
-}
-
-/*
- *  Document-class: Regexp
- *
- *  A <code>Regexp</code> holds a regular expression, used to match a pattern
- *  against strings. Regexps are created using the <code>/.../</code> and
- *  <code>%r{...}</code> literals, and by the <code>Regexp::new</code>
- *  constructor.
- *
- */
-
-void
-Init_Regexp(void)
-{
-    rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
-
-    onigenc_set_default_caseconv_table((UChar*)casetable);
-    onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
-    onig_set_warn_func(re_warn);
-    onig_set_verb_warn_func(re_warn);
-
-    rb_define_virtual_variable("$~", match_getter, match_setter);
-    rb_define_virtual_variable("$&", last_match_getter, 0);
-    rb_define_virtual_variable("$`", prematch_getter, 0);
-    rb_define_virtual_variable("$'", postmatch_getter, 0);
-    rb_define_virtual_variable("$+", last_paren_match_getter, 0);
-
-    rb_define_virtual_variable("$=", ignorecase_getter, ignorecase_setter);
-    rb_define_virtual_variable("$KCODE", kcode_getter, kcode_setter);
-    rb_define_virtual_variable("$-K", kcode_getter, kcode_setter);
-
-    rb_cRegexp = rb_define_class("Regexp", rb_cObject);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "alloc", rb_reg_s_alloc, 0);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "compile", rb_class_new_instance_imp, -1);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "quote", rb_reg_s_quote, 1);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "escape", rb_reg_s_quote, 1);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "union", rb_reg_s_union_m, -2);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "last_match", rb_reg_s_last_match, -1);
-    rb_objc_define_method(*(VALUE *)rb_cRegexp, "try_convert", rb_reg_s_try_convert, 1);
-
-    rb_objc_reg_finalize_super = rb_objc_install_method2((Class)rb_cRegexp,
-	    "finalize", (IMP)rb_objc_reg_finalize);
-
-    rb_objc_define_method(rb_cRegexp, "initialize", rb_reg_initialize_m, -1);
-    rb_objc_define_method(rb_cRegexp, "initialize_copy", rb_reg_init_copy, 1);
-    rb_objc_define_method(rb_cRegexp, "hash", rb_reg_hash, 0);
-    rb_objc_define_method(rb_cRegexp, "eql?", rb_reg_equal, 1);
-    rb_objc_define_method(rb_cRegexp, "==", rb_reg_equal, 1);
-    rb_objc_define_method(rb_cRegexp, "=~", rb_reg_match_imp, 1);
-    rb_objc_define_method(rb_cRegexp, "===", rb_reg_eqq, 1);
-    rb_objc_define_method(rb_cRegexp, "~", rb_reg_match2, 0);
-    rb_objc_define_method(rb_cRegexp, "match", rb_reg_match_m, -1);
-    rb_objc_define_method(rb_cRegexp, "to_s", rb_reg_to_s, 0);
-    rb_objc_define_method(rb_cRegexp, "inspect", rb_reg_inspect, 0);
-    rb_objc_define_method(rb_cRegexp, "source", rb_reg_source, 0);
-    rb_objc_define_method(rb_cRegexp, "casefold?", rb_reg_casefold_p, 0);
-    rb_objc_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
-    rb_objc_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
-    rb_objc_define_method(rb_cRegexp, "fixed_encoding?", rb_reg_fixed_encoding_p, 0);
-    rb_objc_define_method(rb_cRegexp, "names", rb_reg_names, 0);
-    rb_objc_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
-
-    rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
-    rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
-    rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
-
-    rb_global_variable(&reg_cache);
-
-    rb_cMatch  = rb_define_class("MatchData", rb_cObject);
-    rb_objc_define_method(*(VALUE *)rb_cMatch, "alloc", match_alloc, 0);
-    rb_undef_method(CLASS_OF(rb_cMatch), "new");
-
-    rb_objc_match_finalize_super = rb_objc_install_method2((Class)rb_cMatch,
-	    "finalize", (IMP)rb_objc_match_finalize);
-
-    rb_objc_define_method(rb_cMatch, "initialize_copy", match_init_copy, 1);
-    rb_objc_define_method(rb_cMatch, "regexp", match_regexp, 0);
-    rb_objc_define_method(rb_cMatch, "names", match_names, 0);
-    rb_objc_define_method(rb_cMatch, "size", match_size, 0);
-    rb_objc_define_method(rb_cMatch, "length", match_size, 0);
-    rb_objc_define_method(rb_cMatch, "offset", match_offset, 1);
-    rb_objc_define_method(rb_cMatch, "begin", match_begin, 1);
-    rb_objc_define_method(rb_cMatch, "end", match_end, 1);
-    rb_objc_define_method(rb_cMatch, "to_a", match_to_a, 0);
-    rb_objc_define_method(rb_cMatch, "[]", match_aref, -1);
-    rb_objc_define_method(rb_cMatch, "captures", match_captures, 0);
-    rb_objc_define_method(rb_cMatch, "values_at", match_values_at, -1);
-    rb_objc_define_method(rb_cMatch, "pre_match", rb_reg_match_pre, 0);
-    rb_objc_define_method(rb_cMatch, "post_match", rb_reg_match_post, 0);
-    rb_objc_define_method(rb_cMatch, "to_s", match_to_s, 0);
-    rb_objc_define_method(rb_cMatch, "inspect", match_inspect, 0);
-    rb_objc_define_method(rb_cMatch, "string", match_string, 0);
-}

Copied: MacRuby/trunk/re.cpp (from rev 3744, MacRuby/branches/icu/re.cpp)
===================================================================
--- MacRuby/trunk/re.cpp	                        (rev 0)
+++ MacRuby/trunk/re.cpp	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,1806 @@
+/* 
+ * MacRuby Regular Expressions.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ * 
+ * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
+ * Copyright (C) 1993-2007 Yukihiro Matsumoto
+ */
+
+#include "unicode/regex.h"
+#include "unicode/unistr.h"
+#include "ruby/ruby.h"
+#include "encoding.h"
+#include "objc.h"
+#include "re.h"
+
+extern "C" {
+
+VALUE rb_eRegexpError;
+VALUE rb_cRegexp;
+VALUE rb_cMatch;
+
+typedef struct rb_regexp {
+    struct RBasic basic;
+    UnicodeString *unistr;
+    RegexPattern *pattern;
+} rb_regexp_t;
+
+#define RREGEXP(o) ((rb_regexp_t *)o)
+
+#define REGEXP_OPT_DEFAULT	(UREGEX_MULTILINE)
+#define REGEXP_OPT_IGNORECASE 	(UREGEX_CASE_INSENSITIVE)
+#define REGEXP_OPT_EXTENDED 	(UREGEX_COMMENTS)
+#define REGEXP_OPT_MULTILINE	(UREGEX_DOTALL)
+
+typedef struct rb_match {
+    struct RBasic basic;
+    rb_regexp_t *regexp;
+    VALUE str;
+    rb_match_result_t *results;
+    int results_count;
+} rb_match_t;
+
+#define RMATCH(o) ((rb_match_t *)o)
+#define MATCH_BUSY FL_USER2
+
+static rb_regexp_t *
+regexp_alloc(VALUE klass, SEL sel)
+{
+    NEWOBJ(re, struct rb_regexp);
+    OBJSETUP(re, klass, T_REGEXP);
+    re->unistr = NULL;
+    re->pattern = NULL;
+    return re;
+}
+
+static rb_match_t *
+match_alloc(VALUE klass, SEL sel)
+{
+    NEWOBJ(match, struct rb_match);
+    OBJSETUP(match, klass, T_MATCH);
+    match->regexp = NULL;
+    match->str = 0;
+    match->results = NULL;
+    match->results_count = 0;
+    return match;
+}
+
+static void
+regexp_finalize(rb_regexp_t *regexp)
+{
+    if (regexp->pattern != NULL) {
+	delete regexp->pattern;
+	regexp->pattern = NULL;
+    }
+    if (regexp->unistr != NULL) {
+	delete regexp->unistr;
+	regexp->unistr = NULL;
+    }
+}
+
+static IMP regexp_finalize_imp_super = NULL; 
+
+static void
+regexp_finalize_imp(void *rcv, SEL sel)
+{
+    regexp_finalize(RREGEXP(rcv));
+    if (regexp_finalize_imp_super != NULL) {
+	((void(*)(void *, SEL))regexp_finalize_imp_super)(rcv, sel);
+    }
+}
+
+static UnicodeString *
+str_to_unistr(VALUE str)
+{
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+
+    rb_str_get_uchars(str, &chars, &chars_len, &need_free);
+
+    UnicodeString *unistr = new UnicodeString((const UChar *)chars, chars_len);
+
+    if (need_free) {
+	free(chars);
+    }
+    return unistr;
+}
+
+static void
+sanitize_regexp_string(UnicodeString *unistr)
+{
+    // ICU does not support [[:word::], so we need to replace all
+    // occurences by \w.
+    UChar word_chars[10] = {'[', '[', ':', 'w', 'o', 'r', 'd', ':', ']', ']'};
+    UnicodeString word_str(word_chars, 10);
+    UChar repl_chars[2] = {'\\', 'w'};
+    UnicodeString repl_str(repl_chars, 2);
+    int32_t pos;
+    while ((pos = unistr->indexOf(word_str)) >= 0) {
+	unistr->replace(pos, 10, repl_str);
+    }
+}
+
+static bool
+init_from_string(rb_regexp_t *regexp, VALUE str, int option, VALUE *excp)
+{
+    option |= REGEXP_OPT_DEFAULT;
+
+    UnicodeString *unistr = str_to_unistr(str);
+    assert(unistr != NULL);
+
+    sanitize_regexp_string(unistr);
+
+    UParseError pe;
+    UErrorCode status = U_ZERO_ERROR;
+    RegexPattern *pattern = RegexPattern::compile(*unistr, option, pe, status);
+
+    if (pattern == NULL) {
+	delete unistr;
+	if (excp != NULL) {
+	    char error[1024];
+	    snprintf(error, sizeof error, "regexp compilation error: %s",
+		    u_errorName(status));
+	    *excp = rb_exc_new2(rb_eRegexpError, error);
+	}
+	return false;
+    }
+
+    regexp_finalize(regexp);
+    regexp->pattern = pattern;
+    regexp->unistr = unistr;
+
+    return true;
+}
+
+static void
+init_from_regexp(rb_regexp_t *regexp, rb_regexp_t *from)
+{
+    regexp_finalize(regexp);
+    regexp->unistr = new UnicodeString(*from->unistr);
+    regexp->pattern = new RegexPattern(*from->pattern);
+}
+
+static VALUE
+rb_str_compile_regexp(VALUE str, int options, VALUE *excp)
+{
+    rb_regexp_t *regexp = regexp_alloc(rb_cRegexp, 0);
+    if (!init_from_string(regexp, str, options, excp)) {
+	return Qnil;
+    }
+    return (VALUE)regexp;
+}
+
+bool
+rb_char_to_icu_option(int c, int *option)
+{
+    assert(option != NULL);
+    switch (c) {
+	case 'i':
+	    *option = REGEXP_OPT_IGNORECASE;
+	    return true;
+	case 'x':
+	    *option = REGEXP_OPT_EXTENDED;
+	    return true;
+	case 'm':
+	    *option = REGEXP_OPT_MULTILINE;
+	    return true;
+
+	// Stupid MRI encoding flags, let's ignore them for now.
+	case 'n':
+	case 'e':
+	case 'u':
+	case 's':
+	    *option = 0;
+	    return true;
+    }
+    *option = -1;
+    return false;
+}
+
+static VALUE
+reg_operand(VALUE s, bool check)
+{
+    if (SYMBOL_P(s)) {
+	return rb_sym_to_s(s);
+    }
+    else {
+	VALUE tmp = rb_check_string_type(s);
+	if (check && NIL_P(tmp)) {
+	    rb_raise(rb_eTypeError, "can't convert %s to String",
+		     rb_obj_classname(s));
+	}
+	return tmp;
+    }
+}
+
+static VALUE
+rb_check_regexp_type(VALUE re)
+{
+    return rb_check_convert_type(re, T_REGEXP, "Regexp", "to_regexp");
+}
+
+/*
+ *  call-seq:
+ *     Regexp.escape(str)   => string
+ *     Regexp.quote(str)    => string
+ *
+ *  Escapes any characters that would have special meaning in a regular
+ *  expression. Returns a new escaped string, or self if no characters are
+ *  escaped.  For any string,
+ *  <code>Regexp.new(Regexp.escape(<i>str</i>))=~<i>str</i></code> will be true.
+ *
+ *     Regexp.escape('\*?{}.')   #=> \\\*\?\{\}\.
+ *
+ */
+
+static VALUE
+regexp_quote(VALUE klass, SEL sel, VALUE pat)
+{
+    return rb_reg_quote(reg_operand(pat, true));
+}
+
+/*
+ *  call-seq:
+ *     Regexp.union(pat1, pat2, ...)            => new_regexp
+ *     Regexp.union(pats_ary)                   => new_regexp
+ *
+ *  Return a <code>Regexp</code> object that is the union of the given
+ *  <em>pattern</em>s, i.e., will match any of its parts. The <em>pattern</em>s
+ *  can be Regexp objects, in which case their options will be preserved, or
+ *  Strings. If no patterns are given, returns <code>/(?!)/</code>.
+ *
+ *     Regexp.union                         #=> /(?!)/
+ *     Regexp.union("penzance")             #=> /penzance/
+ *     Regexp.union("a+b*c")                #=> /a\+b\*c/
+ *     Regexp.union("skiing", "sledding")   #=> /skiing|sledding/
+ *     Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/
+ *     Regexp.union(/dogs/, /cats/i)        #=> /(?-mix:dogs)|(?i-mx:cats)/
+ */
+
+static VALUE regexp_to_s(VALUE rcv, SEL sel);
+
+static VALUE
+regexp_union(VALUE klass, SEL sel, int argc, VALUE *argv)
+{
+    const VALUE *args;
+
+    if (argc == 0) {
+	return rb_reg_new_str(rb_str_new2("(?!)"), 0);
+    }
+    else if (argc == 1) {
+	VALUE v = rb_check_regexp_type(argv[0]);
+	if (!NIL_P(v)) {
+	    return v;
+	}
+	v = rb_check_array_type(argv[0]);
+	if (!NIL_P(v)) {
+	    argc = RARRAY_LEN(argv[0]);
+	    args = RARRAY_PTR(argv[0]);
+	}
+	else {
+	    StringValue(argv[0]);
+	    return rb_reg_new_str(rb_reg_quote(argv[0]), 0);
+	}
+    }
+    else {
+	args = argv;
+    }
+
+    VALUE source = rb_unicode_str_new(NULL, 0);
+
+    for (int i = 0; i < argc; i++) {
+	VALUE arg = args[i];
+
+	if (i > 0) {
+	    rb_str_cat2(source, "|");
+	}
+
+	VALUE substr;
+	VALUE re = rb_check_regexp_type(arg);
+	if (!NIL_P(re)) {
+	    substr = regexp_to_s(re, 0);
+	}
+	else {
+	    StringValue(arg);
+	    substr = rb_reg_quote(arg);
+	}
+
+	rb_str_append(source, substr);
+    }
+
+    return rb_reg_new_str(source, 0);
+}
+
+/*
+ *  call-seq:
+ *     Regexp.last_match           => matchdata
+ *     Regexp.last_match(n)        => str
+ *
+ *  The first form returns the <code>MatchData</code> object generated by the
+ *  last successful pattern match. Equivalent to reading the global variable
+ *  <code>$~</code>. The second form returns the <i>n</i>th field in this
+ *  <code>MatchData</code> object.
+ *  <em>n</em> can be a string or symbol to reference a named capture.
+ *
+ *     /c(.)t/ =~ 'cat'        #=> 0
+ *     Regexp.last_match       #=> #<MatchData "cat" 1:"a">
+ *     Regexp.last_match(0)    #=> "cat"
+ *     Regexp.last_match(1)    #=> "a"
+ *     Regexp.last_match(2)    #=> nil
+ *
+ *     /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "var = val"
+ *     Regexp.last_match       #=> #<MatchData "var = val" lhs:"var" rhs:"val">
+ *     Regexp.last_match(:lhs) #=> "var"
+ *     Regexp.last_match(:rhs) #=> "val"
+ */
+
+static VALUE match_getter(void);
+static int match_backref_number(VALUE match, VALUE backref, bool check);
+
+static VALUE
+regexp_last_match(VALUE klass, SEL sel, int argc, VALUE *argv)
+{
+    VALUE nth;
+
+    if (argc > 0 && rb_scan_args(argc, argv, "01", &nth) == 1) {
+	VALUE match = rb_backref_get();
+	if (NIL_P(match)) {
+	    return Qnil;
+	}
+	const int n = match_backref_number(match, nth, true);
+	return rb_reg_nth_match(n, match);
+    }
+    return match_getter();
+}
+
+/*
+ *  call-seq:
+ *     Regexp.try_convert(obj) -> re or nil
+ *
+ *  Try to convert <i>obj</i> into a Regexp, using to_regexp method.
+ *  Returns converted regexp or nil if <i>obj</i> cannot be converted
+ *  for any reason.
+ *
+ *     Regexp.try_convert(/re/)         #=> /re/
+ *     Regexp.try_convert("re")         #=> nil
+ *
+ *     o = Object.new
+ *     Regexp.try_convert(o)            #=> nil
+ *     def o.to_regexp() /foo/ end
+ *     Regexp.try_convert(o)            #=> /foo/
+ *
+ */
+
+static VALUE
+regexp_try_convert(VALUE klass, SEL sel, VALUE obj)
+{
+    return rb_check_regexp_type(obj);
+}
+
+/*
+ *  call-seq:
+ *     Regexp.new(string [, options])                => regexp
+ *     Regexp.new(regexp)                            => regexp
+ *     Regexp.compile(string [, options])            => regexp
+ *     Regexp.compile(regexp)                        => regexp
+ *
+ *  Constructs a new regular expression from <i>pattern</i>, which can be either
+ *  a <code>String</code> or a <code>Regexp</code> (in which case that regexp's
+ *  options are propagated, and new options may not be specified (a change as of
+ *  Ruby 1.8). If <i>options</i> is a <code>Fixnum</code>, it should be one or
+ *  more of the constants <code>Regexp::EXTENDED</code>,
+ *  <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>,
+ *  <em>or</em>-ed together. Otherwise, if <i>options</i> is not
+ *  <code>nil</code>, the regexp will be case insensitive.
+ *
+ *     r1 = Regexp.new('^a-z+:\\s+\w+')           #=> /^a-z+:\s+\w+/
+ *     r2 = Regexp.new('cat', true)               #=> /cat/i
+ *     r3 = Regexp.new('dog', Regexp::EXTENDED)   #=> /dog/x
+ *     r4 = Regexp.new(r2)                        #=> /cat/i
+ */
+
+static VALUE
+regexp_initialize(VALUE self, SEL sel, int argc, VALUE *argv)
+{
+    if (argc == 0 || argc > 3) {
+	rb_raise(rb_eArgError, "wrong number of arguments");
+    }
+    if (TYPE(argv[0]) == T_REGEXP) {
+	VALUE re = argv[0];
+	if (argc > 1) {
+	    rb_warn("flags ignored");
+	}
+	assert(RREGEXP(re)->pattern != NULL);
+	init_from_regexp(RREGEXP(self), RREGEXP(re));
+    }
+    else {
+	int options = 0;
+	if (argc >= 2) {
+	    if (FIXNUM_P(argv[1])) {
+		options = FIX2INT(argv[1]);
+	    }
+	    else if (RTEST(argv[1])) {
+		options = REGEXP_OPT_IGNORECASE;
+	    }
+	}
+	VALUE str = argv[0];
+	StringValue(str);
+
+	VALUE exc = Qnil;
+	if (!init_from_string(RREGEXP(self), str, options, &exc)) {
+	    rb_exc_raise(exc);
+	}
+    }
+    return self;
+}
+
+static VALUE
+regexp_initialize_copy(VALUE rcv, SEL sel, VALUE other)
+{
+    if (TYPE(other) != T_REGEXP) {
+	rb_raise(rb_eTypeError, "wrong argument type");
+    }
+    init_from_regexp(RREGEXP(rcv), RREGEXP(other));
+    return rcv;
+}
+
+/*
+ *  call-seq:
+ *     rxp == other_rxp      => true or false
+ *     rxp.eql?(other_rxp)   => true or false
+ *
+ *  Equality---Two regexps are equal if their patterns are identical, they have
+ *  the same character set code, and their <code>casefold?</code> values are the
+ *  same.
+ *
+ *     /abc/  == /abc/x   #=> false
+ *     /abc/  == /abc/i   #=> false
+ *     /abc/  == /abc/n   #=> false
+ *     /abc/u == /abc/n   #=> false
+ */
+
+static VALUE
+regexp_equal(VALUE rcv, SEL sel, VALUE other)
+{
+    if (rcv == other) {
+	return Qtrue;
+    }
+    if (TYPE(other) != T_REGEXP) {
+	return Qfalse;
+    }
+
+    assert(RREGEXP(rcv)->unistr != NULL && RREGEXP(rcv)->pattern != NULL);
+    assert(RREGEXP(other)->unistr != NULL && RREGEXP(other)->pattern != NULL);
+
+    // Using the == operator on the RegexpPatterns does not work, for a
+    // reason... so we are comparing source strings and flags.
+    return *RREGEXP(rcv)->unistr == *RREGEXP(other)->unistr
+	&& RREGEXP(rcv)->pattern->flags() == RREGEXP(other)->pattern->flags()
+	? Qtrue : Qfalse;
+}
+
+/*
+ *  call-seq:
+ *     rxp =~ str    => integer or nil
+ *
+ *  Match---Matches <i>rxp</i> against <i>str</i>.
+ *
+ *     /at/ =~ "input data"   #=> 7
+ *     /ax/ =~ "input data"   #=> nil
+ *
+ *  If <code>=~</code> is used with a regexp literal with named captures,
+ *  captured strings (or nil) is assigned to local variables named by
+ *  the capture names.
+ *
+ *     /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "  x = y  "
+ *     p lhs    #=> "x"
+ *     p rhs    #=> "y"
+ *
+ *  If it is not matched, nil is assigned for the variables.
+ *
+ *     /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "  x = "   
+ *     p lhs    #=> nil
+ *     p rhs    #=> nil
+ *
+ *  This assignment is implemented in the Ruby parser.
+ *  So a regexp literal is required for the assignment. 
+ *  The assignment is not occur if the regexp is not a literal.
+ *
+ *     re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
+ *     re =~ "  x = "
+ *     p lhs    # undefined local variable
+ *     p rhs    # undefined local variable
+ *
+ *  A regexp interpolation, <code>#{}</code>, also disables
+ *  the assignment.
+ *
+ *     rhs_pat = /(?<rhs>\w+)/
+ *     /(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y"
+ *     p lhs    # undefined local variable
+ */
+
+int
+rb_reg_search(VALUE re, VALUE str, int pos, bool reverse)
+{
+    if (reverse) {
+	rb_raise(rb_eRuntimeError, "reverse searching is not implemented yet");
+    }
+
+    const long len = rb_str_chars_len(str);
+    if (pos > len || pos < 0) {
+	rb_backref_set(Qnil);
+	return -1;
+    }
+
+    UnicodeString *unistr = str_to_unistr(str);
+    assert(unistr != NULL);
+
+    UErrorCode status = U_ZERO_ERROR;
+    assert(RREGEXP(re)->pattern != NULL);
+    RegexMatcher *matcher = RREGEXP(re)->pattern->matcher(*unistr, status);
+
+    if (matcher == NULL) {
+	delete unistr;
+	rb_raise(rb_eRegexpError, "can't create matcher: %s",
+		u_errorName(status));
+    }
+
+    if (!matcher->find(pos, status)) {
+	// No match.
+	rb_backref_set(Qnil);
+	delete matcher;
+	delete unistr;
+	return -1;
+    }
+
+    // Match found.
+    const int res_count = 1 + matcher->groupCount();
+    rb_match_result_t *res = NULL;
+
+    VALUE match = rb_backref_get();
+    if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
+	// Creating a new Match object.
+	match = (VALUE)match_alloc(rb_cMatch, 0);
+	rb_backref_set(match);
+	res = (rb_match_result_t *)xmalloc(sizeof(rb_match_result_t)
+		* res_count);
+	GC_WB(&RMATCH(match)->results, res);
+	GC_WB(&RMATCH(match)->str, rb_str_new(NULL, 0));
+    }
+    else {
+	// Reusing the previous Match object.
+	assert(RMATCH(match)->results != NULL);
+	if (res_count > RMATCH(match)->results_count) {
+	    res = (rb_match_result_t *)xrealloc(RMATCH(match)->results,
+		    sizeof(rb_match_result_t) * res_count);
+	    if (res != RMATCH(match)->results) {
+		GC_WB(&RMATCH(match)->results, res);
+	    }
+	}
+	else {
+	    res = RMATCH(match)->results;
+	    memset(res, 0, sizeof(rb_match_result_t) * res_count);
+	}
+	assert(RMATCH(match)->str != 0);
+    }
+
+    RMATCH(match)->results_count = res_count;
+    GC_WB(&RMATCH(match)->regexp, re);
+
+    rb_str_set_len(RMATCH(match)->str, 0);
+    rb_str_append_uchars(RMATCH(match)->str, unistr->getBuffer(),
+	    unistr->length());
+
+    res[0].beg = matcher->start(status);
+    res[0].end = matcher->end(status);
+
+    for (int i = 0; i < matcher->groupCount(); i++) {
+	res[i + 1].beg = matcher->start(i + 1, status);
+	res[i + 1].end = matcher->end(i + 1, status);
+    }
+
+    delete matcher;
+    delete unistr;
+
+    return res[0].beg;
+}
+
+static long
+reg_match_pos(VALUE re, VALUE *strp, long pos)
+{
+    VALUE str = *strp;
+
+    if (NIL_P(str)) {
+	rb_backref_set(Qnil);
+	return -1;
+    }
+    *strp = str = reg_operand(str, true);
+    if (pos != 0) {
+	if (pos < 0) {
+	    VALUE l = rb_str_length(str);
+	    pos += NUM2INT(l);
+	    if (pos < 0) {
+		return pos;
+	    }
+	}
+	pos = rb_reg_adjust_startpos(re, str, pos, false);
+    }
+    return rb_reg_search(re, str, pos, 0);
+}
+
+VALUE
+regexp_match(VALUE rcv, SEL sel, VALUE str)
+{
+    const long pos = reg_match_pos(rcv, &str, 0);
+    if (pos < 0) {
+	return Qnil;
+    }
+    return LONG2FIX(pos);
+}
+
+/*
+ *  call-seq:
+ *     rxp.match(str)       => matchdata or nil
+ *     rxp.match(str,pos)   => matchdata or nil
+ *
+ *  Returns a <code>MatchData</code> object describing the match, or
+ *  <code>nil</code> if there was no match. This is equivalent to retrieving the
+ *  value of the special variable <code>$~</code> following a normal match.
+ *  If the second parameter is present, it specifies the position in the string
+ *  to begin the search.
+ *
+ *     /(.)(.)(.)/.match("abc")[2]   #=> "b"
+ *     /(.)(.)/.match("abc", 1)[2]   #=> "c"
+ *     
+ *  If a block is given, invoke the block with MatchData if match succeed, so
+ *  that you can write
+ *     
+ *     pat.match(str) {|m| ...}
+ *     
+ *  instead of
+ *      
+ *     if m = pat.match(str)
+ *       ...
+ *     end
+ *      
+ *  The return value is a value from block execution in this case.
+ */
+
+VALUE
+regexp_match2(VALUE rcv, SEL sel, int argc, VALUE *argv)
+{
+    VALUE result, str, initpos;
+    long pos;
+
+    if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
+	pos = NUM2LONG(initpos);
+    }
+    else {
+	pos = 0;
+    }
+
+    pos = reg_match_pos(rcv, &str, pos);
+    if (pos < 0) {
+	rb_backref_set(Qnil);
+	return Qnil;
+    }
+    result = rb_backref_get();
+    rb_match_busy(result);
+    if (!NIL_P(result) && rb_block_given_p()) {
+	return rb_yield(result);
+    }
+    return result;
+}
+
+/*
+ *  call-seq:
+ *     ~ rxp   => integer or nil
+ *
+ *  Match---Matches <i>rxp</i> against the contents of <code>$_</code>.
+ *  Equivalent to <code><i>rxp</i> =~ $_</code>.
+ *
+ *     $_ = "input data"
+ *     ~ /at/   #=> 7
+ */
+
+static VALUE
+regexp_match3(VALUE rcv, SEL sel)
+{
+    VALUE line = rb_lastline_get();
+    if (TYPE(line) != T_STRING) {
+	rb_backref_set(Qnil);
+	return Qnil;
+    }
+
+    const long start = rb_reg_search(rcv, line, 0, 0);
+    if (start < 0) {
+	return Qnil;
+    }
+    return LONG2FIX(start);
+}
+
+/*
+ *  call-seq:
+ *     rxp === str   => true or false
+ *
+ *  Case Equality---Synonym for <code>Regexp#=~</code> used in case statements.
+ *
+ *     a = "HELLO"
+ *     case a
+ *     when /^[a-z]*$/; print "Lower case\n"
+ *     when /^[A-Z]*$/; print "Upper case\n"
+ *     else;            print "Mixed case\n"
+ *     end
+ *
+ *  <em>produces:</em>
+ *
+ *     Upper case
+ */
+
+VALUE
+regexp_eqq(VALUE rcv, SEL sel, VALUE str)
+{
+    str = reg_operand(str, Qfalse);
+    if (NIL_P(str)) {
+	rb_backref_set(Qnil);
+	return Qfalse;
+    }
+    const long start = rb_reg_search(rcv, str, 0, false);
+    if (start < 0) {
+	return Qfalse;
+    }
+    return Qtrue;
+}
+
+/*
+ *  call-seq:
+ *      rxp.source   => str
+ *
+ *  Returns the original string of the pattern.
+ *
+ *      /ab+c/ix.source #=> "ab+c"
+ *
+ *  Note that escape sequences are retained as is.
+ *
+ *     /\x20\+/.source  #=> "\\x20\\+"
+ *
+ */
+
+static VALUE
+regexp_source(VALUE rcv, SEL sel)
+{
+    assert(RREGEXP(rcv)->unistr != NULL);
+
+    const UChar *chars = RREGEXP(rcv)->unistr->getBuffer();
+    const int32_t chars_len = RREGEXP(rcv)->unistr->length();
+    assert(chars_len >= 0);
+
+    VALUE str = rb_unicode_str_new(chars, chars_len);
+
+    if (OBJ_TAINTED(rcv)) {
+	OBJ_TAINT(str);
+    }
+    return str;
+}
+
+/*
+ * call-seq:
+ *    rxp.inspect   => string
+ *
+ * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly,
+ * <code>#inspect</code> actually produces the more natural version of
+ * the string than <code>#to_s</code>.
+ *
+ *      /ab+c/ix.inspect        #=> "/ab+c/ix"
+ *
+ */
+
+static VALUE
+regexp_inspect(VALUE rcv, SEL sel)
+{
+    VALUE str = rb_str_new2("/");
+    rb_str_concat(str, regexp_source(rcv, 0));
+    rb_str_cat2(str, "/");
+
+    const uint32_t options = rb_reg_options(rcv);
+    const bool mode_m = options & REGEXP_OPT_MULTILINE;
+    const bool mode_i = options & REGEXP_OPT_IGNORECASE;
+    const bool mode_x = options & REGEXP_OPT_EXTENDED;
+
+    if (mode_m) {
+	rb_str_cat2(str, "m");
+    }
+    if (mode_i) {
+	rb_str_cat2(str, "i");
+    }
+    if (mode_x) {
+	rb_str_cat2(str, "x");
+    }
+    
+    return str;
+}
+
+/*
+ *  call-seq:
+ *     rxp.to_s   => str
+ *
+ *  Returns a string containing the regular expression and its options (using the
+ *  <code>(?opts:source)</code> notation. This string can be fed back in to
+ *  <code>Regexp::new</code> to a regular expression with the same semantics as
+ *  the original. (However, <code>Regexp#==</code> may not return true when
+ *  comparing the two, as the source of the regular expression itself may
+ *  differ, as the example shows).  <code>Regexp#inspect</code> produces a
+ *  generally more readable version of <i>rxp</i>.
+ *
+ *      r1 = /ab+c/ix           #=> /ab+c/ix
+ *      s1 = r1.to_s            #=> "(?ix-m:ab+c)"
+ *      r2 = Regexp.new(s1)     #=> /(?ix-m:ab+c)/
+ *      r1 == r2                #=> false
+ *      r1.source               #=> "ab+c"
+ *      r2.source               #=> "(?ix-m:ab+c)"
+ */
+
+static VALUE
+regexp_to_s(VALUE rcv, SEL sel)
+{
+    VALUE str = rb_str_new2("(?");
+
+    const uint32_t options = rb_reg_options(rcv);
+    const bool mode_m = options & REGEXP_OPT_MULTILINE;
+    const bool mode_i = options & REGEXP_OPT_IGNORECASE;
+    const bool mode_x = options & REGEXP_OPT_EXTENDED;
+
+    if (mode_m) {
+	rb_str_cat2(str, "m");
+    }
+    if (mode_i) {
+	rb_str_cat2(str, "i");
+    }
+    if (mode_x) {
+	rb_str_cat2(str, "x");
+    }
+
+    if (!mode_m || !mode_i || !mode_x) {
+	rb_str_cat2(str, "-");
+	if (!mode_m) {
+	    rb_str_cat2(str, "m");
+	}
+	if (!mode_i) {
+	    rb_str_cat2(str, "i");
+	}
+	if (!mode_x) {
+	    rb_str_cat2(str, "x");
+	}
+    }
+
+    rb_str_cat2(str, ":");
+    rb_str_concat(str, regexp_source(rcv, 0));
+    rb_str_cat2(str, ")");
+
+    return str;
+}
+
+/*
+ *  call-seq:
+ *     rxp.casefold?   => true or false
+ *
+ *  Returns the value of the case-insensitive flag.
+ *
+ *      /a/.casefold?           #=> false
+ *      /a/i.casefold?          #=> true
+ *      /(?i:a)/.casefold?      #=> false
+ */
+
+int
+rb_reg_options(VALUE re)
+{
+    assert(RREGEXP(re)->pattern != NULL);
+    return RREGEXP(re)->pattern->flags();
+}
+
+static VALUE
+regexp_casefold(VALUE rcv, SEL sel)
+{
+    return rb_reg_options(rcv) & REGEXP_OPT_IGNORECASE ? Qtrue : Qfalse;
+}
+
+/*
+ *  call-seq:
+ *     rxp.options   => fixnum
+ *
+ *  Returns the set of bits corresponding to the options used when creating this
+ *  Regexp (see <code>Regexp::new</code> for details. Note that additional bits
+ *  may be set in the returned options: these are used internally by the regular
+ *  expression code. These extra bits are ignored if the options are passed to
+ *  <code>Regexp::new</code>.
+ *
+ *     Regexp::IGNORECASE                  #=> 1
+ *     Regexp::EXTENDED                    #=> 2
+ *     Regexp::MULTILINE                   #=> 4
+ *
+ *     /cat/.options                       #=> 0
+ *     /cat/ix.options                     #=> 3
+ *     Regexp.new('cat', true).options     #=> 1
+ *     /\xa1\xa2/e.options                 #=> 16
+ *
+ *     r = /cat/ix
+ *     Regexp.new(r.source, r.options)     #=> /cat/ix
+ */
+
+static VALUE
+regexp_options(VALUE rcv, SEL sel)
+{
+    return INT2FIX(rb_reg_options(rcv));
+}
+
+static VALUE
+match_getter(void)
+{
+    VALUE match = rb_backref_get();
+    if (NIL_P(match)) {
+	return Qnil;
+    }
+    rb_match_busy(match);
+    return match;
+}
+
+static void
+match_setter(VALUE val)
+{
+    if (!NIL_P(val)) {
+	Check_Type(val, T_MATCH);
+    }
+    rb_backref_set(val);
+}
+
+static VALUE
+last_match_getter(void)
+{
+    return rb_reg_last_match(rb_backref_get());
+}
+
+static VALUE
+prematch_getter(void)
+{
+    return rb_reg_match_pre(rb_backref_get());
+}
+
+static VALUE
+postmatch_getter(void)
+{
+    return rb_reg_match_post(rb_backref_get());
+}
+
+static VALUE
+last_paren_match_getter(void)
+{
+    return rb_reg_match_last(rb_backref_get());
+}
+
+static VALUE
+kcode_getter(void)
+{
+    rb_warn("variable $KCODE is no longer effective");
+    return Qnil;
+}
+
+static void
+kcode_setter(VALUE val, ID id)
+{
+    rb_warn("variable $KCODE is no longer effective; ignored");
+}
+
+static VALUE
+ignorecase_getter(void)
+{
+    rb_warn("variable $= is no longer effective");
+    return Qfalse;
+}
+
+static void
+ignorecase_setter(VALUE val, ID id)
+{
+    rb_warn("variable $= is no longer effective; ignored");
+}
+
+/*
+ *  Document-class: Regexp
+ *
+ *  A <code>Regexp</code> holds a regular expression, used to match a pattern
+ *  against strings. Regexps are created using the <code>/.../</code> and
+ *  <code>%r{...}</code> literals, and by the <code>Regexp::new</code>
+ *  constructor.
+ *
+ */
+
+static void Init_Match(void);
+
+void
+Init_Regexp(void)
+{
+    rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
+
+#define DEFINE_GVAR(name, getter, setter) \
+    rb_define_virtual_variable(name, (VALUE (*)(...))getter, \
+	    (void (*)(...))setter)
+
+    DEFINE_GVAR("$~", match_getter, match_setter);
+    DEFINE_GVAR("$&", last_match_getter, 0);
+    DEFINE_GVAR("$`", prematch_getter, 0);
+    DEFINE_GVAR("$'", postmatch_getter, 0);
+    DEFINE_GVAR("$+", last_paren_match_getter, 0);
+    DEFINE_GVAR("$=", ignorecase_getter, ignorecase_setter);
+    DEFINE_GVAR("$KCODE", kcode_getter, kcode_setter);
+    DEFINE_GVAR("$-K", kcode_getter, kcode_setter);
+
+#undef DEFINE_GVAR
+
+    rb_cRegexp = rb_define_class("Regexp", rb_cObject);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "alloc",
+	    (void *)regexp_alloc, 0);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "compile",
+	    (void *)rb_class_new_instance_imp, -1);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "quote",
+	    (void *)regexp_quote, 1);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "escape",
+	    (void *)regexp_quote, 1);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "union",
+	    (void *)regexp_union, -1);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "last_match",
+	    (void *)regexp_last_match, -1);
+    rb_objc_define_method(*(VALUE *)rb_cRegexp, "try_convert",
+	    (void *)regexp_try_convert, 1);
+
+    rb_objc_define_method(rb_cRegexp, "initialize",
+	    (void *)regexp_initialize, -1);
+    rb_objc_define_method(rb_cRegexp, "initialize_copy",
+	    (void *)regexp_initialize_copy, 1);
+    //rb_objc_define_method(rb_cRegexp, "hash", rb_reg_hash, 0);
+    rb_objc_define_method(rb_cRegexp, "eql?", (void *)regexp_equal, 1);
+    rb_objc_define_method(rb_cRegexp, "==", (void *)regexp_equal, 1);
+    rb_objc_define_method(rb_cRegexp, "=~", (void *)regexp_match, 1);
+    rb_objc_define_method(rb_cRegexp, "match", (void *)regexp_match2, -1);
+    rb_objc_define_method(rb_cRegexp, "~", (void *)regexp_match3, 0);
+    rb_objc_define_method(rb_cRegexp, "===", (void *)regexp_eqq, 1);
+    rb_objc_define_method(rb_cRegexp, "source", (void *)regexp_source, 0);
+    rb_objc_define_method(rb_cRegexp, "casefold?", (void *)regexp_casefold, 0);
+    rb_objc_define_method(rb_cRegexp, "options", (void *)regexp_options, 0);
+#if 0
+    rb_objc_define_method(rb_cRegexp, "encoding", rb_reg_encoding, 0);
+    rb_objc_define_method(rb_cRegexp, "fixed_encoding?",
+	    rb_reg_fixed_encoding_p, 0);
+    rb_objc_define_method(rb_cRegexp, "names", rb_reg_names, 0);
+    rb_objc_define_method(rb_cRegexp, "named_captures",
+	    rb_reg_named_captures, 0);
+#endif
+    rb_objc_define_method(rb_cRegexp, "to_s", (void *)regexp_to_s, 0);
+    rb_objc_define_method(rb_cRegexp, "inspect", (void *)regexp_inspect, 0);
+
+    regexp_finalize_imp_super = rb_objc_install_method2((Class)rb_cRegexp,
+	    "finalize", (IMP)regexp_finalize_imp);
+
+    rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(REGEXP_OPT_IGNORECASE));
+    rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(REGEXP_OPT_EXTENDED));
+    rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(REGEXP_OPT_MULTILINE));
+
+    Init_Match();
+}
+
+static VALUE
+match_initialize_copy(VALUE rcv, SEL sel, VALUE other)
+{
+    if (TYPE(other) != T_MATCH) {
+	rb_raise(rb_eTypeError, "wrong argument type");
+    }
+
+    GC_WB(&RMATCH(rcv)->str, RMATCH(other)->str);
+    GC_WB(&RMATCH(rcv)->regexp, RMATCH(other)->regexp);
+
+    const long len = sizeof(rb_match_result_t) * RMATCH(other)->results_count;
+    rb_match_result_t *res = (rb_match_result_t *)xmalloc(len);
+    memcpy(res, RMATCH(other)->results, len);
+    GC_WB(&RMATCH(rcv)->results, res);
+
+    return rcv;
+}
+
+/*
+ * call-seq:
+ *    mtch.regexp   => regexp
+ *
+ * Returns the regexp.
+ *
+ *     m = /a.*b/.match("abc")
+ *     m.regexp #=> /a.*b/
+ */
+
+static VALUE
+match_regexp(VALUE rcv, SEL sel)
+{
+    assert(RMATCH(rcv)->regexp != NULL);
+    return (VALUE)RMATCH(rcv)->regexp;
+}
+
+/*
+ * call-seq:
+ *    mtch.names   => [name1, name2, ...]
+ *
+ * Returns a list of names of captures as an array of strings.
+ * It is same as mtch.regexp.names.
+ *
+ *     /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
+ *     #=> ["foo", "bar", "baz"]
+ *
+ *     m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
+ *     m.names                          #=> ["x", "y"]
+ */
+
+static VALUE
+match_names(VALUE rcv, SEL sel)
+{
+    // TODO
+    return rb_ary_new();
+}
+
+/*
+ *  call-seq:
+ *     mtch.length   => integer
+ *     mtch.size     => integer
+ *
+ *  Returns the number of elements in the match array.
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.length   #=> 5
+ *     m.size     #=> 5
+ */
+
+static VALUE
+match_size(VALUE rcv, SEL sel)
+{
+    return INT2FIX(RMATCH(rcv)->results_count);
+}
+
+/*
+ *  call-seq:
+ *     mtch.offset(n)   => array
+ *
+ *  Returns a two-element array containing the beginning and ending offsets of
+ *  the <em>n</em>th match.
+ *  <em>n</em> can be a string or symbol to reference a named capture.
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.offset(0)      #=> [1, 7]
+ *     m.offset(4)      #=> [6, 7]
+ *
+ *     m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
+ *     p m.offset(:foo) #=> [0, 1]
+ *     p m.offset(:bar) #=> [2, 3]
+ *
+ */
+
+static int
+match_backref_number(VALUE match, VALUE backref, bool check)
+{
+    const char *name;
+
+    switch (TYPE(backref)) {
+	default:
+	    {
+		const int pos = NUM2INT(backref);
+		if (check) {
+		    if (pos < 0 || pos >= RMATCH(match)->results_count) {
+			rb_raise(rb_eIndexError,
+				"index %d out of matches", pos);
+		    }
+		}
+		return pos;
+	    }
+
+	case T_SYMBOL:
+	    name = rb_sym2name(backref);
+	    break;
+
+	case T_STRING:
+	    name = StringValueCStr(backref);
+	    break;
+    }
+
+    // TODO
+    rb_raise(rb_eIndexError, "named captures are not yet supported");
+}
+ 
+static VALUE
+match_offset(VALUE rcv, SEL sel, VALUE backref)
+{
+    const int pos = match_backref_number(rcv, backref, true);
+    return rb_assoc_new(INT2FIX(RMATCH(rcv)->results[pos].beg),
+	    INT2FIX(RMATCH(rcv)->results[pos].end));
+}
+
+/*
+ *  call-seq:
+ *     mtch.begin(n)   => integer
+ *
+ *  Returns the offset of the start of the <em>n</em>th element of the match
+ *  array in the string.
+ *  <em>n</em> can be a string or symbol to reference a named capture.
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.begin(0)       #=> 1
+ *     m.begin(2)       #=> 2
+ *
+ *     m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
+ *     p m.begin(:foo)  #=> 0
+ *     p m.begin(:bar)  #=> 2
+ */
+
+static VALUE
+match_begin(VALUE rcv, SEL sel, VALUE backref)
+{
+    const int pos = match_backref_number(rcv, backref, true);
+    return INT2FIX(RMATCH(rcv)->results[pos].beg);
+}
+
+/*
+ *  call-seq:
+ *     mtch.end(n)   => integer
+ *
+ *  Returns the offset of the character immediately following the end of the
+ *  <em>n</em>th element of the match array in the string.
+ *  <em>n</em> can be a string or symbol to reference a named capture.
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.end(0)         #=> 7
+ *     m.end(2)         #=> 3
+ *
+ *     m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
+ *     p m.end(:foo)    #=> 1
+ *     p m.end(:bar)    #=> 3
+ */
+
+static VALUE
+match_end(VALUE rcv, SEL sel, VALUE backref)
+{
+    const int pos = match_backref_number(rcv, backref, true);
+    return INT2FIX(RMATCH(rcv)->results[pos].end);
+}
+
+/*
+ *  call-seq:
+ *     mtch.to_a   => anArray
+ *
+ *  Returns the array of matches.
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.to_a   #=> ["HX1138", "H", "X", "113", "8"]
+ *
+ *  Because <code>to_a</code> is called when expanding
+ *  <code>*</code><em>variable</em>, there's a useful assignment
+ *  shortcut for extracting matched fields. This is slightly slower than
+ *  accessing the fields directly (as an intermediate array is
+ *  generated).
+ *
+ *     all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
+ *     all   #=> "HX1138"
+ *     f1    #=> "H"
+ *     f2    #=> "X"
+ *     f3    #=> "113"
+ */
+
+static VALUE
+match_array(VALUE match, int start)
+{
+    const int len = RMATCH(match)->results_count;
+    assert(start >= 0 && start < len);
+    const bool tainted = OBJ_TAINTED(match);
+
+    VALUE ary = rb_ary_new2(len);
+    for (int i = start; i < len; i++) {
+	VALUE str = rb_reg_nth_match(i, match);
+	if (tainted) {
+	    OBJ_TAINT(str);
+	}
+	rb_ary_push(ary, str);
+    }
+    return ary;
+}
+
+static VALUE
+match_to_a(VALUE rcv, SEL sel)
+{
+    return match_array(rcv, 0);
+}
+
+/*
+ *  call-seq:
+ *     mtch.captures   => array
+ *
+ *  Returns the array of captures; equivalent to <code>mtch.to_a[1..-1]</code>.
+ *
+ *     f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
+ *     f1    #=> "H"
+ *     f2    #=> "X"
+ *     f3    #=> "113"
+ *     f4    #=> "8"
+ */
+
+static VALUE
+match_captures(VALUE rcv, SEL sel)
+{
+    return match_array(rcv, 1);
+}
+
+/*
+ *  call-seq:
+ *     mtch[i]               => str or nil
+ *     mtch[start, length]   => array
+ *     mtch[range]           => array
+ *     mtch[name]            => str or nil
+ *
+ *  Match Reference---<code>MatchData</code> acts as an array, and may be
+ *  accessed using the normal array indexing techniques.  <i>mtch</i>[0] is
+ *  equivalent to the special variable <code>$&</code>, and returns the entire
+ *  matched string.  <i>mtch</i>[1], <i>mtch</i>[2], and so on return the values
+ *  of the matched backreferences (portions of the pattern between parentheses).
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m          #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
+ *     m[0]       #=> "HX1138"
+ *     m[1, 2]    #=> ["H", "X"]
+ *     m[1..3]    #=> ["H", "X", "113"]
+ *     m[-3, 2]   #=> ["X", "113"]
+ *
+ *     m = /(?<foo>a+)b/.match("ccaaab")
+ *     m          #=> #<MatchData "aaab" foo:"aaa">
+ *     m["foo"]   #=> "aaa"
+ *     m[:foo]    #=> "aaa"
+ */
+
+static VALUE
+match_aref(VALUE rcv, SEL sel, int argc, VALUE *argv)
+{
+    VALUE backref, rest;
+
+    rb_scan_args(argc, argv, "11", &backref, &rest);
+
+    if (NIL_P(rest)) {
+	switch (TYPE(backref)) {
+	    case T_STRING:
+	    case T_SYMBOL:
+	    case T_FIXNUM:
+		const int pos = match_backref_number(rcv, backref, false);
+		return rb_reg_nth_match(pos, rcv);
+	}
+    }
+    return rb_ary_aref(match_to_a(rcv, 0), 0, argc, argv);
+}
+
+/*
+ *  call-seq:
+ *
+ *     mtch.values_at([index]*)   => array
+ *
+ *  Uses each <i>index</i> to access the matching values, returning an array of
+ *  the corresponding matches.
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
+ *     m.to_a               #=> ["HX1138", "H", "X", "113", "8"]
+ *     m.values_at(0, 2, -2)   #=> ["HX1138", "X", "113"]
+ */
+
+static VALUE
+match_entry(VALUE match, long n)
+{
+    return rb_reg_nth_match(n, match);
+}
+
+static VALUE
+match_values_at(VALUE rcv, SEL sel, int argc, VALUE *argv)
+{
+    return rb_get_values_at(rcv, RMATCH(rcv)->results_count, argc, argv,
+	    match_entry);
+}
+
+/*
+ *  call-seq:
+ *     mtch.pre_match   => str
+ *
+ *  Returns the portion of the original string before the current match.
+ *  Equivalent to the special variable <code>$`</code>.
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.pre_match   #=> "T"
+ */
+
+static VALUE
+match_pre(VALUE rcv, SEL sel)
+{
+    assert(RMATCH(rcv)->results_count > 0);
+
+    VALUE str = rb_str_substr(RMATCH(rcv)->str, 0,
+	    RMATCH(rcv)->results[0].beg);
+
+    if (OBJ_TAINTED(rcv)) {
+	OBJ_TAINT(str);
+    }
+    return str;
+}
+
+VALUE
+rb_reg_match_pre(VALUE rcv)
+{
+    if (NIL_P(rcv)) {
+	return Qnil;
+    }
+    return match_pre(rcv, 0);
+}
+
+/*
+ *  call-seq:
+ *     mtch.post_match   => str
+ *
+ *  Returns the portion of the original string after the current match.
+ *  Equivalent to the special variable <code>$'</code>.
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
+ *     m.post_match   #=> ": The Movie"
+ */
+
+static VALUE
+match_post(VALUE rcv, SEL sel)
+{
+    assert(RMATCH(rcv)->results_count > 0);
+
+    const int pos = RMATCH(rcv)->results[0].end;
+    VALUE str = rb_str_substr(RMATCH(rcv)->str, pos,
+	    rb_str_chars_len(RMATCH(rcv)->str) - pos);
+
+    if (OBJ_TAINTED(rcv)) {
+	OBJ_TAINT(str);
+    }
+    return str;
+}
+
+VALUE
+rb_reg_match_post(VALUE rcv)
+{
+    if (NIL_P(rcv)) {
+	return Qnil;
+    }
+    return match_post(rcv, 0);
+}
+
+VALUE
+rb_reg_match_last(VALUE rcv)
+{
+    if (NIL_P(rcv)) {
+	return Qnil;
+    }
+    assert(RMATCH(rcv)->results_count > 0);
+    return rb_reg_nth_match(RMATCH(rcv)->results_count - 1, rcv);
+}
+
+/*
+ * call-seq:
+ *    mtch.inspect   => str
+ *
+ * Returns a printable version of <i>mtch</i>.
+ *
+ *     puts /.$/.match("foo").inspect
+ *     #=> #<MatchData "o">
+ *
+ *     puts /(.)(.)(.)/.match("foo").inspect
+ *     #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
+ *
+ *     puts /(.)(.)?(.)/.match("fo").inspect
+ *     #=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
+ *
+ *     puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
+ *     #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
+ *
+ */
+
+rb_match_result_t *
+rb_reg_match_results(VALUE match, int *count)
+{
+    assert(match != Qnil);
+    if (count != NULL) {
+	*count = RMATCH(match)->results_count;
+    }
+    return RMATCH(match)->results;
+}
+
+VALUE
+rb_reg_nth_match(int nth, VALUE match)
+{
+    if (NIL_P(match)) {
+	return Qnil;
+    }
+    if (nth >= RMATCH(match)->results_count) {
+	return Qnil;
+    }
+    if (nth < 0) {
+	nth += RMATCH(match)->results_count;
+	if (nth <= 0) {
+	    return Qnil;
+	}
+    }
+
+    const int beg = RMATCH(match)->results[nth].beg;
+    const int end = RMATCH(match)->results[nth].end;
+    if (beg == -1 || end == -1) {
+	return Qnil;
+    }
+
+    return rb_str_substr(RMATCH(match)->str, beg, end - beg);
+}
+
+VALUE
+rb_reg_last_match(VALUE match)
+{
+    return rb_reg_nth_match(0, match);
+}
+
+static VALUE
+match_inspect(VALUE rcv, SEL sel)
+{
+    VALUE str = rb_str_buf_new2("#<");
+    rb_str_buf_cat2(str, rb_obj_classname(rcv));
+    for (int i = 0; i < RMATCH(rcv)->results_count; i++) {
+	rb_str_buf_cat2(str, " ");
+	if (i > 0) {
+	    char buf[10];
+	    snprintf(buf, sizeof buf, "%d:", i);
+	    rb_str_buf_cat2(str, buf);
+	}
+	VALUE v = rb_reg_nth_match(i, rcv);
+	if (v == Qnil) {
+	    rb_str_buf_cat2(str, "nil");
+	}
+	else {
+	    rb_str_buf_append(str, rb_str_inspect(v));
+	}
+    }
+    rb_str_buf_cat2(str, ">");
+    return str;
+}
+
+/*
+ *  call-seq:
+ *     mtch.string   => str
+ *
+ *  Returns a frozen copy of the string passed in to <code>match</code>.
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.string   #=> "THX1138."
+ */
+
+static VALUE
+match_string(VALUE rcv, SEL sel)
+{
+    assert(RMATCH(rcv)->str != 0);
+    VALUE str = rb_str_dup(RMATCH(rcv)->str);
+    OBJ_FREEZE(str);
+    return str;
+}
+
+/*
+ *  call-seq:
+ *     mtch.to_s   => str
+ *
+ *  Returns the entire matched string.
+ *
+ *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ *     m.to_s   #=> "HX1138"
+ */
+
+static VALUE
+match_to_s(VALUE rcv, SEL sel)
+{
+    VALUE str = rb_reg_last_match(rcv);
+
+    if (NIL_P(str)) {
+	str = rb_str_new(0, 0);
+    }
+    if (OBJ_TAINTED(rcv)) {
+	OBJ_TAINT(str);
+    }
+    return str;
+}
+
+/*
+ *  Document-class: MatchData
+ *
+ *  <code>MatchData</code> is the type of the special variable <code>$~</code>,
+ *  and is the type of the object returned by <code>Regexp#match</code> and
+ *  <code>Regexp.last_match</code>. It encapsulates all the results of a pattern
+ *  match, results normally accessed through the special variables
+ *  <code>$&</code>, <code>$'</code>, <code>$`</code>, <code>$1</code>,
+ *  <code>$2</code>, and so on.
+ *
+ */
+
+static void
+Init_Match(void)
+{
+    rb_cMatch = rb_define_class("MatchData", rb_cObject);
+    rb_undef_method(CLASS_OF(rb_cMatch), "new");
+
+    rb_objc_define_method(*(VALUE *)rb_cMatch, "alloc", (void *)match_alloc, 0);
+    rb_objc_define_method(rb_cMatch, "initialize_copy",
+	    (void *)match_initialize_copy, 1);
+    rb_objc_define_method(rb_cMatch, "regexp", (void *)match_regexp, 0);
+    rb_objc_define_method(rb_cMatch, "names", (void *)match_names, 0);
+    rb_objc_define_method(rb_cMatch, "size", (void *)match_size, 0);
+    rb_objc_define_method(rb_cMatch, "length", (void *)match_size, 0);
+    rb_objc_define_method(rb_cMatch, "offset", (void *)match_offset, 1);
+    rb_objc_define_method(rb_cMatch, "begin", (void *)match_begin, 1);
+    rb_objc_define_method(rb_cMatch, "end", (void *)match_end, 1);
+    rb_objc_define_method(rb_cMatch, "to_a", (void *)match_to_a, 0);
+    rb_objc_define_method(rb_cMatch, "captures", (void *)match_captures, 0);
+    rb_objc_define_method(rb_cMatch, "[]", (void *)match_aref, -1);
+    rb_objc_define_method(rb_cMatch, "values_at", (void *)match_values_at, -1);
+    rb_objc_define_method(rb_cMatch, "pre_match", (void *)match_pre, 0);
+    rb_objc_define_method(rb_cMatch, "post_match", (void *)match_post, 0);
+    rb_objc_define_method(rb_cMatch, "to_s", (void *)match_to_s, 0);
+    rb_objc_define_method(rb_cMatch, "string", (void *)match_string, 0);
+    rb_objc_define_method(rb_cMatch, "inspect", (void *)match_inspect, 0);
+}
+
+// Compiler primitives.
+
+void
+regexp_get_uchars(VALUE re, const UChar **chars_p, long *chars_len_p)
+{
+    assert(chars_p != NULL && chars_len_p != NULL);
+
+    UnicodeString *unistr = RREGEXP(re)->unistr;
+    assert(unistr != NULL);
+
+    *chars_p = unistr->getBuffer();
+    *chars_len_p = unistr->length();
+}
+
+VALUE
+rb_unicode_regex_new_retained(UChar *chars, int chars_len, int options)
+{
+    VALUE str = rb_unicode_str_new(chars, chars_len);
+    VALUE re = rb_reg_new_str(str, options);
+    GC_RETAIN(re);
+    return re;
+}
+
+// MRI compatibility.
+
+VALUE
+rb_reg_check_preprocess(VALUE str)
+{
+    return Qnil;
+}
+
+VALUE
+rb_reg_compile(VALUE str, int options)
+{
+    VALUE exc = Qnil;
+    VALUE regexp = rb_str_compile_regexp(str, options, &exc); 
+    if (regexp == Qnil) {
+	rb_set_errinfo(exc);
+    }
+    return regexp;
+}
+
+VALUE
+rb_reg_new_str(VALUE str, int options)
+{
+    VALUE exc = Qnil;
+    VALUE regexp = rb_str_compile_regexp(str, options, &exc); 
+    if (regexp == Qnil) {
+	rb_exc_raise(exc);
+    }
+    return regexp;
+}
+
+VALUE
+rb_reg_regcomp(VALUE str)
+{
+    // XXX MRI caches the regexp here, maybe we should do the same...
+    return rb_reg_new_str(str, 0);
+}
+
+VALUE
+rb_reg_new(const char *cstr, long len, int options)
+{
+    return rb_reg_new_str(rb_usascii_str_new(cstr, len), options);
+}
+
+VALUE
+rb_reg_quote(VALUE pat)
+{
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    VALUE result;
+
+    rb_str_get_uchars(pat, &chars, &chars_len, &need_free);
+
+    long pos = 0;
+    for (; pos < chars_len; pos++) {
+	switch (chars[pos]) {
+	    case '[': case ']': case '{': case '}':
+	    case '(': case ')': case '|': case '-':
+	    case '*': case '.': case '\\': case '?':
+	    case '+': case '^': case '$': case ' ':
+	    case '#': case '\t': case '\f': case '\v':
+	    case '\n': case '\r':
+		goto meta_found;
+	} 
+    }
+
+    result = rb_unicode_str_new(chars, chars_len);
+    goto bail;
+
+meta_found:
+    result = rb_unicode_str_new(NULL, (chars_len * 2) + 1);
+
+    // Copy up to metacharacter.
+    rb_str_append_uchars(result, &chars[0], pos);
+
+    for (; pos < chars_len; pos++) {
+	UChar c = chars[pos];
+	switch (c) {
+	    case '[': case ']': case '{': case '}':
+	    case '(': case ')': case '|': case '-':
+	    case '*': case '.': case '\\': case '?':
+	    case '+': case '^': case '$': case '#':
+		rb_str_append_uchar(result, '\\');
+		break;
+
+	    case ' ':
+		rb_str_append_uchar(result, '\\');
+		rb_str_append_uchar(result, ' ');
+		continue;
+
+	    case '\t':
+		rb_str_append_uchar(result, '\\');
+		rb_str_append_uchar(result, 't');
+		continue;
+
+	    case '\n':
+		rb_str_append_uchar(result, '\\');
+		rb_str_append_uchar(result, 'n');
+		continue;
+
+	    case '\r':
+		rb_str_append_uchar(result, '\\');
+		rb_str_append_uchar(result, 'r');
+		continue;
+
+	    case '\f':
+		rb_str_append_uchar(result, '\\');
+		rb_str_append_uchar(result, 'f');
+		continue;
+
+	    case '\v':
+		rb_str_append_uchar(result, '\\');
+		rb_str_append_uchar(result, 'v');
+		continue;
+	}
+	rb_str_append_uchar(result, c);
+    }
+
+bail:
+    if (need_free) {
+	free(chars);
+    }
+    return result;
+}
+
+void
+rb_match_busy(VALUE match)
+{
+    FL_SET(match, MATCH_BUSY);
+}
+
+} // extern "C"

Copied: MacRuby/trunk/re.h (from rev 3744, MacRuby/branches/icu/re.h)
===================================================================
--- MacRuby/trunk/re.h	                        (rev 0)
+++ MacRuby/trunk/re.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,45 @@
+/* 
+ * MacRuby Regular Expressions.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ * 
+ * Copyright (C) 2010, Apple Inc. All rights reserved.
+ */
+
+#ifndef __RE_H_
+#define __RE_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+bool rb_char_to_icu_option(int c, int *option);
+
+VALUE regexp_eqq(VALUE rcv, SEL sel, VALUE str);
+VALUE regexp_match(VALUE rcv, SEL sel, VALUE str);
+VALUE regexp_match2(VALUE rcv, SEL sel, int argc, VALUE *argv);
+
+VALUE rb_reg_quote(VALUE pat);
+VALUE rb_reg_regcomp(VALUE str);
+int rb_reg_search(VALUE re, VALUE str, int pos, bool reverse);
+
+void regexp_get_uchars(VALUE re, const UChar **chars_p, long *chars_len_p);
+
+typedef struct rb_match_result {
+    unsigned int beg;
+    unsigned int end;
+} rb_match_result_t;
+
+rb_match_result_t *rb_reg_match_results(VALUE match, int *count);
+
+static inline int
+rb_reg_adjust_startpos(VALUE re, VALUE str, int pos, bool reverse)
+{
+    return reverse ? -pos : rb_str_chars_len(str) - pos;
+}
+
+#if defined(__cplusplus)
+} // extern "C"
+#endif
+
+#endif // __RE_H_

Modified: MacRuby/trunk/ruby.c
===================================================================
--- MacRuby/trunk/ruby.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/ruby.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -11,30 +11,19 @@
 
 **********************************************************************/
 
-#include "ruby/ruby.h"
-#include "ruby/node.h"
-#include "ruby/encoding.h"
-#include "dln.h"
 #include <stdio.h>
 #include <sys/types.h>
 #include <ctype.h>
-#include "vm.h"
-
-#ifdef HAVE_UNISTD_H
 #include <unistd.h>
-#endif
-#if defined(HAVE_FCNTL_H)
 #include <fcntl.h>
-#elif defined(HAVE_SYS_FCNTL_H)
-#include <sys/fcntl.h>
-#endif
-#ifdef HAVE_SYS_PARAM_H
-# include <sys/param.h>
-#endif
-#ifndef MAXPATHLEN
-# define MAXPATHLEN 1024
-#endif
+#include <sys/param.h>
 
+#include "ruby/ruby.h"
+#include "ruby/node.h"
+#include "ruby/encoding.h"
+#include "dln.h"
+#include "vm.h"
+#include "encoding.h"
 #include "ruby/util.h"
 
 #ifndef HAVE_STDLIB_H
@@ -564,17 +553,19 @@
 
 	  case 'e':
 	    forbid_setid("-e");
-	    if (!*++s) {
+	    if (*++s == '\0') {
 		s = argv[1];
-		argc--, argv++;
+		argc--;
+		argv++;
 	    }
-	    if (!s) {
+	    if (s == NULL) {
 		rb_raise(rb_eRuntimeError, "no code specified for -e");
 	    }
-	    if (!opt->e_script) {
-		opt->e_script = rb_str_new(0, 0);
-		if (opt->script == 0)
+	    if (opt->e_script == 0) {
+		opt->e_script = rb_str_new(NULL, 0);
+		if (opt->script == NULL) {
 		    opt->script = "-e";
+		}
 	    }
 	    rb_str_cat2(opt->e_script, s);
 	    rb_str_cat2(opt->e_script, "\n");
@@ -582,12 +573,13 @@
 
 	  case 'r':
 	    forbid_setid("-r");
-	    if (*++s) {
+	    if (*++s != '\0') {
 		add_modules(s);
 	    }
 	    else if (argv[1]) {
 		add_modules(argv[1]);
-		argc--, argv++;
+		argc--;
+		argv++;
 	    }
 	    break;
 
@@ -832,7 +824,7 @@
 static rb_encoding *
 opt_enc_find(VALUE enc_name)
 {
-    rb_encoding *enc = rb_enc_find2(enc_name);
+    rb_encoding *enc = rb_enc_find(RSTRING_PTR(enc_name));
     if (enc == NULL) {
 	rb_raise(rb_eRuntimeError, "unknown encoding name - %s", 
 	    RSTRING_PTR(enc_name));
@@ -840,7 +832,7 @@
     return enc;
 }
 
-VALUE rb_progname;
+VALUE rb_progname = Qnil;
 VALUE rb_argv0;
 
 static rb_encoding *src_encoding;
@@ -1093,16 +1085,15 @@
 		if (NIL_P(line)) {
 		    return 0;
 		}
-		assert(*(VALUE *)line == rb_cByteString);
-
 		if ((p = strstr(RSTRING_PTR(line), "ruby")) == 0) {
 		    /* not ruby script, kick the program */
 		    char **argv;
 		    char *path;
 		    char *pend;
 
-		    p = (char *)rb_bytestring_byte_pointer(line);
-		    pend = p + rb_bytestring_length(line);
+		    line = rb_str_bstr(line);
+		    p = (char *)rb_bstr_bytes(line);
+		    pend = p + rb_bstr_length(line);
 
 		    if (pend[-1] == '\n') {
 			pend--;	/* chomp line */
@@ -1136,8 +1127,8 @@
 	      start_read:
 		p += 4;
 
-		char *linebuf = (char *)rb_bytestring_byte_pointer(line);
-		long linebuflen = rb_bytestring_length(line);
+		char *linebuf = (char *)rb_bstr_bytes(line);
+		const long linebuflen = rb_bstr_length(line);
 
 		linebuf[linebuflen - 1] = '\0';
 		if (linebuf[linebuflen - 2] == '\r') {
@@ -1285,14 +1276,18 @@
 	}
     }
 #endif
+    GC_RELEASE(rb_progname);
     rb_progname = rb_tainted_str_new(s, i);
+    GC_RETAIN(rb_progname);
 }
 
 void
 ruby_script(const char *name)
 {
-    if (name) {
+    if (name != NULL) {
+	GC_RELEASE(rb_progname);
 	rb_progname = rb_tainted_str_new2(name);
+	GC_RETAIN(rb_progname);
     }
 }
 
@@ -1359,7 +1354,6 @@
 
     rb_define_hooked_variable("$0", &rb_progname, 0, set_arg0);
     rb_define_hooked_variable("$PROGRAM_NAME", &rb_progname, 0, set_arg0);
-    GC_ROOT(&rb_progname);
 
     rb_define_global_const("ARGV", rb_argv);
     rb_global_variable(&rb_argv0);

Modified: MacRuby/trunk/spec/frozen/core/matchdata/element_reference_spec.rb
===================================================================
--- MacRuby/trunk/spec/frozen/core/matchdata/element_reference_spec.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/spec/frozen/core/matchdata/element_reference_spec.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -17,4 +17,5 @@
   end
 end
 
-language_version __FILE__, "element_reference"
+# MacRuby does not support named captures yet.
+#language_version __FILE__, "element_reference"

Modified: MacRuby/trunk/spec/frozen/core/matchdata/names_spec.rb
===================================================================
--- MacRuby/trunk/spec/frozen/core/matchdata/names_spec.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/spec/frozen/core/matchdata/names_spec.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,3 +1,4 @@
 require File.dirname(__FILE__) + '/../../spec_helper'
 
-language_version __FILE__, "names"
+# MacRuby does not support named captures yet.
+#language_version __FILE__, "names"

Modified: MacRuby/trunk/spec/frozen/core/regexp/named_captures_spec.rb
===================================================================
--- MacRuby/trunk/spec/frozen/core/regexp/named_captures_spec.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/spec/frozen/core/regexp/named_captures_spec.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1 +1,2 @@
-language_version __FILE__, "named_captures"
+# MacRuby does not support named captures yet.
+#language_version __FILE__, "named_captures"

Modified: MacRuby/trunk/spec/frozen/core/regexp/names_spec.rb
===================================================================
--- MacRuby/trunk/spec/frozen/core/regexp/names_spec.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/spec/frozen/core/regexp/names_spec.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1 +1,2 @@
-language_version __FILE__, "names"
+# MacRuby does not support named captures yet.
+#language_version __FILE__, "names"

Modified: MacRuby/trunk/spec/frozen/language/regexp/anchors_spec.rb
===================================================================
--- MacRuby/trunk/spec/frozen/language/regexp/anchors_spec.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/spec/frozen/language/regexp/anchors_spec.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -16,7 +16,9 @@
     (/($^)($^)/ =~ "foo\n\n").should == "foo\n".size and $~.to_a.should == ["", "", ""]
 
     # Different start of line chars
+=begin # MacRuby's regexps consider \r as a new line
     /^bar/.match("foo\rbar").should be_nil
+=end
     /^bar/.match("foo\0bar").should be_nil
 
     # Trivial
@@ -45,7 +47,9 @@
     (/[^o]$/ =~ "foo\n\n").should == ("foo\n".size - 1) and $~.to_a.should == ["\n"]
 
     # Different end of line chars
+=begin # MacRuby's regexps consider \r as a new line
     /foo$/.match("foo\r\nbar").should be_nil
+=endif
     /foo$/.match("foo\0bar").should be_nil
 
     # Trivial
@@ -91,7 +95,9 @@
 
     # Different end of line chars
     /foo\Z/.match("foo\0bar").should be_nil
+=begin # MacRuby's regexps consider \r as a new line
     /foo\Z/.match("foo\r\n").should be_nil
+=end
 
     # Grouping
     /(foo\Z)/.match("foo").to_a.should == ["foo", "foo"]

Modified: MacRuby/trunk/spec/frozen/language/versions/regexp_1.9.rb
===================================================================
--- MacRuby/trunk/spec/frozen/language/versions/regexp_1.9.rb	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/spec/frozen/language/versions/regexp_1.9.rb	2010-03-12 23:56:52 UTC (rev 3746)
@@ -9,9 +9,12 @@
     /foo.(?<!\d)/.match("foo1 fooA").to_a.should == ["fooA"]
   end
 
+=begin
+  # MacRuby does not support named captures
   it 'supports \g (named backreference)' do
     /(?<foo>foo.)bar\g<foo>/.match("foo1barfoo2").to_a.should == ["foo1barfoo2", "foo2"]
   end
+=end
 
   it 'supports character class composition' do
     /[a-z&&[^a-c]]+/.match("abcdef").to_a.should == ["def"]
@@ -25,4 +28,4 @@
     /fooA?+Abar/.match("fooAAAbar").should be_nil
     /fooA*+Abar/.match("fooAAAbar").should be_nil
   end
-end
\ No newline at end of file
+end

Modified: MacRuby/trunk/sprintf.c
===================================================================
--- MacRuby/trunk/sprintf.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/sprintf.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -12,10 +12,8 @@
 #include <stdarg.h>
 
 #include "ruby/ruby.h"
-#include "ruby/node.h"
 #include "ruby/encoding.h"
-#include "vm.h"
-#include "compiler.h"
+#include "encoding.h"
 
 /*
  *  call-seq:
@@ -292,13 +290,10 @@
 VALUE
 rb_enc_sprintf(rb_encoding *enc, const char *format, ...)
 {
-    VALUE result;
     va_list ap;
-
     va_start(ap, format);
-    result = rb_enc_vsprintf(enc, format, ap);
+    VALUE result = rb_enc_vsprintf(enc, format, ap);
     va_end(ap);
-
     return result;
 }
 
@@ -311,13 +306,10 @@
 VALUE
 rb_sprintf(const char *format, ...)
 {
-    VALUE result;
     va_list ap;
-
     va_start(ap, format);
-    result = rb_vsprintf(format, ap);
+    VALUE result = rb_vsprintf(format, ap);
     va_end(ap);
-
     return result;
 }
 
@@ -346,10 +338,9 @@
 #define isprenum(ch) ((ch) == '-' || (ch) == ' ' || (ch) == '+')
 
 static void
-pad_format_value(VALUE arg, long start, long width,
-	CFStringRef pad)
+pad_format_value(VALUE arg, long start, long width, VALUE pad)
 {
-    long slen = (long)CFStringGetLength((CFStringRef)arg);
+    const long slen = rb_str_chars_len(arg);
     if (width <= slen) {
 	return;
     }
@@ -358,43 +349,50 @@
     }
     width -= slen;
     do {
-	CFStringInsert((CFMutableStringRef)arg, start, pad);
-    } while (--width > 0);
+	rb_str_update(arg, start, 0, pad);
+    }
+    while (--width > 0);
 }
 
 static long
-cstr_update(char **str, unsigned long start, unsigned long num, char *replace)
+cstr_update(UChar **str, long *str_len, long start, long num, VALUE replace)
 {
-    unsigned long len = strlen(*str) + 1;
-    unsigned long replace_len = strlen(replace);
+    const long len = *str_len;
+    long replace_len = replace == 0 ? 0 : rb_str_chars_len(replace);
     if (start + num > len) {
 	num = len - start;
     }
     if (replace_len >= num) {
-	char *new_str = (char *)xmalloc(len + replace_len - num);
-	memcpy(new_str, *str, len);
-	*str = new_str;
+	*str_len = len + replace_len - num;
+	*str = (UChar *)xrealloc(*str,
+		sizeof(UChar) * (len + replace_len - num));
     }
     if (replace_len != num) {
-	bcopy(*str + start + num, *str + start + replace_len, len - start -
-		num);
+	bcopy(*str + start + num, *str + start + replace_len,
+		sizeof(UChar) * (len - start - num));
     }
     if (replace_len > 0) {
-	bcopy(replace, *str + start, replace_len);
+	UChar *replace_chars = NULL;
+	bool need_free = false;
+	rb_str_get_uchars(replace, &replace_chars, &replace_len, &need_free);
+	assert(replace_len > 0);
+	bcopy(replace_chars, *str + start, sizeof(UChar) * replace_len);
+	if (need_free) {
+	    free(replace_chars);
+	}
     }
     return replace_len - num;
 }
 
-VALUE
-get_named_arg(char *format_str, unsigned long format_len, unsigned long *i,
-	VALUE hash)
+static VALUE
+get_named_arg(UChar *format_str, long format_len, long *i, VALUE hash)
 {
     if (TYPE(hash) != T_HASH) {
 	rb_raise(rb_eArgError,
 		 "hash required for named references");
     }
-    char closing = format_str[(*i)++] + 2;
-    char *str_ptr = format_str + *i;
+    UChar closing = format_str[(*i)++] + 2;
+    UChar *str_ptr = &format_str[*i];
     while (*i < format_len && format_str[*i] != closing) {
 	(*i)++;
     }
@@ -402,35 +400,42 @@
 	rb_raise(rb_eArgError,
 		 "malformed name - unmatched parenthesis");
     }
-    format_str[*i] = '\0';
-    hash = rb_hash_aref(hash, rb_name2sym(str_ptr));
-    format_str[*i] = closing;
-    return (hash);
+    VALUE substr = rb_unicode_str_new(str_ptr, str_ptr - format_str);
+    hash = rb_hash_aref(hash, ID2SYM(rb_intern_str(substr)));
+    return hash;
 }
 
-// XXX
-// - this method uses strtol to read numbers from the format string, so
-//   extremely large numbers get silently truncated. this should be fixed
-// - switch to a cfstring format string to allow for proper encoding support
-    
 // XXX look for arguments that are altered but not duped
 VALUE
 rb_str_format(int argc, const VALUE *argv, VALUE fmt)
 {
     bool tainted = OBJ_TAINTED(fmt);
-    fmt = rb_str_new3(fmt);
-    char *format_str = (char *)RSTRING_PTR(fmt);
-    unsigned long format_len = strlen(format_str);
-    long num;
+
+    UChar *format_str = NULL;
+    long format_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(fmt, &format_str, &format_len, &need_free);
+    if (format_len == 0) {
+	goto bail;
+    }
+    UChar *tmp = (UChar *)xmalloc(format_len * sizeof(UChar));
+    memcpy(tmp, format_str, format_len * sizeof(UChar));
+    if (need_free) {
+	free(format_str);
+    }
+    format_str = tmp;
+
+    long num, pos;
     int j = 0;
     int ref_type = 0;
+    long format_str_capa = format_len;
 
-    for (unsigned long i = 0; i < format_len; i++) {
+    for (long i = 0; i < format_len; i++) {
 	if (format_str[i] != '%') {
 	    continue;
 	}
 	if (format_str[i + 1] == '%') {
-	    cstr_update(&format_str, i, 1, (char *)"");
+	    cstr_update(&format_str, &format_str_capa, i, 1, 0);
 	    continue;
 	}
 
@@ -445,11 +450,10 @@
 	long width = 0;
 	long precision = 0;
 	int base = 0;
-	CFStringRef negative_pad = NULL;
-	CFStringRef sharp_pad = CFSTR("");
-	char *str_ptr;
+	VALUE negative_pad = 0;
+	VALUE sharp_pad = rb_str_new2("");
+	const long start = i;
 
-	unsigned long start = i;
 	while (i++ < format_len) {
 	    switch (format_str[i]) {
 		case '#':
@@ -467,16 +471,17 @@
 			    i--;
 			    break;
 			}
-			num = strtol(format_str + i, &str_ptr, 10);
-			if (str_ptr == format_str + i--) {
+
+			num = rb_uchar_strtol(format_str, format_len, i, &pos);
+			if (pos == i--) {
 			    SET_REF_TYPE(REL_REF);
 			    width = NUM2LONG(rb_Integer(GETNTHARG(j)));
 			    j++;
 			}
-			else if (*str_ptr == '$') {
+			else if (format_str[pos] == '$') {
 			    SET_REF_TYPE(ABS_REF);
 			    width = NUM2LONG(rb_Integer(GETNTHARG(num - 1)));
-			    i = str_ptr - format_str;
+			    i = pos;
 			}
 		    }
 		    if (width < 0) {
@@ -516,9 +521,9 @@
 		case '7':
 		case '8':
 		case '9':
-		    num = strtol(format_str + i, &str_ptr, 10);
-		    i = str_ptr - format_str;
-		    if (*str_ptr == '$') {
+		    num = rb_uchar_strtol(format_str, format_len, i, &pos);
+		    i = pos;
+		    if (format_str[pos] == '$') {
 			if (num == 0) {
 			    rb_raise(rb_eArgError, "invalid absolute argument");
 			}
@@ -546,23 +551,26 @@
 				i--;
 				break;
 			    }
-			    num = strtol(format_str + i, &str_ptr, 10);
-			    if (str_ptr == format_str + i--) {
+
+			    num = rb_uchar_strtol(format_str, format_len,
+				    i, &pos);
+			    if (num == i--) {
 				SET_REF_TYPE(REL_REF);
 				precision = NUM2LONG(rb_Integer(GETNTHARG(j)));
 				j++;
 			    }
-			    else if (*str_ptr == '$') {
+			    else if (format_str[pos] == '$') {
 				SET_REF_TYPE(ABS_REF);
 				precision = NUM2LONG(rb_Integer(GETNTHARG(
 					num - 1)));
-				i = str_ptr - format_str;
+				i = pos;
 			    }
 			}
 		    }
 		    else if (isdigit(format_str[i])) {
-			precision = strtol(format_str + i, &str_ptr, 10);
-			i = str_ptr - format_str - 1;
+			precision = rb_uchar_strtol(format_str, format_len,
+				i, &pos);
+			i = pos - 1;
 		    }
 		    else {
 			rb_raise(rb_eArgError, "invalid precision");
@@ -592,24 +600,24 @@
 		case 'x':
 		case 'X':
 		    base = 16;
-		    negative_pad = CFSTR("f");
-		    sharp_pad = CFSTR("0x");
+		    negative_pad = rb_str_new2("f");
+		    sharp_pad = rb_str_new2("0x");
 		    complete = true;
 		    break;
 
 		case 'o':
 		case 'O':
 		    base = 8;
-		    negative_pad = CFSTR("7");
-		    sharp_pad = CFSTR("0");
+		    negative_pad = rb_str_new2("7");
+		    sharp_pad = rb_str_new2("0");
 		    complete = true;
 		    break;
 
 		case 'B':
 		case 'b':
 		    base = 2;
-		    negative_pad = CFSTR("1");
-		    sharp_pad = CFSTR("0b");
+		    negative_pad = rb_str_new2("1");
+		    sharp_pad = rb_str_new2("0b");
 		    complete = true;
 		    break;
 
@@ -647,42 +655,43 @@
 				value < 0 ? "-Inf" : "Inf"));
 			if (isnan(value) || value > 0) {
 			    if (plus_flag) {
-				rb_str_update(arg, 0, 0, (VALUE)CFSTR("+"));
+				rb_str_update(arg, 0, 0, rb_str_new2("+"));
 			    }
 			    else if (space_flag) {
-				rb_str_update(arg, 0, 0, (VALUE)CFSTR(" "));
+				rb_str_update(arg, 0, 0, rb_str_new2(" "));
 			    }
 			}
 			break;
 		    }
 
-		    arg = rb_str_new(format_str + i, 1);
+		    arg = rb_unicode_str_new(&format_str[i], 1);
 		    if (precision_flag) {
 			rb_str_update(arg, 0, 0, rb_big2str(LONG2NUM(precision),
 				10));
-			rb_str_update(arg, 0, 0, (VALUE)CFSTR("."));
+			rb_str_update(arg, 0, 0, rb_str_new2("."));
 		    }
 		    rb_str_update(arg, 0, 0, rb_big2str(LONG2NUM(width), 10));
 		    if (minus_flag) {
-			rb_str_update(arg, 0, 0, (VALUE)CFSTR("-"));
+			rb_str_update(arg, 0, 0, rb_str_new2("-"));
 		    }
 		    else if (zero_flag) {
-			rb_str_update(arg, 0, 0, (VALUE)CFSTR("0"));
+			rb_str_update(arg, 0, 0, rb_str_new2("0"));
 		    }
 		    if (plus_flag) {
-			rb_str_update(arg, 0, 0, (VALUE)CFSTR("+"));
+			rb_str_update(arg, 0, 0, rb_str_new2("+"));
 		    }
 		    else if (space_flag) {
-			rb_str_update(arg, 0, 0, (VALUE)CFSTR(" "));
+			rb_str_update(arg, 0, 0, rb_str_new2(" "));
 		    }
 		    if (sharp_flag) {
-			rb_str_update(arg, 0, 0, (VALUE)CFSTR("#"));
+			rb_str_update(arg, 0, 0, rb_str_new2("#"));
 		    }
-		    rb_str_update(arg, 0, 0, (VALUE)CFSTR("%"));
+		    rb_str_update(arg, 0, 0, rb_str_new2("%"));
 
-		    asprintf(&str_ptr, RSTRING_PTR(arg), value);
-		    arg = rb_str_new2(str_ptr);
-		    free(str_ptr);
+		    char *ptr;
+		    asprintf(&ptr, RSTRING_PTR(arg), value);
+		    arg = rb_str_new2(ptr);
+		    free(ptr);
 		    break;
 		}
 
@@ -691,11 +700,10 @@
 		case 'p':
 		case '@':
 		    GET_ARG();
-		    arg = (tolower(format_str[i]) != 's' ? rb_inspect(arg)
-			    : TYPE(arg) == T_STRING ? rb_str_new3(arg)
-			    : rb_obj_as_string(arg));
-		    if (precision_flag && precision
-			    < CFStringGetLength((CFStringRef)arg)) {
+		    arg = (tolower(format_str[i]) != 's'
+			    ? rb_inspect(arg) : TYPE(arg) == T_STRING
+				? rb_str_new3(arg) : rb_obj_as_string(arg));
+		    if (precision_flag && precision < rb_str_chars_len(arg)) {
 			CFStringPad((CFMutableStringRef)arg, NULL, precision,
 				0);
 		    }
@@ -715,7 +723,7 @@
 	    if (base != 0) {
 		bool sign_pad = false;
 		unsigned long num_index = 0;
-		CFStringRef zero_pad = CFSTR("0");
+		VALUE zero_pad = rb_str_new2("0");
 
 		VALUE num = rb_Integer(arg);
 		if (TYPE(num) == T_FIXNUM) {
@@ -726,7 +734,7 @@
 		}
 		if (IS_NEG(num)) {
 		    num_index = 1;
-		    if (!sign_pad && negative_pad != NULL) {
+		    if (!sign_pad && negative_pad != 0) {
 			zero_pad = negative_pad;
 			num = rb_big_clone(num);
 			rb_big_2comp(num);
@@ -734,9 +742,11 @@
 		}
 
 		arg = rb_big2str(num, base);
-		if (!sign_pad && IS_NEG(num) && negative_pad != NULL) {
-		    char neg = *RSTRING_PTR(negative_pad);
-		    str_ptr = (char *)RSTRING_PTR(arg) + 1;
+		if (!sign_pad && IS_NEG(num) && negative_pad != 0) {
+		    break; // TODO
+#if 0
+		    UChar neg = CFStringGetCharacterAtIndex(negative_pad, 0);
+		    char *str_ptr = (char *)RSTRING_PTR(arg) + 1;
 		    if (base == 8) {
 			*str_ptr |= ((~0 << 3) >> ((3 * strlen(str_ptr)) %
 				(sizeof(BDIGIT) * 8))) & ~(~0 << 3);
@@ -747,11 +757,13 @@
 		    rb_str_update(arg, 0, num_index, (VALUE)negative_pad);
 		    rb_str_update(arg, 0, 0, (VALUE)CFSTR(".."));
 		    num_index = 2;
+#endif
 		}
 
 		if (precision_flag) {
-		    pad_format_value(arg, num_index, precision + (IS_NEG(num) &&
-			    (sign_pad || negative_pad == NULL) ? 1 : 0),
+		    pad_format_value(arg, num_index,
+			    precision + (IS_NEG(num)
+				&& (sign_pad || negative_pad == 0) ? 1 : 0),
 			    zero_pad);
 		}
 		if (sharp_flag && rb_cmpint(num, Qfalse, Qfalse) != 0) {
@@ -760,7 +772,7 @@
 		}
 		if (sign_pad && RBIGNUM_POSITIVE_P(num)) {
 		    rb_str_update(arg, 0, 0, (VALUE)(plus_flag ?
-			    CFSTR("+") : CFSTR(" ")));
+			    rb_str_new2("+") : rb_str_new2(" ")));
 		    num_index++;
 		}
 		if (zero_flag) {
@@ -775,15 +787,19 @@
 		tainted = true;
 	    }
 
-	    pad_format_value(arg, minus_flag ? -1 : 0, width, CFSTR(" "));
-	    num = cstr_update(&format_str, start, i - start + 1,
-		    (char *)RSTRING_PTR(arg));
+	    pad_format_value(arg, minus_flag ? -1 : 0, width, rb_str_new2(" "));
+	    num = cstr_update(&format_str, &format_str_capa, start,
+		    i - start + 1, arg);
 	    format_len += num;
 	    i += num;
 	    break;
 	}
     }
 
-    fmt = rb_str_new2(format_str);
-    return tainted ? OBJ_TAINT(fmt) : fmt;
+bail:
+    fmt = rb_unicode_str_new(format_str, format_len);
+    if (tainted) {
+	OBJ_TAINT(fmt);
+    }
+    return fmt;
 }

Modified: MacRuby/trunk/string.c
===================================================================
--- MacRuby/trunk/string.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/string.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -1,5 +1,5 @@
 /* 
- * MacRuby implementation of Ruby 1.9's string.c.
+ * MacRuby Strings.
  *
  * This file is covered by the Ruby license. See COPYING for more details.
  * 
@@ -9,359 +9,1303 @@
  * Copyright (C) 2000 Information-technology Promotion Agency, Japan
  */
 
-#include "ruby/ruby.h"
-#include "ruby/re.h"
+#include <stdio.h>
+#include <stdarg.h>
+#include <wctype.h>
+
+#include "ruby.h"
 #include "ruby/encoding.h"
+#include "encoding.h"
+#include "re.h"
+#include "objc.h"
 #include "id.h"
-#include "objc.h"
 #include "ruby/node.h"
 #include "vm.h"
 
-#define BEG(no) regs->beg[no]
-#define END(no) regs->end[no]
+#include <unicode/unum.h>
+#include <unicode/utrans.h>
 
-#include <math.h>
-#include <ctype.h>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
 VALUE rb_cString;
-VALUE rb_cCFString;
 VALUE rb_cNSString;
 VALUE rb_cNSMutableString;
-VALUE rb_cSymbol;
-VALUE rb_cByteString;
+VALUE rb_cRubyString;
 
-typedef struct {
-    struct RBasic basic;
-    CFMutableDataRef data;
-} rb_bstr_t;
+VALUE rb_fs;
 
-VALUE
-rb_str_freeze(VALUE str)
+// rb_str_t primitives.
+
+static void
+str_update_flags_utf16(rb_str_t *self)
 {
-    rb_obj_freeze(str);
-    return str;
+    assert(str_is_stored_in_uchars(self)
+	    || NON_NATIVE_UTF16_ENC(self->encoding));
+
+    bool ascii_only = true;
+    bool has_supplementary = false;
+    bool valid_encoding = true;
+    // if the length is an odd number, it can't be valid UTF-16
+    if (ODD_NUMBER(self->length_in_bytes)) {
+	valid_encoding = false;
+    }
+
+    UChar *uchars = self->data.uchars;
+    long uchars_count = BYTES_TO_UCHARS(self->length_in_bytes);
+    bool native_byte_order = str_is_stored_in_uchars(self);
+    UChar32 lead = 0;
+    for (int i = 0; i < uchars_count; ++i) {
+	UChar32 c;
+	if (native_byte_order) {
+	    c = uchars[i];
+	}
+	else {
+	    uint8_t *bytes = (uint8_t *)&uchars[i];
+	    c = (uint16_t)bytes[0] << 8 | (uint16_t)bytes[1];
+	}
+	if (U16_IS_SURROGATE(c)) { // surrogate
+	    if (U16_IS_SURROGATE_LEAD(c)) { // lead surrogate
+		// a lead surrogate should not be
+		// after an other lead surrogate
+		if (lead != 0) {
+		    valid_encoding = false;
+		}
+		lead = c;
+	    }
+	    else { // trail surrogate
+		// a trail surrogate must follow a lead surrogate
+		if (lead == 0) {
+		    valid_encoding = false;
+		}
+		else {
+		    has_supplementary = true;
+		    c = U16_GET_SUPPLEMENTARY(lead, c);
+		    if (!U_IS_UNICODE_CHAR(c)) {
+			valid_encoding = false;
+		    }
+		}
+		lead = 0;
+	    }
+	}
+	else { // not a surrogate
+	    // a non-surrogate character should not be after a lead surrogate
+	    // and it should be a valid Unicode character
+	    // Warning: Ruby 1.9 does not do the IS_UNICODE_CHAR check
+	    // (for 1.9, 0xffff is valid though it's not a Unicode character)
+	    if ((lead != 0) || !U_IS_UNICODE_CHAR(c)) {
+		valid_encoding = false;
+	    }
+
+	    if (c > 127) {
+		ascii_only = false;
+	    }
+	}
+    }
+    // the last character should not be a lead surrogate
+    if (lead != 0) {
+	valid_encoding = false;
+    }
+
+    str_set_has_supplementary(self, has_supplementary);
+    if (valid_encoding) {
+	str_set_valid_encoding(self, true);
+	str_set_ascii_only(self, ascii_only);
+    }
+    else {
+	str_set_valid_encoding(self, false);
+	str_set_ascii_only(self, false);
+    }
 }
 
-#define is_ascii_string(str) (1)
-#define is_broken_string(str) (0)
-#define STR_ENC_GET(str) (NULL)
-#define str_mod_check(x,y,z)
+void
+str_update_flags(rb_str_t *self)
+{
+    if (self->length_in_bytes == 0) {
+	str_set_valid_encoding(self, true);
+	str_set_ascii_only(self, true);
+	str_set_has_supplementary(self, false);
+    }
+    else if (BINARY_ENC(self->encoding)) {
+	str_set_valid_encoding(self, true);
+	str_set_has_supplementary(self, false);
+	bool ascii_only = true;
+	for (long i = 0; i < self->length_in_bytes; ++i) {
+	    if ((uint8_t)self->data.bytes[i] > 127) {
+		ascii_only = false;
+		break;
+	    }
+	}
+	str_set_ascii_only(self, ascii_only);
+    }
+    else if (str_is_stored_in_uchars(self) || UTF16_ENC(self->encoding)) {
+	str_update_flags_utf16(self);
+    }
+    else {
+	self->encoding->methods.update_flags(self);
+    }
+}
 
-VALUE rb_fs;
+static void
+str_invert_byte_order(rb_str_t *self)
+{
+    assert(NON_NATIVE_UTF16_ENC(self->encoding));
 
-static inline void
-str_frozen_check(VALUE s)
+    long length_in_bytes = self->length_in_bytes;
+    char *bytes = self->data.bytes;
+
+    if (ODD_NUMBER(length_in_bytes)) {
+	--length_in_bytes;
+    }
+
+    for (long i = 0; i < length_in_bytes; i += 2) {
+	char tmp = bytes[i];
+	bytes[i] = bytes[i+1];
+	bytes[i+1] = tmp;
+    }
+    str_negate_stored_in_uchars(self);
+}
+
+static rb_encoding_t *
+str_compatible_encoding(rb_str_t *str1, rb_str_t *str2)
 {
-    if (OBJ_FROZEN(s)) {
-	rb_raise(rb_eRuntimeError, "string frozen");
+    if (str1->encoding == str2->encoding) {
+	return str1->encoding;
     }
+    if (str2->length_in_bytes == 0) {
+	return str1->encoding;
+    }
+    if (str1->length_in_bytes == 0) {
+	return str2->encoding;
+    }
+    if (!str1->encoding->ascii_compatible
+	    || !str2->encoding->ascii_compatible) {
+	return NULL;
+    }
+    if (str_is_ruby_ascii_only(str2)) {
+	return str1->encoding;
+    }
+    return NULL;
 }
 
-static inline void
-str_change_class(VALUE str, VALUE klass)
+static rb_encoding_t *
+str_must_have_compatible_encoding(rb_str_t *str1, rb_str_t *str2)
 {
-    if (klass != 0 
-	&& klass != rb_cNSString 
-	&& klass != rb_cNSMutableString
-	&& klass != rb_cSymbol
-	&& klass != rb_cByteString) {
-	*(VALUE *)str = (VALUE)klass;
+    rb_encoding_t *new_encoding = str_compatible_encoding(str1, str2);
+    if (new_encoding == NULL) {
+	rb_raise(rb_eEncCompatError,
+		"incompatible character encodings: %s and %s",
+		str1->encoding->public_name, str2->encoding->public_name);
     }
+    return new_encoding;
 }
 
-static inline VALUE
+static rb_str_t *
 str_alloc(VALUE klass)
 {
-    VALUE str = (VALUE)CFStringCreateMutable(NULL, 0);
-    str_change_class(str, klass);
-    CFMakeCollectable((CFTypeRef)str);
+    assert(rb_klass_is_rstr(klass));
+    assert(klass != 0);
 
-    return (VALUE)str;
+    NEWOBJ(str, rb_str_t);
+    str->basic.flags = 0;
+    str->basic.klass = klass;
+    str->encoding = rb_encodings[ENCODING_UTF8];
+    str->capacity_in_bytes = 0;
+    str->length_in_bytes = 0;
+    str->data.bytes = NULL;
+    str->flags = 0;
+    return str;
 }
 
-VALUE
-rb_str_new_empty(void)
+static VALUE
+str_new(void)
 {
-    return str_alloc(0);
+    return (VALUE)str_alloc(rb_cRubyString);
 }
 
-VALUE
-rb_str_new_fast(int argc, ...)
+static VALUE
+str_new_like(VALUE obj)
 {
-    VALUE str = str_alloc(0);
+    return (VALUE)str_alloc(rb_obj_class(obj));
+}
 
-    if (argc > 0) {
-	va_list ar;
-	va_start(ar, argc);
-	for (int i = 0; i < argc; ++i) {
-	    VALUE fragment = va_arg(ar, VALUE);
-	    switch (TYPE(fragment)) {
-		case T_FIXNUM:
-		    CFStringAppendFormat((CFMutableStringRef)str, NULL, CFSTR("%ld"),
-			    FIX2LONG(fragment));
-		    break;
+static void
+str_replace_with_bytes(rb_str_t *self, const char *bytes, long len,
+	rb_encoding_t *enc)
+{
+    assert(len >= 0);
+    assert(enc != NULL);
 
-		default:
-		    fragment = rb_obj_as_string(fragment);
-		    // fall through
+    self->flags = 0;
+    self->encoding = enc;
+    self->capacity_in_bytes = len;
+    if (len > 0) {
+	GC_WB(&self->data.bytes, xmalloc(len));
+	if (bytes != NULL) {
+	    memcpy(self->data.bytes, bytes, len);
+	    self->length_in_bytes = len;
+	}
+	else {
+	    self->length_in_bytes = 0;
+	}
+    }
+    else {
+	self->data.bytes = NULL;
+	self->length_in_bytes = 0;
+    }
+}
 
-		case T_STRING:
-		    CFStringAppend((CFMutableStringRef)str, (CFStringRef)fragment);
-		    break;
-	    }
+static void
+str_replace_with_string(rb_str_t *self, rb_str_t *source)
+{
+    if (self == source) {
+	return;
+    }
+    str_replace_with_bytes(self, source->data.bytes, source->length_in_bytes,
+	    source->encoding);
+    self->flags = source->flags;
+}
+
+static void
+str_append_uchar(rb_str_t *self, UChar c)
+{
+    assert(str_is_stored_in_uchars(self));
+    const long uchar_cap = BYTES_TO_UCHARS(self->capacity_in_bytes);
+    const long uchar_len = BYTES_TO_UCHARS(self->length_in_bytes);
+    if (uchar_len + 1 >= uchar_cap) {
+	assert(uchar_len + 1 < uchar_cap + 10);
+	self->capacity_in_bytes += UCHARS_TO_BYTES(10);
+	UChar *uchars = (UChar *)xrealloc(self->data.uchars,
+		self->capacity_in_bytes);
+	if (uchars != self->data.uchars) {
+	    GC_WB(&self->data.uchars, uchars);
 	}
-	va_end(ar);
     }
+    self->data.uchars[uchar_len] = c;
+    self->length_in_bytes += UCHARS_TO_BYTES(1);
+}
 
-    return str;
+static void
+str_replace_with_uchars(rb_str_t *self, const UChar *chars, long len)
+{
+    assert(len >= 0);
+
+    len = UCHARS_TO_BYTES(len);
+    self->flags = 0;
+    self->encoding = rb_encodings[ENCODING_UTF8];
+    self->capacity_in_bytes = len;
+    if (len > 0) {
+	GC_WB(&self->data.uchars, xmalloc(len));
+	if (chars != NULL) {
+	    memcpy(self->data.uchars, chars, len);
+	    self->length_in_bytes = len;
+	}
+	else {
+	    self->length_in_bytes = 0;
+	}
+	str_set_stored_in_uchars(self, true);
+    }
+    else {
+	self->data.uchars = NULL;
+	self->length_in_bytes = 0;
+    }
 }
 
-static VALUE
-str_new(VALUE klass, const char *ptr, long len)
+static void
+str_replace_with_cfstring(rb_str_t *self, CFStringRef source)
 {
-    VALUE str;
+    const long len = CFStringGetLength(source);
+    UniChar *chars = NULL;
+    if (len > 0) {
+	chars = (UniChar *)malloc(sizeof(UniChar) * len);
+	CFStringGetCharacters(source, CFRangeMake(0, len), chars);
+    }
+    str_replace_with_uchars(self, chars, len);
+}
 
-    if (len < 0) {
-	rb_raise(rb_eArgError, "negative string size (or size too big)");
+static void
+str_replace(rb_str_t *self, VALUE arg)
+{
+    switch (TYPE(arg)) {
+	case T_STRING:
+	    if (IS_RSTR(arg)) {
+		str_replace_with_string(self, RSTR(arg));
+	    }
+	    else {
+		str_replace_with_cfstring(self, (CFStringRef)arg);
+	    }
+	    break;
+	default:
+	    str_replace(self, rb_str_to_str(arg));
+	    break;
     }
+}
 
-    if (ptr != NULL && len > 0) {
-	const long slen = len == 1
-	    ? 1 /* XXX in the case ptr is actually a pointer to a single char
-		   character, which is not NULL-terminated. */
-	    : strlen(ptr);
+static rb_str_t *
+str_dup(rb_str_t *source)
+{
+    rb_str_t *destination = str_alloc(rb_cRubyString);
+    str_replace_with_string(destination, source);
+    return destination;
+}
 
-	if (len <= slen) {
-	    str = str_alloc(klass);
-	    CFStringAppendCString((CFMutableStringRef)str, ptr, 
-		    kCFStringEncodingUTF8);
-	    if (len < slen) {
-		CFStringPad((CFMutableStringRef)str, NULL, len, 0);
+static rb_str_t *
+str_new_from_cfstring(CFStringRef source)
+{
+    rb_str_t *destination = str_alloc(rb_cRubyString);
+    str_replace_with_cfstring(destination, source);
+    return destination;
+}
+
+static void
+str_make_data_binary(rb_str_t *self)
+{
+    if (!str_is_stored_in_uchars(self) || NATIVE_UTF16_ENC(self->encoding)) {
+	// nothing to do
+	return;
+    }
+
+    if (NON_NATIVE_UTF16_ENC(self->encoding)) {
+	// Doing the conversion ourself is faster, and anyway ICU's converter
+	// does not like non-paired surrogates.
+	str_invert_byte_order(self);
+	return;
+    }
+
+    self->encoding->methods.make_data_binary(self);
+}
+
+static bool
+str_try_making_data_uchars(rb_str_t *self)
+{
+    if (str_is_stored_in_uchars(self)) {
+	return true;
+    }
+    else if (NON_NATIVE_UTF16_ENC(self->encoding)) {
+	str_invert_byte_order(self);
+	return true;
+    }
+    else if (BINARY_ENC(self->encoding)) {
+	// you can't convert binary to anything
+	return false;
+    }
+    else if (self->length_in_bytes == 0) {
+	// for empty strings, nothing to convert
+	str_set_stored_in_uchars(self, true);
+	return true;
+    }
+    else if (str_known_to_have_an_invalid_encoding(self)) {
+	return false;
+    }
+
+    return self->encoding->methods.try_making_data_uchars(self);
+}
+
+static void
+str_make_same_format(rb_str_t *str1, rb_str_t *str2)
+{
+    if (str_is_stored_in_uchars(str1) != str_is_stored_in_uchars(str2)) {
+	if (str_is_stored_in_uchars(str1)) {
+	    if (!str_try_making_data_uchars(str2)) {
+		str_make_data_binary(str1);
 	    }
-	    if (CFStringGetLength((CFStringRef)str) != len) {
-		str = rb_bytestring_new_with_data((const UInt8 *)ptr, len);
-	    }
 	}
 	else {
-	    str = rb_bytestring_new_with_data((const UInt8 *)ptr, len);
+	    str_make_data_binary(str2);
 	}
     }
+}
+
+static long
+str_length(rb_str_t *self, bool ucs2_mode)
+{
+    if (self->length_in_bytes == 0) {
+	return 0;
+    }
+    if (str_is_stored_in_uchars(self)) {
+	long length;
+	if (ucs2_mode) {
+	    length = BYTES_TO_UCHARS(self->length_in_bytes);
+	}
+	else {
+	    // we must return the length in Unicode code points,
+	    // not the number of UChars, even if the probability
+	    // we have surrogates is very low
+	    length = u_countChar32(self->data.uchars,
+		    BYTES_TO_UCHARS(self->length_in_bytes));
+	}
+	if (ODD_NUMBER(self->length_in_bytes)) {
+	    return length + 1;
+	}
+	else {
+	    return length;
+	}
+    }
     else {
-	if (len == 0) {
-	    str = str_alloc(klass);
+	if (self->encoding->single_byte_encoding) {
+	    return self->length_in_bytes;
 	}
+	else if (ucs2_mode && NON_NATIVE_UTF16_ENC(self->encoding)) {
+	    return div_round_up(self->length_in_bytes, 2);
+	}
 	else {
-	    str = rb_bytestring_new();
-	    rb_bytestring_resize(str, len);
+	    return self->encoding->methods.length(self, ucs2_mode);
 	}
     }
+}
 
+static UChar
+str_get_uchar(rb_str_t *self, long pos, bool ucs2_mode)
+{
+    assert(pos >= 0 && pos < str_length(self, ucs2_mode));
+    if (str_try_making_data_uchars(self)) {
+	// FIXME: Not ucs2 compliant.
+	return self->data.uchars[pos];
+    }
+    //assert(BINARY_ENC(self->encoding));
+    return self->data.bytes[pos];
+}
+
+static long
+str_bytesize(rb_str_t *self)
+{
+    if (str_is_stored_in_uchars(self)) {
+	if (UTF16_ENC(self->encoding)) {
+	    return self->length_in_bytes;
+	}
+	else {
+	    return self->encoding->methods.bytesize(self);
+	}
+    }
+    else {
+	return self->length_in_bytes;
+    }
+}
+
+static rb_str_t *
+str_new_similar_empty_string(rb_str_t *self)
+{
+    rb_str_t *str = str_alloc(rb_cRubyString);
+    str->encoding = self->encoding;
+    str->flags = self->flags & STRING_REQUIRED_FLAGS;
     return str;
 }
 
-VALUE
-rb_unicode_str_new(const UniChar *ptr, const size_t len)
+static rb_str_t *
+str_new_copy_of_part(rb_str_t *self, long offset_in_bytes,
+	long length_in_bytes)
 {
-    VALUE str = str_alloc(rb_cString);
-    CFStringAppendCharacters((CFMutableStringRef)str,
-	    ptr, len);
+    rb_str_t *str = str_alloc(rb_cRubyString);
+    str->encoding = self->encoding;
+    str->capacity_in_bytes = str->length_in_bytes = length_in_bytes;
+    str->flags = self->flags & STRING_REQUIRED_FLAGS;
+    GC_WB(&str->data.bytes, xmalloc(length_in_bytes));
+    memcpy(str->data.bytes, &self->data.bytes[offset_in_bytes],
+	    length_in_bytes);
     return str;
 }
 
-VALUE
-rb_str_new(const char *ptr, long len)
+// you cannot cut a surrogate in an encoding that is not UTF-16
+// (it's in theory possible to store the surrogate in
+//  UTF-8 or UTF-32 but that would be incorrect Unicode)
+NORETURN(static void
+str_cannot_cut_surrogate(void))
 {
-    return str_new(rb_cString, ptr, len);
+    rb_raise(rb_eIndexError, "You can't cut a surrogate in two in an encoding that is not UTF-16");
 }
 
-VALUE
-rb_usascii_str_new(const char *ptr, long len)
+static character_boundaries_t
+str_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode)
 {
-    return str_new(rb_cString, ptr, len);
+    character_boundaries_t boundaries = {-1, -1};
+
+    if (str_is_stored_in_uchars(self)) {
+	if (ucs2_mode || str_known_not_to_have_any_supplementary(self)) {
+	    if (index < 0) {
+		index += div_round_up(self->length_in_bytes, 2);
+		if (index < 0) {
+		    return boundaries;
+		}
+	    }
+	    boundaries.start_offset_in_bytes = UCHARS_TO_BYTES(index);
+	    boundaries.end_offset_in_bytes = boundaries.start_offset_in_bytes
+		+ 2;
+	    if (!UTF16_ENC(self->encoding)) {
+		long length = BYTES_TO_UCHARS(self->length_in_bytes);
+		if ((index < length)
+			&& U16_IS_SURROGATE(self->data.uchars[index])) {
+		    if (U16_IS_SURROGATE_LEAD(self->data.uchars[index])) {
+			boundaries.end_offset_in_bytes = -1;
+		    }
+		    else { // U16_IS_SURROGATE_TRAIL
+			boundaries.start_offset_in_bytes = -1;
+		    }
+		}
+	    }
+	}
+	else {
+	    // we don't have the length of the string, just the number of
+	    // UChars (uchars_count >= number of characters)
+	    long uchars_count = BYTES_TO_UCHARS(self->length_in_bytes);
+	    if ((index < -uchars_count) || (index >= uchars_count)) {
+		return boundaries;
+	    }
+	    const UChar *uchars = self->data.uchars;
+	    long offset;
+	    if (index < 0) {
+		// count the characters from the end
+		offset = uchars_count;
+		while ((offset > 0) && (index < 0)) {
+		    --offset;
+		    // if the next character is a paired surrogate
+		    // we need to go to the start of the whole surrogate
+		    if (U16_IS_TRAIL(uchars[offset]) && (offset > 0)
+			    && U16_IS_LEAD(uchars[offset-1])) {
+			--offset;
+		    }
+		    ++index;
+		}
+		// ended before the index got to 0
+		if (index != 0) {
+		    return boundaries;
+		}
+		assert(offset >= 0);
+	    }
+	    else {
+		// count the characters from the start
+		offset = 0;
+		U16_FWD_N(uchars, offset, uchars_count, index);
+		if (offset >= uchars_count) {
+		    return boundaries;
+		}
+	    }
+
+	    long length_in_bytes;
+	    if (U16_IS_LEAD(uchars[offset]) && (offset < uchars_count - 1)
+		    && (U16_IS_TRAIL(uchars[offset+1]))) {
+		// if it's a lead surrogate we must also copy the trail
+		// surrogate
+		length_in_bytes = UCHARS_TO_BYTES(2);
+	    }
+	    else {
+		length_in_bytes = UCHARS_TO_BYTES(1);
+	    }
+	    boundaries.start_offset_in_bytes = UCHARS_TO_BYTES(offset);
+	    boundaries.end_offset_in_bytes = boundaries.start_offset_in_bytes
+		+ length_in_bytes;
+	}
+    }
+    else { // data in binary
+	if (self->encoding->single_byte_encoding) {
+	    if (index < 0) {
+		index += self->length_in_bytes;
+		if (index < 0) {
+		    return boundaries;
+		}
+	    }
+	    boundaries.start_offset_in_bytes = index;
+	    boundaries.end_offset_in_bytes = boundaries.start_offset_in_bytes
+		+ 1;
+	}
+	else if (UTF32_ENC(self->encoding)
+		&& (!ucs2_mode
+		    || str_known_not_to_have_any_supplementary(self))) {
+	    if (index < 0) {
+		index += div_round_up(self->length_in_bytes, 4);
+		if (index < 0) {
+		    return boundaries;
+		}
+	    }
+	    boundaries.start_offset_in_bytes = index * 4;
+	    boundaries.end_offset_in_bytes = boundaries.start_offset_in_bytes
+		+ 4;
+	}
+	else if (NON_NATIVE_UTF16_ENC(self->encoding)
+		&& (ucs2_mode
+		    || str_known_not_to_have_any_supplementary(self))) {
+	    if (index < 0) {
+		index += div_round_up(self->length_in_bytes, 2);
+		if (index < 0) {
+		    return boundaries;
+		}
+	    }
+	    boundaries.start_offset_in_bytes = UCHARS_TO_BYTES(index);
+	    boundaries.end_offset_in_bytes = boundaries.start_offset_in_bytes
+		+ 2;
+	}
+	else {
+	    boundaries = self->encoding->methods.get_character_boundaries(self,
+		    index, ucs2_mode);
+	}
+    }
+
+    return boundaries;
 }
 
-VALUE
-rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
+static rb_str_t *
+str_get_characters(rb_str_t *self, long first, long last, bool ucs2_mode)
 {
-    return str_new(rb_cString, ptr, len);
+    if (self->length_in_bytes == 0) {
+	if (first == 0) {
+	    return str_new_similar_empty_string(self);
+	}
+	else {
+	    return NULL;
+	}
+    }
+    if (!self->encoding->single_byte_encoding
+	    && !str_is_stored_in_uchars(self)) {
+	str_try_making_data_uchars(self);
+    }
+    character_boundaries_t first_boundaries =
+	str_get_character_boundaries(self, first, ucs2_mode);
+    character_boundaries_t last_boundaries =
+	str_get_character_boundaries(self, last, ucs2_mode);
+
+    if (first_boundaries.start_offset_in_bytes == -1) {
+	if (last_boundaries.end_offset_in_bytes == -1) {
+	    // you cannot cut a surrogate in an encoding that is not UTF-16
+	    str_cannot_cut_surrogate();
+	}
+	else {
+	    return NULL;
+	}
+    }
+    else if (last_boundaries.end_offset_in_bytes == -1) {
+	// you cannot cut a surrogate in an encoding that is not UTF-16
+	str_cannot_cut_surrogate();
+    }
+
+    if (first_boundaries.start_offset_in_bytes == self->length_in_bytes) {
+	return str_new_similar_empty_string(self);
+    }
+    else if (first_boundaries.start_offset_in_bytes > self->length_in_bytes) {
+	return NULL;
+    }
+    if (last_boundaries.end_offset_in_bytes >= self->length_in_bytes) {
+	last_boundaries.end_offset_in_bytes = self->length_in_bytes;
+    }
+
+    return str_new_copy_of_part(self, first_boundaries.start_offset_in_bytes,
+	    last_boundaries.end_offset_in_bytes
+	    - first_boundaries.start_offset_in_bytes);
 }
 
-VALUE
-rb_str_new2(const char *ptr)
+static void
+str_resize_bytes(rb_str_t *self, long new_capacity)
 {
-    long len;
-    if (!ptr) {
-	rb_raise(rb_eArgError, "NULL pointer given");
+    if (self->capacity_in_bytes < new_capacity) {
+	if (self->data.bytes == NULL) {
+	    GC_WB(&self->data.bytes, xmalloc(new_capacity));
+	}
+	else {
+	    char *bytes = xrealloc(self->data.bytes, new_capacity);
+	    if (bytes != self->data.bytes) {
+		GC_WB(&self->data.bytes, bytes);
+	    }
+	}
+	self->capacity_in_bytes = new_capacity;
     }
-    len = strlen(ptr);
-    return rb_str_new(len == 0 ? NULL : ptr, len);
 }
 
-VALUE
-rb_usascii_str_new2(const char *ptr)
+static void
+str_ensure_null_terminator(rb_str_t *self)
 {
-    if (!ptr) {
-	rb_raise(rb_eArgError, "NULL pointer given");
+    assert(!str_is_stored_in_uchars(self));
+
+    if (self->length_in_bytes > 0
+	&& (self->capacity_in_bytes == self->length_in_bytes
+	    || self->data.bytes[self->length_in_bytes] != '\0')) {
+	str_resize_bytes(self, self->length_in_bytes + 1);
+	self->data.bytes[self->length_in_bytes] = '\0';
     }
-    return rb_usascii_str_new(ptr, strlen(ptr));
 }
 
-VALUE
-rb_tainted_str_new(const char *ptr, long len)
+static void
+str_splice(rb_str_t *self, long pos, long len, rb_str_t *str, bool ucs2_mode)
 {
-    VALUE str = rb_str_new(ptr, len);
-    OBJ_TAINT(str);
-    return str;
+    // self[pos..pos+len] = str
+    assert(pos >= 0 && len >= 0);
+
+    if (str != NULL) {
+	str_must_have_compatible_encoding(self, str);
+	str_make_same_format(self, str);
+    }
+
+    character_boundaries_t beg, end;
+    if (pos + len == 0) {
+	// Positioning before the string.
+	const long offset = 0;
+	beg.start_offset_in_bytes = beg.end_offset_in_bytes = offset;
+	end.start_offset_in_bytes = end.end_offset_in_bytes = offset;
+    }
+    else if (len == 0 && str_length(self, ucs2_mode) == pos) {
+	// Positioning after the string.
+	const long offset = self->length_in_bytes;
+	beg.start_offset_in_bytes = beg.end_offset_in_bytes = offset;
+	end.start_offset_in_bytes = end.end_offset_in_bytes = offset;
+    }
+    else {
+	// Positioning in the string.
+	beg = str_get_character_boundaries(self, pos, ucs2_mode);
+
+	// TODO: probably call str_cannot_cut_surrogate()
+	assert(beg.start_offset_in_bytes != -1);
+	assert(beg.end_offset_in_bytes != -1);
+
+	end = str_get_character_boundaries(self, pos + len - 1, ucs2_mode);
+
+	// TODO: probably call str_cannot_cut_surrogate()
+	assert(end.start_offset_in_bytes != -1);
+	assert(end.end_offset_in_bytes != -1);
+    }
+
+    const long bytes_to_splice = end.end_offset_in_bytes
+	- beg.start_offset_in_bytes;
+
+    long bytes_to_add = 0; 
+    if (str != NULL) {
+	if (str->length_in_bytes > bytes_to_splice) {
+	    str_resize_bytes(self, self->length_in_bytes
+		    + (str->length_in_bytes - bytes_to_splice));
+	}
+	bytes_to_add = str->length_in_bytes;
+    }
+
+    if (beg.start_offset_in_bytes == end.end_offset_in_bytes
+	    && end.end_offset_in_bytes == self->length_in_bytes) {
+    	if (bytes_to_add > 0) {
+	    // We are splicing at the very end.
+	    memcpy(self->data.bytes + self->length_in_bytes, str->data.bytes,
+		    bytes_to_add);
+	}
+    }
+    else {
+	// We are splicing in the middle.
+	memmove(self->data.bytes + beg.start_offset_in_bytes + bytes_to_add,
+		self->data.bytes + end.end_offset_in_bytes,
+		self->length_in_bytes - end.end_offset_in_bytes);
+	if (bytes_to_add > 0) {
+	    memcpy(self->data.bytes + beg.start_offset_in_bytes,
+		    str->data.bytes, bytes_to_add);
+	}
+    }
+
+    self->length_in_bytes = self->length_in_bytes - bytes_to_splice
+	+ bytes_to_add; 
 }
 
-VALUE
-rb_tainted_str_new2(const char *ptr)
+static void
+str_delete(rb_str_t *self, long pos, long len, bool ucs2_mode)
 {
-    VALUE str = rb_str_new2(ptr);
-    OBJ_TAINT(str);
-    return str;
+    str_splice(self, pos, len, NULL, ucs2_mode);
 }
 
-static inline VALUE
-str_new3(VALUE klass, VALUE str)
+static void
+str_insert(rb_str_t *self, long pos, rb_str_t *str, bool ucs2_mode)
 {
-    VALUE str2 = rb_str_dup(str);
-    str_change_class(str2, klass);
-    return str2;
+    str_splice(self, pos, 0, str, ucs2_mode);
 }
 
-VALUE
-rb_str_new3(VALUE str)
+static void
+str_concat_bytes(rb_str_t *self, const char *bytes, long len)
 {
-    return str_new3(rb_obj_class(str), str);
+    assert(bytes != NULL && len >= 0);
+
+    const long new_length_in_bytes = self->length_in_bytes + len;
+
+    str_resize_bytes(self, new_length_in_bytes);
+    memcpy(self->data.bytes + self->length_in_bytes, bytes, len);
+    self->length_in_bytes = new_length_in_bytes;
 }
 
-VALUE
-rb_str_new4(VALUE orig)
+static void
+str_concat_uchars(rb_str_t *self, const UChar *chars, long len)
 {
-    return rb_str_new3(orig);
+    assert(str_try_making_data_uchars(self));
+
+    str_concat_bytes(self, (const char *)chars, UCHARS_TO_BYTES(len)); 
 }
 
-VALUE
-rb_str_new5(VALUE obj, const char *ptr, long len)
+static void
+str_concat_string(rb_str_t *self, rb_str_t *str)
 {
-    return str_new(rb_obj_class(obj), ptr, len);
+    if (str->length_in_bytes == 0) {
+	return;
+    }
+    if (self->length_in_bytes == 0) {
+	str_replace_with_string(self, str);
+	return;
+    }
+
+    str_must_have_compatible_encoding(self, str);
+    str_make_same_format(self, str);
+
+    // TODO: we should maybe merge flags
+    // (if both are ASCII-only, the concatenation is ASCII-only,
+    //  though I'm not sure all the tests required are worth doing)
+    str_unset_facultative_flags(self);
+
+    str_concat_bytes(self, str->data.bytes, str->length_in_bytes);
 }
 
-#define STR_BUF_MIN_SIZE 128
+static int
+str_compare(rb_str_t *self, rb_str_t *str)
+{
+    if (self == str) {
+	return 0;
+    }
 
-VALUE
-rb_str_buf_new(long capa)
+    if (self->length_in_bytes == 0 && str->length_in_bytes == 0) {
+	// both strings are empty
+	return 0;
+    }
+
+    if (str_compatible_encoding(self, str) == NULL) {
+	// incompatible encodings
+	return -1;
+    }
+
+    str_make_same_format(self, str);
+
+    const long min_len = self->length_in_bytes < str->length_in_bytes
+	? self->length_in_bytes : str->length_in_bytes;
+
+    const int res = memcmp(self->data.bytes, str->data.bytes, min_len);
+
+    if (res == 0) {
+	if (self->length_in_bytes == str->length_in_bytes) {
+	    return 0;
+	}
+	return self->length_in_bytes > str->length_in_bytes
+	    ? 1 : -1;
+    }
+    return res > 0 ? 1 : -1;
+}
+
+static int
+str_case_compare(rb_str_t *self, rb_str_t *str)
 {
-    return rb_str_new2("");
-#if 0
-    return rb_bytestring_new();
-#endif
+    if (self == str) {
+	return 0;
+    }
+
+    if (self->length_in_bytes == 0 && str->length_in_bytes == 0) {
+	// both strings are empty
+	return 0;
+    }
+
+    if (str_compatible_encoding(self, str) == NULL) {
+	// incompatible encodings
+	return -1;
+    }
+
+    str_make_same_format(self, str);
+
+    const long min_length = self->length_in_bytes < str->length_in_bytes
+	? self->length_in_bytes : str->length_in_bytes;
+
+    if (str_is_stored_in_uchars(str)) {
+	for (long i = 0; i < BYTES_TO_UCHARS(min_length); i++) {
+	    UChar c1 = self->data.uchars[i];
+	    UChar c2 = str->data.uchars[i];
+	    if (c1 != c2) {
+		c1 = isascii(c1) ? toupper(c1) : c1;
+		c2 = isascii(c2) ? toupper(c2) : c2;
+		if (c1 != c2) {
+		    return c1 < c2 ? -1 : 1;
+		}
+	    }
+	}
+    }
+    else {
+	for (long i = 0; i < min_length; i++) {
+	    char c1 = self->data.bytes[i];
+	    char c2 = str->data.bytes[i];
+	    if (c1 != c2) {
+		c1 = isascii(c1) ? toupper(c1) : c1;
+		c2 = isascii(c2) ? toupper(c2) : c2;
+		if (c1 != c2) {
+		    return c1 < c2 ? -1 : 1;
+		}
+	    }
+	}
+    }
+
+    if (self->length_in_bytes == str->length_in_bytes) {
+	return 0;
+    }
+    return self->length_in_bytes > str->length_in_bytes ? 1 : -1;
 }
 
-VALUE
-rb_str_buf_new2(const char *ptr)
+
+static long
+str_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes,
+	bool ucs2_mode)
 {
-    return rb_str_new2(ptr);
-#if 0
-    VALUE str = rb_bytestring_new();
-    long len = strlen(ptr);
-    if (ptr != NULL && len > 0) {
-	CFDataAppendBytes(rb_bytestring_wrapped_data(str), (const UInt8 *)ptr, len);
+    if ((offset_in_bytes >= self->length_in_bytes) || (offset_in_bytes < 0)) {
+	return -1;
     }
-    return str;
-#endif
+    if (offset_in_bytes == 0) {
+	return 0;
+    }
+
+    if (str_is_stored_in_uchars(self)) {
+	if (ucs2_mode || str_known_not_to_have_any_supplementary(self)) {
+	    return BYTES_TO_UCHARS(offset_in_bytes);
+	}
+	else {
+	    long length = BYTES_TO_UCHARS(self->length_in_bytes);
+	    long offset_in_uchars = BYTES_TO_UCHARS(offset_in_bytes);
+	    long index = 0, i = 0;
+	    for (;;) {
+		if (U16_IS_LEAD(self->data.uchars[i]) && (i+1 < length)
+			&& U16_IS_TRAIL(self->data.uchars[i+1])) {
+		    i += 2;
+		}
+		else {
+		    ++i;
+		}
+		if (offset_in_uchars < i) {
+		    return index;
+		}
+		++index;
+		if (offset_in_uchars == i) {
+		    return index;
+		}
+	    }
+	}
+    }
+    else {
+	if (self->encoding->single_byte_encoding) {
+	    return offset_in_bytes;
+	}
+	else if (UTF32_ENC(self->encoding)
+		&& (!ucs2_mode
+		    || str_known_not_to_have_any_supplementary(self))) {
+	    return offset_in_bytes / 4;
+	}
+	else if (NON_NATIVE_UTF16_ENC(self->encoding)
+		&& (ucs2_mode
+		    || str_known_not_to_have_any_supplementary(self))) {
+	    return BYTES_TO_UCHARS(offset_in_bytes);
+	}
+	else {
+	    return self->encoding->methods.offset_in_bytes_to_index(self,
+		    offset_in_bytes, ucs2_mode);
+	}
+    }
 }
 
-VALUE
-rb_str_tmp_new(long len)
+static long
+str_offset_in_bytes_for_string(rb_str_t *self, rb_str_t *searched,
+	long start_offset_in_bytes, long end_offset_in_bytes,
+	bool backward_search)
 {
-    VALUE str = rb_bytestring_new();
-    rb_bytestring_resize(str, len);
-    return str;
+    if (start_offset_in_bytes >= self->length_in_bytes) {
+	return -1;
+    }
+    if (self == searched && start_offset_in_bytes == 0) {
+	return 0;
+    }
+    if (searched->length_in_bytes == 0) {
+	return start_offset_in_bytes;
+    }
+    str_must_have_compatible_encoding(self, searched);
+    str_make_same_format(self, searched);
+    if (searched->length_in_bytes > self->length_in_bytes) {
+	return -1;
+    }
+
+    long increment;
+    if (str_is_stored_in_uchars(self)) {
+	increment = 2;
+    }
+    else {
+	increment = self->encoding->min_char_size;
+    }
+
+    if (backward_search) {
+	for (long offset_in_bytes = end_offset_in_bytes;
+		offset_in_bytes >= start_offset_in_bytes;
+		offset_in_bytes -= increment) {
+	    if (memcmp(self->data.bytes+offset_in_bytes, searched->data.bytes,
+			searched->length_in_bytes) == 0) {
+		return offset_in_bytes;
+	    }
+	}
+    }
+    else {
+	const long max_offset_in_bytes = end_offset_in_bytes
+	    - searched->length_in_bytes + 1;
+
+	for (long offset_in_bytes = start_offset_in_bytes;
+		offset_in_bytes < max_offset_in_bytes;
+		offset_in_bytes += increment) {
+	    if (memcmp(self->data.bytes+offset_in_bytes, searched->data.bytes,
+			searched->length_in_bytes) == 0) {
+		return offset_in_bytes;
+	    }
+	}
+    }
+    return -1;
 }
 
-VALUE
-rb_str_to_str(VALUE str)
+static long
+str_index_for_string(rb_str_t *self, rb_str_t *searched, long start_index,
+	long end_index, bool backward_search, bool ucs2_mode)
 {
-    return rb_convert_type(str, T_STRING, "String", "to_str");
+    str_must_have_compatible_encoding(self, searched);
+    str_make_same_format(self, searched);
+
+    if (searched->length_in_bytes == 0 && self->length_in_bytes == 0) {
+	return start_index;
+    }
+
+    long start_offset_in_bytes;
+    if (start_index == 0) {
+	start_offset_in_bytes = 0;
+    }
+    else {
+	character_boundaries_t boundaries = str_get_character_boundaries(self,
+		start_index, ucs2_mode);
+	if (boundaries.start_offset_in_bytes == -1) {
+	    if (boundaries.end_offset_in_bytes == -1) {
+		return -1;
+	    }
+	    else {
+		// you cannot cut a surrogate in an encoding that is not UTF-16
+		str_cannot_cut_surrogate();
+	    }
+	}
+	start_offset_in_bytes = boundaries.start_offset_in_bytes;
+    }
+
+    long end_offset_in_bytes;
+    if (end_index < 0 || end_index == str_length(self, ucs2_mode)) {
+	end_offset_in_bytes = self->length_in_bytes;
+    }
+    else {
+	character_boundaries_t boundaries = str_get_character_boundaries(self,
+		end_index, ucs2_mode);
+	if (boundaries.start_offset_in_bytes == -1) {
+	    if (boundaries.end_offset_in_bytes == -1) {
+		return -1;
+	    }
+	    else {
+		// you cannot cut a surrogate in an encoding that is not UTF-16
+		str_cannot_cut_surrogate();
+	    }
+	}
+	end_offset_in_bytes = boundaries.end_offset_in_bytes;
+    }
+
+    const long offset_in_bytes = str_offset_in_bytes_for_string(self,
+	    searched, start_offset_in_bytes, end_offset_in_bytes,
+	    backward_search);
+    if (offset_in_bytes == -1) {
+	return -1;
+    }
+    return str_offset_in_bytes_to_index(RSTR(self), offset_in_bytes, ucs2_mode);
 }
 
-void
-rb_str_shared_replace(VALUE str, VALUE str2)
+static bool
+str_include_string(rb_str_t *self, rb_str_t *searched)
 {
-    rb_str_modify(str);
-    CFStringReplaceAll((CFMutableStringRef)str, (CFStringRef)str2);
+    return str_offset_in_bytes_for_string(self, searched, 0,
+	    self->length_in_bytes, false) != -1;
 }
 
-static ID id_to_s;
+static rb_str_t *
+str_need_string(VALUE str)
+{
+    switch (TYPE(str)) {
+	case T_SYMBOL:
+	    str = rb_sym_to_s(str);
+	    break;
 
-VALUE
-rb_obj_as_string(VALUE obj)
+	case T_STRING:
+	    break;
+
+	default:
+	    str = rb_str_to_str(str);
+	    break;
+    }
+    return IS_RSTR(str)
+	? (rb_str_t *)str : str_new_from_cfstring((CFStringRef)str);
+}
+
+void
+rb_str_get_uchars(VALUE str, UChar **chars_p, long *chars_len_p,
+	bool *need_free_p)
 {
-    VALUE str;
+    assert(chars_p != NULL && chars_len_p != NULL && need_free_p != NULL);
 
-    if (TYPE(obj) == T_STRING || TYPE(obj) == T_SYMBOL) {
-	return obj;
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+
+    if (IS_RSTR(str)) {
+	if (str_try_making_data_uchars(RSTR(str))) {
+	    chars = RSTR(str)->data.uchars;
+	    chars_len = str_length(RSTR(str), false);
+	}
+	else {
+	    //assert(BINARY_ENC(RSTR(str)->encoding));
+	    chars_len = RSTR(str)->length_in_bytes;
+	    if (chars_len > 0) {
+		chars = (UChar *)malloc(sizeof(UChar) * chars_len);
+		for (long i = 0; i < chars_len; i++) {
+		    chars[i] = RSTR(str)->data.bytes[i];
+		}
+		need_free = true;
+	    }
+	}
     }
-    //str = rb_funcall(obj, id_to_s, 0);
-    str = rb_vm_call(obj, selToS, 0, NULL, false);
-    if (TYPE(str) != T_STRING) {
-	return rb_any_to_s(obj);
+    else {
+	chars_len = CFStringGetLength((CFStringRef)str);
+	if (chars_len > 0) {
+	    chars = (UChar *)malloc(sizeof(UChar) * chars_len);
+	    CFStringGetCharacters((CFStringRef)str, CFRangeMake(0, chars_len),
+		    chars);
+	    need_free = true;
+	}
     }
-    if (OBJ_TAINTED(obj)) {
-	OBJ_TAINT(str);
-    }
-    return str;
+
+    *chars_p = chars;
+    *chars_len_p = chars_len;
+    *need_free_p = need_free;
 }
 
-static VALUE rb_str_replace(VALUE, VALUE);
-
 static VALUE
-rb_str_dup_imp(VALUE str, SEL sel)
+rstr_substr(VALUE str, long beg, long len)
 {
-    VALUE dup;
-    VALUE klass = *(VALUE *)str;
+    if (len < 0) {
+	return Qnil;
+    }
+    if (len == 0) {
+	return str_new();
+    }	
 
-    if (klass == rb_cByteString) {
-	dup = rb_bytestring_copy(str);
+    const long n = str_length(RSTR(str), false);
+    if (beg < 0) {
+	beg += n;
     }
-    else if (klass == rb_cCFString) {
-	dup = (VALUE)CFStringCreateMutableCopy(NULL, 0, (CFStringRef)str);
-	CFMakeCollectable((CFTypeRef)dup);
-	if (*(VALUE *)str != rb_cSymbol) {
-	    *(VALUE *)dup = *(VALUE *)str;
+    if (beg > n || beg < 0) {
+	return Qnil;
+    }
+    if (beg + len > n) {
+	len = n - beg;
+    }
+
+    rb_str_t *substr = str_get_characters(RSTR(str), beg, beg + len - 1, false);
+    return substr == NULL ? Qnil : (VALUE)substr;
+}
+
+static void
+rstr_splice(VALUE self, long beg, long len, VALUE str)
+{
+    rb_str_t *strstr = str_need_string(str);
+
+    if (len < 0) {
+	rb_raise(rb_eIndexError, "negative length %ld", len);
+    }
+
+    const long slen = str_length(RSTR(self), false);
+    if (slen < beg) {
+out_of_range:
+	rb_raise(rb_eIndexError, "index %ld out of string", beg);
+    }
+    if (beg < 0) {
+	if (-beg > slen) {
+	    goto out_of_range;
 	}
+	beg += slen;
     }
-    else {
-	dup = (VALUE)objc_msgSend((void *)str, selMutableCopy);
+    if (slen < len || slen < beg + len) {
+	len = slen - beg;
     }
 
-    if (OBJ_TAINTED(str)) {
-	OBJ_TAINT(dup);
+    rstr_modify(self);
+
+    str_splice(RSTR(self), beg, len, strstr, false);
+
+    if (OBJ_TAINTED(strstr)) {
+	OBJ_TAINT(self);
     }
+}
 
-    return dup;
+static void
+rstr_append(VALUE str, VALUE substr)
+{
+    str_concat_string(RSTR(str), str_need_string(substr));
+
+    if (OBJ_TAINTED(substr)) {
+	OBJ_TAINT(str);
+    }
 }
 
+//----------------------------------------------
+// Functions called by MacRuby
+
 VALUE
-rb_str_dup(VALUE str)
+mr_enc_s_is_compatible(VALUE klass, SEL sel, VALUE str1, VALUE str2)
 {
-    return rb_str_dup_imp(str, 0);
+    if (SPECIAL_CONST_P(str1) || SPECIAL_CONST_P(str2)) {
+	return Qnil;
+    }
+    assert(IS_RSTR(str1)); // TODO
+    assert(IS_RSTR(str2)); // TODO
+    rb_encoding_t *encoding = str_compatible_encoding(RSTR(str1), RSTR(str2));
+    if (encoding == NULL) {
+	return Qnil;
+    }
+    else {
+	return (VALUE)encoding;
+    }
 }
 
 static VALUE
-rb_str_clone(VALUE str, SEL sel)
+rstr_alloc(VALUE klass, SEL sel)
 {
-    VALUE clone = rb_str_dup(str);
-    if (OBJ_FROZEN(str)) {
-	OBJ_FREEZE(clone);
+    return (VALUE)str_alloc(klass);
+}
+
+/*
+ *  call-seq:
+ *     String.try_convert(obj) -> string or nil
+ *
+ *  Try to convert <i>obj</i> into a String, using to_str method.
+ *  Returns converted regexp or nil if <i>obj</i> cannot be converted
+ *  for any reason.
+ *
+ *     String.try_convert("str")     # => str
+ *     String.try_convert(/re/)      # => nil
+ */
+
+static VALUE
+rstr_try_convert(VALUE self, SEL sel, VALUE other)
+{
+    return rb_check_string_type(other);
+}
+
+/*
+ *  call-seq:
+ *     str.replace(other_str)   => str
+ *  
+ *  Replaces the contents and taintedness of <i>str</i> with the corresponding
+ *  values in <i>other_str</i>.
+ *     
+ *     s = "hello"         #=> "hello"
+ *     s.replace "world"   #=> "world"
+ */
+
+static VALUE
+rstr_replace(VALUE self, SEL sel, VALUE arg)
+{
+    rstr_modify(self);
+    str_replace(RSTR(self), arg);
+    if (OBJ_TAINTED(arg)) {
+	OBJ_TAINT(self);
     }
-    return clone;
+    return self;
 }
 
 /*
@@ -372,64 +1316,98 @@
  */
 
 static VALUE
-rb_str_init(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_initialize(VALUE self, SEL sel, int argc, VALUE *argv)
 {
     VALUE orig;
-
-    str = (VALUE)objc_msgSend((id)str, selInit);
-
     if (argc > 0 && rb_scan_args(argc, argv, "01", &orig) == 1) {
-	if (str != orig) {
-	    rb_str_replace(str, orig);
+	if (self != orig) {
+	    rstr_replace(self, 0, orig);
 	}
     }
-    return str;
+    return self;
 }
 
-static long
-str_strlen(VALUE str, rb_encoding *enc)
+static VALUE
+rstr_copy(VALUE rcv, VALUE klass)
 {
-    /* TODO should use CFStringGetMaximumSizeForEncoding too */
-    return RSTRING_LEN(str);
+    VALUE dup = rstr_alloc(klass, 0);
+    rstr_replace(dup, 0, rcv);
+    return dup;
 }
 
+static VALUE
+rstr_dup(VALUE str, SEL sel)
+{
+    VALUE klass = CLASS_OF(str);
+    while (RCLASS_SINGLETON(klass)) {
+	klass = RCLASS_SUPER(klass);
+    }
+    assert(rb_klass_is_rstr(klass));
+
+    VALUE dup = rstr_copy(str, klass);
+
+    if (OBJ_TAINTED(str)) {
+	OBJ_TAINT(dup);
+    }
+    if (OBJ_UNTRUSTED(str)) {
+	OBJ_UNTRUST(dup);
+    }
+    return dup;
+}
+
+static VALUE
+rstr_clone(VALUE str, SEL sel)
+{
+    VALUE clone = rstr_copy(str, CLASS_OF(str));
+
+    if (OBJ_TAINTED(str)) {
+	OBJ_TAINT(clone);
+    }
+    if (OBJ_UNTRUSTED(str)) {
+	OBJ_UNTRUST(clone);
+    }
+    if (OBJ_FROZEN(str)) {
+	OBJ_FREEZE(clone);
+    }
+    return clone;
+}
+
 /*
  *  call-seq:
- *     str.length   => integer
- *     str.size     => integer
- *  
- *  Returns the character length of <i>str</i>.
+ *     string.clear    ->  string
+ *
+ *  Makes string empty.
+ *
+ *     a = "abcde"
+ *     a.clear    #=> ""
  */
 
 static VALUE
-rb_str_length_imp(VALUE str, SEL sel)
+rstr_clear(VALUE self, SEL sel)
 {
-    int len;
-
-    len = str_strlen(str, STR_ENC_GET(str));
-    return INT2NUM(len);
+    rstr_modify(self);
+    RSTR(self)->length_in_bytes = 0;
+    return self;
 }
 
-VALUE
-rb_str_length(VALUE str)
+static VALUE
+rstr_chars_count(VALUE self, SEL sel)
 {
-    return rb_str_length_imp(str, 0);
+    return INT2NUM(str_length(RSTR(self), false));
 }
 
 /*
  *  call-seq:
- *     str.bytesize  => integer
+ *     str.length   => integer
+ *     str.size     => integer
  *  
- *  Returns the length of <i>str</i> in bytes.
+ *  Returns the character length of <i>str</i>.
  */
 
 static VALUE
-rb_str_bytesize(VALUE str, SEL sel)
+rstr_length(VALUE self, SEL sel)
 {
-    // TODO Not super accurate...
-    CFStringEncoding encoding = CFStringGetSmallestEncoding((CFStringRef)str);
-    long size = CFStringGetMaximumSizeForEncoding(RSTRING_LEN(str), encoding);
-    return LONG2NUM(size);
+    return INT2NUM(str_length(RSTR(self), true));
 }
 
 /*
@@ -443,455 +1421,644 @@
  */
 
 static VALUE
-rb_str_empty(VALUE str, SEL sel)
+rstr_empty(VALUE self, SEL sel)
 {
-    return RSTRING_LEN(str) == 0 ? Qtrue : Qfalse;
+    return RSTR(self)->length_in_bytes == 0 ? Qtrue : Qfalse;
 }
 
 /*
  *  call-seq:
- *     str + other_str   => new_str
+ *     str.bytesize  => integer
  *  
- *  Concatenation---Returns a new <code>String</code> containing
- *  <i>other_str</i> concatenated to <i>str</i>.
- *     
- *     "Hello from " + self.to_s   #=> "Hello from main"
+ *  Returns the length of <i>str</i> in bytes.
  */
 
 static VALUE
-rb_str_plus_imp(VALUE str1, SEL sel, VALUE str2)
+rstr_bytesize(VALUE self, SEL sel)
 {
-    StringValue(str2);
-    VALUE str3 = rb_str_new(0, 0);
-    rb_str_buf_append(str3, str1);
-    rb_str_buf_append(str3, str2);
-    if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2)) {
-	OBJ_TAINT(str3);
-    }
-    return str3;
+    return INT2NUM(str_bytesize(RSTR(self)));
 }
 
-VALUE
-rb_str_plus(VALUE str1, VALUE str2)
+static VALUE
+rstr_encoding(VALUE self, SEL sel)
 {
-    return rb_str_plus_imp(str1, 0, str2);
+    return (VALUE)RSTR(self)->encoding;
 }
 
-
 /*
  *  call-seq:
- *     str * integer   => new_str
- *  
- *  Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
- *  the receiver.
- *     
- *     "Ho! " * 3   #=> "Ho! Ho! Ho! "
+ *     str.getbyte(index)          => 0 .. 255
+ *
+ *  returns the <i>index</i>th byte as an integer.
  */
 
 static VALUE
-rb_str_times(VALUE str, SEL sel, VALUE times)
+rstr_getbyte(VALUE self, SEL sel, VALUE index)
 {
-    const long n = RSTRING_LEN(str);
-    const long len = NUM2LONG(times);
-    if (len < 0) {
-	rb_raise(rb_eArgError, "negative argument");
+    unsigned char c = 0;
+    long idx = NUM2LONG(index);
+
+    if (str_is_stored_in_uchars(RSTR(self))
+	    && NATIVE_UTF16_ENC(RSTR(self)->encoding)) {
+	if (idx < 0) {
+	    idx += RSTR(self)->length_in_bytes;
+	    if (idx < 0) {
+		return Qnil;
+	    }
+	}
+	if (idx >= RSTR(self)->length_in_bytes) {
+	    return Qnil;
+	}
+	if (NATIVE_UTF16_ENC(RSTR(self)->encoding)) {
+	    c = RSTR(self)->data.bytes[idx];
+	}
+	else { // non native byte-order UTF-16
+	    if ((idx & 1) == 0) { // even
+		c = RSTR(self)->data.bytes[idx+1];
+	    }
+	    else { // odd
+		c = RSTR(self)->data.bytes[idx-1];
+	    }
+	}
     }
-    if (len && LONG_MAX/len < n) {
-	rb_raise(rb_eArgError, "argument too big");
-    }
+    else {
+	// work with a binary string
+	// (UTF-16 strings could be converted to their binary form
+	//  on the fly but that would just add complexity)
+	str_make_data_binary(RSTR(self));
 
-    VALUE str2 = rb_str_new3(str);
-    CFStringPad((CFMutableStringRef)str2, (CFStringRef)str,
-	    len * n, 0);
-    if (OBJ_TAINTED(str)) {
-	OBJ_TAINT(str2);
+	if (idx < 0) {
+	    idx += RSTR(self)->length_in_bytes;
+	    if (idx < 0) {
+		return Qnil;
+	    }
+	}
+	if (idx >= RSTR(self)->length_in_bytes) {
+	    return Qnil;
+	}
+	c = RSTR(self)->data.bytes[idx];
     }
 
-    return str2;
+    return INT2FIX(c); 
 }
 
 /*
  *  call-seq:
- *     str % arg   => new_str
- *  
- *  Format---Uses <i>str</i> as a format specification, and returns the result
- *  of applying it to <i>arg</i>. If the format specification contains more than
- *  one substitution, then <i>arg</i> must be an <code>Array</code> containing
- *  the values to be substituted. See <code>Kernel::sprintf</code> for details
- *  of the format string.
- *     
- *     "%05d" % 123                              #=> "00123"
- *     "%-5s: %08x" % [ "ID", self.object_id ]   #=> "ID   : 200e14d6"
+ *     str.setbyte(index, int) => int
+ *
+ *  modifies the <i>index</i>th byte as <i>int</i>.
  */
 
 static VALUE
-rb_str_format_m(VALUE str, SEL sel, VALUE arg)
+rstr_setbyte(VALUE self, SEL sel, VALUE index, VALUE value)
 {
-    VALUE tmp = rb_check_array_type(arg);
-
-    if (!NIL_P(tmp)) {
-	return rb_str_format(RARRAY_LEN(tmp), RARRAY_PTR(tmp), str);
+    rstr_modify(self);
+    str_make_data_binary(RSTR(self));
+    if ((index < -RSTR(self)->length_in_bytes)
+	    || (index >= RSTR(self)->length_in_bytes)) {
+	rb_raise(rb_eIndexError, "index %ld out of string", index);
     }
-    return rb_str_format(1, &arg, str);
-}
-
-static inline void
-str_modifiable(VALUE str)
-{
-    long mask = rb_objc_flag_get_mask((void *)str);
-    if (RSTRING_IMMUTABLE(str)) {
-	mask |= FL_FREEZE;
+    if (index < 0) {
+	index += RSTR(self)->length_in_bytes;
     }
-    if ((mask & FL_FREEZE) == FL_FREEZE) {
-	rb_raise(rb_eRuntimeError, "can't modify frozen/immutable string");
-    }
-    if ((mask & FL_TAINT) == FL_TAINT && rb_safe_level() >= 4) {
-	rb_raise(rb_eSecurityError, "Insecure: can't modify string");
-    }
+    RSTR(self)->data.bytes[index] = value;
+    return value;
 }
 
-void
-rb_str_modify(VALUE str)
+/*
+ *  call-seq:
+ *     str.force_encoding(encoding)   => str
+ *
+ *  Changes the encoding to +encoding+ and returns self.
+ */
+
+static VALUE
+rstr_force_encoding(VALUE self, SEL sel, VALUE encoding)
 {
-#if WITH_OBJC
-    str_modifiable(str);
-#else
-    if (!str_independent(str)) {
-	str_make_independent(str);
+    rstr_modify(self);
+    rb_encoding_t *enc = rb_to_encoding(encoding);
+    if (enc != RSTR(self)->encoding) {
+	str_make_data_binary(RSTR(self));
+	if (NATIVE_UTF16_ENC(RSTR(self)->encoding)) {
+	    str_set_stored_in_uchars(RSTR(self), false);
+	}
+	RSTR(self)->encoding = enc;
+	str_unset_facultative_flags(RSTR(self));
+	if (NATIVE_UTF16_ENC(RSTR(self)->encoding)) {
+	    str_set_stored_in_uchars(RSTR(self), true);
+	}
     }
-    ENC_CODERANGE_CLEAR(str);
-#endif
+    return self;
 }
 
-void
-rb_str_associate(VALUE str, VALUE add)
+/*
+ *  call-seq:
+ *     str.valid_encoding?  => true or false
+ *  
+ *  Returns true for a string which encoded correctly.
+ *
+ *    "\xc2\xa1".force_encoding("UTF-8").valid_encoding? => true
+ *    "\xc2".force_encoding("UTF-8").valid_encoding? => false
+ *    "\x80".force_encoding("UTF-8").valid_encoding? => false
+ */
+
+static VALUE
+rstr_is_valid_encoding(VALUE self, SEL sel)
 {
-    /* sanity check */
-    if (OBJ_FROZEN(str)) rb_error_frozen("string");
+    return str_is_valid_encoding(RSTR(self)) ? Qtrue : Qfalse;
 }
 
-VALUE
-rb_str_associated(VALUE str)
+/*
+ *  call-seq:
+ *     str.ascii_only?  => true or false
+ *  
+ *  Returns true for a string which has only ASCII characters.
+ *
+ *    "abc".force_encoding("UTF-8").ascii_only? => true
+ *    "abc\u{6666}".force_encoding("UTF-8").ascii_only? => false
+ */
+
+static VALUE
+rstr_is_ascii_only(VALUE self, SEL sel)
 {
-    return Qfalse;
+    return str_is_ruby_ascii_only(RSTR(self)) ? Qtrue : Qfalse;
 }
 
-VALUE
-rb_string_value(volatile VALUE *ptr)
+/*
+ *  call-seq:
+ *     str[fixnum]                 => new_str or nil
+ *     str[fixnum, fixnum]         => new_str or nil
+ *     str[range]                  => new_str or nil
+ *     str[regexp]                 => new_str or nil
+ *     str[regexp, fixnum]         => new_str or nil
+ *     str[other_str]              => new_str or nil
+ *     str.slice(fixnum)           => new_str or nil
+ *     str.slice(fixnum, fixnum)   => new_str or nil
+ *     str.slice(range)            => new_str or nil
+ *     str.slice(regexp)           => new_str or nil
+ *     str.slice(regexp, fixnum)   => new_str or nil
+ *     str.slice(other_str)        => new_str or nil
+ *  
+ *  Element Reference---If passed a single <code>Fixnum</code>, returns a
+ *  substring of one character at that position. If passed two <code>Fixnum</code>
+ *  objects, returns a substring starting at the offset given by the first, and
+ *  a length given by the second. If given a range, a substring containing
+ *  characters at offsets given by the range is returned. In all three cases, if
+ *  an offset is negative, it is counted from the end of <i>str</i>. Returns
+ *  <code>nil</code> if the initial offset falls outside the string, the length
+ *  is negative, or the beginning of the range is greater than the end.
+ *     
+ *  If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
+ *  returned. If a numeric parameter follows the regular expression, that
+ *  component of the <code>MatchData</code> is returned instead. If a
+ *  <code>String</code> is given, that string is returned if it occurs in
+ *  <i>str</i>. In both cases, <code>nil</code> is returned if there is no
+ *  match.
+ *     
+ *     a = "hello there"
+ *     a[1]                   #=> "e"
+ *     a[1,3]                 #=> "ell"
+ *     a[1..3]                #=> "ell"
+ *     a[-3,2]                #=> "er"
+ *     a[-4..-2]              #=> "her"
+ *     a[12..-1]              #=> nil
+ *     a[-2..-4]              #=> ""
+ *     a[/[aeiou](.)\1/]      #=> "ell"
+ *     a[/[aeiou](.)\1/, 0]   #=> "ell"
+ *     a[/[aeiou](.)\1/, 1]   #=> "l"
+ *     a[/[aeiou](.)\1/, 2]   #=> nil
+ *     a["lo"]                #=> "lo"
+ *     a["bye"]               #=> nil
+ */
+
+static VALUE
+rb_str_subpat(VALUE str, VALUE re, int nth)
 {
-    VALUE s = *ptr;
-    if (TYPE(s) != T_STRING) {
-	s = rb_str_to_str(s);
-	*ptr = s;
+    if (rb_reg_search(re, str, 0, false) >= 0) {
+	return rb_reg_nth_match(nth, rb_backref_get());
     }
-#if 0 // Apparently not needed...
-    else if (CLASS_OF(s) == rb_cByteString) {
-	s = (VALUE)rb_bytestring_resolve_cfstring(s);
-    }
-#endif
-    return s;
+    return Qnil;
 }
 
-char *
-rb_string_value_ptr(volatile VALUE *ptr)
+VALUE
+rstr_aref(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    return (char *)RSTRING_PTR(rb_string_value(ptr));
-}
-
-const char *
-rb_str_cstr(VALUE ptr)
-{
-    if (*(VALUE *)ptr == rb_cSymbol) {
-	return RSYMBOL(ptr)->str;
+    if (argc == 2) {
+	if (TYPE(argv[0]) == T_REGEXP) {
+	    return rb_str_subpat(str, argv[0], NUM2INT(argv[1]));
+	}
+	return rstr_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
     }
-    if (*(VALUE *)ptr == rb_cByteString) {
-	return (const char *)rb_bytestring_byte_pointer(ptr);
-    }
 
-    if (RSTRING_LEN(ptr) == 0) {
-	return "";
+    if (argc != 1) {
+	rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc);
     }
 
-    char *cptr = (char *)CFStringGetCStringPtr((CFStringRef)ptr, 0);
-    if (cptr != NULL) {
-	return cptr;
-    }
+    VALUE indx = argv[0];
+    switch (TYPE(indx)) {
+	case T_FIXNUM:
+	    str = rstr_substr(str, FIX2LONG(indx), 1);
+	    if (!NIL_P(str) && str_length(RSTR(str), true) == 0) {
+		return Qnil;
+	    }
+	    return str;
 
-    // XXX this is quite inefficient, but we don't really have a choice.
+	case T_REGEXP:
+	    return rb_str_subpat(str, indx, 0);
 
-    const long max = CFStringGetMaximumSizeForEncoding(
-	    CFStringGetLength((CFStringRef)ptr),
-	    kCFStringEncodingUTF8);
+	case T_STRING:
+	    {
+		if (IS_RSTR(indx)) {
+		    rb_str_t *searched = RSTR(indx);
+		    if (str_include_string(RSTR(str), searched)) {
+			return (VALUE)str_dup(searched);
+		    }
+		}
+		else {
+		    rb_str_t *searched =
+			str_new_from_cfstring((CFStringRef)indx);
+		    if (str_include_string(RSTR(str), searched)) {
+			// no need to duplicate the string as we just
+			// created it
+			return (VALUE)searched;
+		    }
+		}
+		return Qnil;
+	    }
 
-    cptr = (char *)xmalloc(max + 1);
-    if (!CFStringGetCString((CFStringRef)ptr, cptr,
-		max + 1, kCFStringEncodingUTF8)) {
-	// Probably an UTF16 string...
-	xfree(cptr);
-	return NULL;
+	default:
+	    {
+		long beg = 0, len = 0;
+		switch (rb_range_beg_len(indx, &beg, &len, str_length(RSTR(str),
+				false), 0)) {
+		    case Qfalse:
+			break;
+		    case Qnil:
+			return Qnil;
+		    default:
+			return rstr_substr(str, beg, len);
+		}
+		str = rstr_substr(str, NUM2LONG(indx), 1);
+		if (!NIL_P(str) && str_length(RSTR(str), true) == 0) {
+		    return Qnil;
+		}
+		return str;
+	    }
     }
-
-    return cptr;
 }
 
-long
-rb_str_clen(VALUE ptr)
-{
-    return CFStringGetLength((CFStringRef)ptr);
-}
-
-char *
-rb_string_value_cstr(volatile VALUE *ptr)
-{
-    VALUE str = rb_string_value(ptr);
-    return (char *)rb_str_cstr(str);
-}
-
-VALUE
-rb_check_string_type(VALUE str)
-{
-    str = rb_check_convert_type(str, T_STRING, "String", "to_str");
-    return str;
-}
-
 /*
  *  call-seq:
- *     String.try_convert(obj) -> string or nil
- *
- *  Try to convert <i>obj</i> into a String, using to_str method.
- *  Returns converted regexp or nil if <i>obj</i> cannot be converted
- *  for any reason.
- *
- *     String.try_convert("str")     # => str
- *     String.try_convert(/re/)      # => nil
+ *     str[fixnum] = new_str
+ *     str[fixnum, fixnum] = new_str
+ *     str[range] = aString
+ *     str[regexp] = new_str
+ *     str[regexp, fixnum] = new_str
+ *     str[other_str] = new_str
+ *  
+ *  Element Assignment---Replaces some or all of the content of <i>str</i>. The
+ *  portion of the string affected is determined using the same criteria as
+ *  <code>String#[]</code>. If the replacement string is not the same length as
+ *  the text it is replacing, the string will be adjusted accordingly. If the
+ *  regular expression or string is used as the index doesn't match a position
+ *  in the string, <code>IndexError</code> is raised. If the regular expression
+ *  form is used, the optional second <code>Fixnum</code> allows you to specify
+ *  which portion of the match to replace (effectively using the
+ *  <code>MatchData</code> indexing rules. The forms that take a
+ *  <code>Fixnum</code> will raise an <code>IndexError</code> if the value is
+ *  out of range; the <code>Range</code> form will raise a
+ *  <code>RangeError</code>, and the <code>Regexp</code> and <code>String</code>
+ *  forms will silently ignore the assignment.
  */
-static VALUE
-rb_str_s_try_convert(VALUE dummy, SEL sel, VALUE str)
-{
-    return rb_check_string_type(str);
-}
 
-/* byte offset to char offset */
-long
-rb_str_sublen(VALUE str, long pos)
+static void
+rb_str_subpat_set(VALUE str, VALUE re, int nth, VALUE val)
 {
-    return pos;
-}
-
-VALUE
-rb_str_subseq(VALUE str, long beg, long len)
-{
-    if (len < 0) {
-	return Qnil;
+    if (rb_reg_search(re, str, 0, false) < 0) {
+	rb_raise(rb_eIndexError, "regexp not matched");
     }
+    VALUE match = rb_backref_get();
 
-    const long n = CFStringGetLength((CFStringRef)str);
+    int count = 0;
+    rb_match_result_t *results = rb_reg_match_results(match, &count);
+    assert(count > 0);
 
-    if (beg < 0) {
-	beg += n;
+    if (nth >= count) {
+out_of_range:
+	rb_raise(rb_eIndexError, "index %d out of regexp", nth);
     }
-    if (beg > n || beg < 0) {
-	return Qnil;
+    if (nth < 0) {
+	if (-nth >= count) {
+	    goto out_of_range;
+	}
+	nth += count;
     }
-    if (beg + len > n) {
-	len = n - beg;
+
+    const long start = results[nth].beg;
+    if (start == -1) {
+	rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
     }
+    const long end = results[nth].end;
+    const long len = end - start;
+    rstr_splice(str, start, len, val);
+}
 
-    if (*(VALUE *)str == rb_cByteString) {
-	UInt8 *str_data = rb_bytestring_byte_pointer(str);
-	return rb_bytestring_new_with_data(str_data + beg, len);
+static VALUE
+rstr_aset(VALUE str, SEL sel, int argc, VALUE *argv)
+{
+    if (argc == 3) {
+	if (TYPE(argv[0]) == T_REGEXP) {
+	    rb_str_subpat_set(str, argv[0], NUM2INT(argv[1]), argv[2]);
+	}
+	else {
+	    rstr_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]),
+		    argv[2]);
+	}
+	return argv[2];
     }
 
-    CFMutableStringRef substr = CFStringCreateMutable(NULL, 0);
-
-    if (len == 1) {
-	UniChar c = CFStringGetCharacterAtIndex((CFStringRef)str, beg);
-	CFStringAppendCharacters(substr, &c, 1);
+    if (argc != 2) {
+	rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc);
     }
-    else {
-	UniChar *buffer = alloca(sizeof(UniChar) * len);
-	CFStringGetCharacters((CFStringRef)str, CFRangeMake(beg, len), 
-		buffer);
-	CFStringAppendCharacters(substr, buffer, len);
-    }
 
-    CFMakeCollectable(substr);
+    VALUE indx = argv[0];
+    VALUE val = argv[1];
+    long pos = 0;
 
-    return (VALUE)substr;
-}
+    switch (TYPE(indx)) {
+	case T_FIXNUM:
+	    pos = FIX2LONG(indx);
+num_index:
+	    rstr_splice(str, pos, 1, val);
+	    return val;
 
-VALUE
-rb_str_substr(VALUE str, long beg, long len)
-{
-    return rb_str_subseq(str, beg, len);
-}
+	case T_REGEXP:
+	    rb_str_subpat_set(str, indx, 0, val);
+	    return val;
 
-VALUE
-rb_str_dup_frozen(VALUE str)
-{
-    str = rb_str_dup(str);
-    rb_str_freeze(str);
-    return str;
-}
+	case T_STRING:
+	    pos = str_index_for_string(RSTR(str), str_need_string(indx),
+		    0, -1, false, true);
+	    if (pos < 0) {
+		rb_raise(rb_eIndexError, "string not matched");
+	    }
+	    rstr_splice(str, pos, rb_str_chars_len(indx), val);
+	    return val;
 
-VALUE
-rb_str_locktmp(VALUE str)
-{
-    return str;
+	default:
+	    /* check if indx is Range */
+	    {
+		long beg, len;
+		if (rb_range_beg_len(indx, &beg, &len,
+			    str_length(RSTR(str), true), 2)) {
+		    rstr_splice(str, beg, len, val);
+		    return val;
+		}
+	    }
+	    pos = NUM2LONG(indx);
+	    goto num_index;
+    }
 }
 
-VALUE
-rb_str_unlocktmp(VALUE str)
+/*
+ *  call-seq:
+ *     str.insert(index, other_str)   => str
+ *  
+ *  Inserts <i>other_str</i> before the character at the given
+ *  <i>index</i>, modifying <i>str</i>. Negative indices count from the
+ *  end of the string, and insert <em>after</em> the given character.
+ *  The intent is insert <i>aString</i> so that it starts at the given
+ *  <i>index</i>.
+ *     
+ *     "abcd".insert(0, 'X')    #=> "Xabcd"
+ *     "abcd".insert(3, 'X')    #=> "abcXd"
+ *     "abcd".insert(4, 'X')    #=> "abcdX"
+ *     "abcd".insert(-3, 'X')   #=> "abXcd"
+ *     "abcd".insert(-1, 'X')   #=> "abcdX"
+ */
+
+static VALUE
+rstr_insert(VALUE str, SEL sel, VALUE idx, VALUE substr)
 {
+    long pos = NUM2LONG(idx);
+    if (pos == -1) {
+	rstr_append(str, substr);
+    }
+    else {
+	if (pos < 0) {
+	    pos++;
+	}
+	rstr_splice(str, pos, 0, substr);
+    }
     return str;
 }
 
-void
-rb_str_set_len(VALUE str, long len)
-{
-    rb_str_resize(str, len);    
-}
+/*
+ *  call-seq:
+ *     str.index(substring [, offset])   => fixnum or nil
+ *     str.index(fixnum [, offset])      => fixnum or nil
+ *     str.index(regexp [, offset])      => fixnum or nil
+ *  
+ *  Returns the index of the first occurrence of the given <i>substring</i>,
+ *  character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
+ *  <code>nil</code> if not found. If the second parameter is present, it
+ *  specifies the position in the string to begin the search.
+ *     
+ *     "hello".index('e')             #=> 1
+ *     "hello".index('lo')            #=> 3
+ *     "hello".index('a')             #=> nil
+ *     "hello".index(?e)              #=> 1
+ *     "hello".index(101)             #=> 1
+ *     "hello".index(/[aeiou]/, -3)   #=> 4
+ */
 
-VALUE
-rb_str_resize(VALUE str, long len)
+static VALUE
+rstr_index(VALUE self, SEL sel, int argc, VALUE *argv)
 {
-    long slen;
+    const long len = str_length(RSTR(self), false);
+    VALUE sub, initpos;
+    long pos;
 
-    if (len < 0) {
-	rb_raise(rb_eArgError, "negative string size (or size too big)");
+    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
+	pos = NUM2LONG(initpos);
+	if (pos < 0) {
+	    pos += len;
+	}
+	if (pos < 0 || pos >= len) {
+	    if (TYPE(sub) == T_REGEXP) {
+		rb_backref_set(Qnil);
+	    }
+	    return Qnil;
+	}
     }
+    else {
+	pos = 0;
+    }
 
-    rb_str_modify(str);
-    slen = RSTRING_LEN(str);
-    if (slen != len) {
-	CFStringPad((CFMutableStringRef)str, CFSTR(" "), len, 0);
+    switch (TYPE(sub)) {
+	case T_REGEXP:
+	    pos = rb_reg_search(sub, self, pos, false);
+	    break;
+
+	default: 
+	    StringValue(sub);
+	    // fall through
+	case T_STRING:
+	    pos = str_index_for_string(RSTR(self), str_need_string(sub),
+		    pos, -1, false, true);
+	    break;
     }
-    return str;
+
+    return pos >= 0 ? LONG2NUM(pos) : Qnil;
 }
 
-static void
-rb_objc_str_cat(VALUE str, const char *ptr, long len, int cfstring_encoding)
+/*
+ *  call-seq:
+ *     str.rindex(substring [, fixnum])   => fixnum or nil
+ *     str.rindex(fixnum [, fixnum])   => fixnum or nil
+ *     str.rindex(regexp [, fixnum])   => fixnum or nil
+ *  
+ *  Returns the index of the last occurrence of the given <i>substring</i>,
+ *  character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
+ *  <code>nil</code> if not found. If the second parameter is present, it
+ *  specifies the position in the string to end the search---characters beyond
+ *  this point will not be considered.
+ *     
+ *     "hello".rindex('e')             #=> 1
+ *     "hello".rindex('l')             #=> 3
+ *     "hello".rindex('a')             #=> nil
+ *     "hello".rindex(?e)              #=> 1
+ *     "hello".rindex(101)             #=> 1
+ *     "hello".rindex(/[aeiou]/, -2)   #=> 1
+ */
+
+static VALUE
+rstr_rindex(VALUE self, SEL sel, int argc, VALUE *argv)
 {
-    if (*(VALUE *)str == rb_cByteString) {
-	CFMutableDataRef data = rb_bytestring_wrapped_data(str);
-	CFDataAppendBytes(data, (const UInt8 *)ptr, len);
-    }
-    else {
-	long plen = strlen(ptr);
-	if (plen >= len) {
-	    const char *cstr;
-	    if (plen > len) {
-		// Sometimes the given string is bigger than the given length.
-		char *tmp = alloca(len + 1);
-		strncpy(tmp, ptr, len);
-		tmp[len] = '\0';
-		cstr = (const char *)tmp;
+    const long len = str_length(RSTR(self), false);
+    VALUE sub, initpos;
+    long pos;
+
+    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
+	pos = NUM2LONG(initpos);
+	if (pos < 0) {
+	    pos += len;
+	    if (pos < 0) {
+		if (TYPE(sub) == T_REGEXP) {
+		    rb_backref_set(Qnil);
+		}
+		return Qnil;
 	    }
-	    else {
-		cstr = ptr;
-	    }
-	    CFStringAppendCString((CFMutableStringRef)str, cstr,
-		    cfstring_encoding);
 	}
-	else {
-	    // Promoting as bytestring!
-	    CFDataRef data = CFStringCreateExternalRepresentation(NULL,
-		    (CFStringRef)str, kCFStringEncodingUTF8, 0);
-	    assert(data != NULL);
-	    CFMutableDataRef mdata = CFDataCreateMutableCopy(NULL, 0, data);
-	    CFRelease(data);
-
-	    rb_bstr_t *bstr = (rb_bstr_t *)str;
-	    bstr->basic.klass = rb_cByteString;
-	    bstr->basic.flags = 0;
-	    GC_WB(&bstr->data, mdata);
-
-	    CFMakeCollectable(mdata);
+	if (pos >= len) {
+	    pos = len - 1;
 	}
     }
-}
+    else {
+	pos = len - 1;
+    }
 
-VALUE
-rb_str_buf_cat(VALUE str, const char *ptr, long len)
-{
-    rb_objc_str_cat(str, ptr, len, kCFStringEncodingASCII);
+    switch (TYPE(sub)) {
+	case T_REGEXP:
+	    pos = rb_reg_search(sub, self, pos, true);
+	    break;
 
-    return str;
-}
-
-VALUE
-rb_str_buf_cat2(VALUE str, const char *ptr)
-{
-    return rb_str_buf_cat(str, ptr, strlen(ptr));
-}
-
-VALUE
-rb_str_cat(VALUE str, const char *ptr, long len)
-{
-    if (len < 0) {
-	rb_raise(rb_eArgError, "negative string size (or size too big)");
+	default: 
+	    StringValue(sub);
+	    // fall through
+	case T_STRING:
+	    pos = str_index_for_string(RSTR(self), str_need_string(sub),
+		    0, pos - 1, true, true);
+	    break;
     }
 
-    return rb_str_buf_cat(str, ptr, len);
+    return pos >= 0 ? LONG2NUM(pos) : Qnil;
 }
 
-VALUE
-rb_str_cat2(VALUE str, const char *ptr)
+static VALUE
+rstr_getchar(VALUE self, SEL sel, VALUE index)
 {
-    return rb_str_cat(str, ptr, strlen(ptr));
+    const long idx = FIX2LONG(index);
+    return rstr_substr(self, idx, 1);
 }
 
-VALUE
-rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
-{
-    rb_objc_str_cat(str, ptr, len, kCFStringEncodingUTF8);
-    return str;
-}
+/*
+ *  call-seq:
+ *     str + other_str   => new_str
+ *  
+ *  Concatenation---Returns a new <code>String</code> containing
+ *  <i>other_str</i> concatenated to <i>str</i>.
+ *     
+ *     "Hello from " + self.to_s   #=> "Hello from main"
+ */
 
-VALUE
-rb_str_buf_cat_ascii(VALUE str, const char *ptr)
+static VALUE
+rstr_plus(VALUE self, SEL sel, VALUE other)
 {
-    rb_objc_str_cat(str, ptr, strlen(ptr), kCFStringEncodingASCII);
-    return str;
+    rb_str_t *newstr = str_dup(RSTR(self));
+    str_concat_string(newstr, str_need_string(other));
+    if (OBJ_TAINTED(self) || OBJ_TAINTED(other)) {
+	OBJ_TAINT(newstr);
+    }
+    return (VALUE)newstr;
 }
 
-static inline VALUE
-rb_str_buf_append0(VALUE str, VALUE str2)
+/*
+ *  call-seq:
+ *     str * integer   => new_str
+ *  
+ *  Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
+ *  the receiver.
+ *     
+ *     "Ho! " * 3   #=> "Ho! Ho! Ho! "
+ */
+
+static VALUE
+rstr_times(VALUE self, SEL sel, VALUE times)
 {
-    if (TYPE(str2) != T_SYMBOL) {
-	Check_Type(str2, T_STRING);
+    const long len = NUM2LONG(times);
+    if (len < 0) {
+	rb_raise(rb_eArgError, "negative argument");
     }
+    if (len > 0 && LONG_MAX/len < str_length(RSTR(self), true)) {
+	rb_raise(rb_eArgError, "argument too big");
+    }
 
-    CFStringAppend((CFMutableStringRef)str, (CFStringRef)str2);
+    VALUE new = str_new_like(self);
+    for (long i = 0; i < len; i++) {
+	str_concat_string(RSTR(new), RSTR(self));
+    }
+    if (OBJ_TAINTED(self)) {
+	OBJ_TAINT(new);
+    }
+    return new;
+}
 
-#if 0
-    const char *ptr;
-    long len;
+/*
+ *  call-seq:
+ *     str % arg   => new_str
+ *  
+ *  Format---Uses <i>str</i> as a format specification, and returns the result
+ *  of applying it to <i>arg</i>. If the format specification contains more than
+ *  one substitution, then <i>arg</i> must be an <code>Array</code> containing
+ *  the values to be substituted. See <code>Kernel::sprintf</code> for details
+ *  of the format string.
+ *     
+ *     "%05d" % 123                              #=> "00123"
+ *     "%-5s: %08x" % [ "ID", self.object_id ]   #=> "ID   : 200e14d6"
+ */
 
-    ptr = RSTRING_PTR(str2);
-    len = RSTRING_LEN(str2);
-
-    rb_objc_str_cat(str, ptr, len, kCFStringEncodingASCII);
-#endif
-
-    return str;
-}
-
-VALUE
-rb_str_buf_append(VALUE str, VALUE str2)
+static VALUE
+rstr_format(VALUE str, SEL sel, VALUE arg)
 {
-   return rb_str_buf_append0(str, str2);
-}
+    VALUE tmp = rb_check_array_type(arg);
 
-VALUE
-rb_str_append(VALUE str, VALUE str2)
-{
-    StringValue(str2);
-    rb_str_modify(str);
-    return rb_str_buf_append0(str, str2);
+    if (!NIL_P(tmp)) {
+	return rb_str_format(RARRAY_LEN(tmp), RARRAY_PTR(tmp), str);
+    }
+    return rb_str_format(1, &arg, str);
 }
 
-
 /*
  *  call-seq:
  *     str << fixnum        => str
@@ -909,68 +2076,30 @@
  */
 
 static VALUE
-rb_str_concat_imp(VALUE str1, SEL sel, VALUE str2)
+rstr_concat(VALUE self, SEL sel, VALUE other)
 {
-    if (FIXNUM_P(str2)) {
-        int c = FIX2INT(str2);
-	char buf[2];
+    rstr_modify(self);
 
-	rb_str_modify(str1);
-	buf[0] = (char)c;
-	buf[1] = '\0';
-	CFStringAppendCString((CFMutableStringRef)str1, buf, 
-			      kCFStringEncodingUTF8);
-	return str1;
+    long codepoint = 0;
+    switch (TYPE(other)) {
+	case T_FIXNUM:
+	    codepoint = FIX2LONG(other);
+	    break;
+
+	case T_BIGNUM:
+	    codepoint = rb_big2ulong(other);
+	    break;
+	    
+	default:
+	    rstr_append(self, other);
+	    return self;
     }
-    return rb_str_append(str1, str2);
-}
 
-VALUE
-rb_str_concat(VALUE str1, VALUE str2)
-{
-    return rb_str_concat_imp(str1, 0, str2);
-}
+    // TODO: handle codepoint
 
-int
-rb_memhash(const void *ptr, long len)
-{
-    CFDataRef data;
-    int code;
-
-    data = CFDataCreate(NULL, (const UInt8 *)ptr, len);
-    code = CFHash(data);
-    CFRelease((CFTypeRef)data);
-    return code;
+    return self;
 }
 
-int
-rb_str_hash(VALUE str)
-{
-    return CFHash((CFTypeRef)str);
-}
-
-int
-rb_str_hash_cmp(VALUE str1, VALUE str2)
-{
-    return CFEqual((CFTypeRef)str1, (CFTypeRef)str2) ? 0 : 1;
-}
-
-#define lesser(a,b) (((a)>(b))?(b):(a))
-
-int
-rb_str_comparable(VALUE str1, VALUE str2)
-{
-    return Qtrue;
-}
-
-int
-rb_str_cmp(VALUE str1, VALUE str2)
-{
-    return CFStringCompare((CFStringRef)str1, (CFStringRef)str2, 0);
-}
-
-bool rb_objc_str_is_pure(VALUE);
-
 /*
  *  call-seq:
  *     str == obj   => true or false
@@ -981,61 +2110,22 @@
  */
 
 static VALUE
-rb_str_equal_imp(VALUE str1, SEL sel, VALUE str2)
+rstr_equal(VALUE self, SEL sel, VALUE other)
 {
-    if (str1 == str2) {
+    if (self == other) {
 	return Qtrue;
     }
-    if (TYPE(str2) != T_STRING) {
-	if (!rb_respond_to(str2, rb_intern("to_str"))) {
+    if (TYPE(other) != T_STRING) {
+	if (!rb_respond_to(other, rb_intern("to_str"))) {
 	    return Qfalse;
 	}
-	return rb_equal(str2, str1);
+	return rb_equal(other, self);
     }
-    if (*(VALUE *)str1 == *(VALUE *)str2) {
-	if (RSTRING_LEN(str1) != RSTRING_LEN(str2)) {
-	    return Qfalse;
-	}
-    }
-    if (!rb_objc_str_is_pure(str2)) {
-	/* This is to work around a strange bug in CFEqual's objc 
-	 * dispatching.
-	 */
-	VALUE tmp = str1;
-	str1 = str2;
-	str2 = tmp;
-    }
-    return CFEqual((CFTypeRef)str1, (CFTypeRef)str2) ? Qtrue : Qfalse;
+    return str_compare(RSTR(self), str_need_string(other)) == 0
+	? Qtrue : Qfalse;
 }
 
-VALUE
-rb_str_equal(VALUE str1, VALUE str2)
-{
-    return rb_str_equal_imp(str1, 0, str2);
-}
-
 /*
- * call-seq:
- *   str.eql?(other)   => true or false
- *
- * Two strings are equal if the have the same length and content.
- */
-
-static VALUE
-rb_str_eql(VALUE str1, SEL sel, VALUE str2)
-{
-    if (TYPE(str2) != T_STRING) {
-	return Qfalse;
-    }
-
-    if (CFEqual((CFTypeRef)str1, (CFTypeRef)str2)) {
-	return Qtrue;
-    }
-
-    return Qfalse;
-}
-
-/*
  *  call-seq:
  *     str <=> other_str   => -1, 0, +1
  *  
@@ -1059,39 +2149,40 @@
  */
 
 static VALUE
-rb_str_cmp_m(VALUE str1, SEL sel, VALUE str2)
+rstr_cmp(VALUE self, SEL sel, VALUE other)
 {
     long result;
 
-    if (TYPE(str2) != T_STRING) {
-	if (!rb_respond_to(str2, rb_intern("to_str"))) {
+    if (TYPE(other) != T_STRING) {
+	if (!rb_respond_to(other, rb_intern("to_str"))) {
 	    return Qnil;
 	}
-	else if (!rb_respond_to(str2, rb_intern("<=>"))) {
+	else if (!rb_vm_respond_to(other, selCmp, false)) {
 	    return Qnil;
 	}
 	else {
-	    VALUE tmp = rb_funcall(str2, rb_intern("<=>"), 1, str1);
-
-	    if (NIL_P(tmp)) return Qnil;
+	    VALUE tmp = rb_vm_call(other, selCmp, 1, &self, false);
+	    if (NIL_P(tmp)) {
+		return Qnil;
+	    }
 	    if (!FIXNUM_P(tmp)) {
-		return rb_funcall(LONG2FIX(0), '-', 1, tmp);
+		return rb_vm_call(LONG2FIX(0), selMINUS, 1, &tmp, false);
 	    }
 	    result = -FIX2LONG(tmp);
 	}
     }
     else {
-	result = rb_str_cmp(str1, str2);
+	result = str_compare(RSTR(self), str_need_string(other));
     }
     return LONG2NUM(result);
 }
 
 /*
  *  call-seq:
- *     str.casecmp(other_str)   => -1, 0, +1
- *  
+ *     str.casecmp(other_str)   => -1, 0, +1 or nil
+ *
  *  Case-insensitive version of <code>String#<=></code>.
- *     
+ *
  *     "abcdef".casecmp("abcde")     #=> 1
  *     "aBcDeF".casecmp("abcdef")    #=> 0
  *     "abcdef".casecmp("abcdefg")   #=> -1
@@ -1099,215 +2190,295 @@
  */
 
 static VALUE
-rb_str_casecmp(VALUE str1, SEL sel, VALUE str2)
+rstr_casecmp(VALUE str, SEL sel, VALUE other)
 {
-    return INT2FIX(CFStringCompare((CFStringRef)str1, (CFStringRef)str2,
-	kCFCompareCaseInsensitive));
+    return INT2FIX(str_case_compare(RSTR(str), str_need_string(other)));
 }
 
-static long
-rb_str_index(VALUE str, VALUE sub, long offset)
+/*
+ * call-seq:
+ *   str.eql?(other)   => true or false
+ *
+ * Two strings are equal if they have the same length and content.
+ */
+
+static VALUE
+rstr_eql(VALUE self, SEL sel, VALUE other)
 {
-    CFRange r;
-    return (CFStringFindWithOptions((CFStringRef)str, 
-		(CFStringRef)sub,
-		CFRangeMake(offset, CFStringGetLength((CFStringRef)str) - offset),
-		0,
-		&r))
-	? r.location : -1;
+    if (self == other) {
+	return Qtrue;
+    }
+    if (TYPE(other) != T_STRING) {
+	return Qfalse;
+    }
+    return str_compare(RSTR(self), str_need_string(other)) == 0
+	? Qtrue : Qfalse;
 }
 
 /*
  *  call-seq:
- *     str.index(substring [, offset])   => fixnum or nil
- *     str.index(fixnum [, offset])      => fixnum or nil
- *     str.index(regexp [, offset])      => fixnum or nil
+ *     str.include? other_str   => true or false
+ *     str.include? fixnum      => true or false
  *  
- *  Returns the index of the first occurrence of the given <i>substring</i>,
- *  character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
- *  <code>nil</code> if not found. If the second parameter is present, it
- *  specifies the position in the string to begin the search.
+ *  Returns <code>true</code> if <i>str</i> contains the given string or
+ *  character.
  *     
- *     "hello".index('e')             #=> 1
- *     "hello".index('lo')            #=> 3
- *     "hello".index('a')             #=> nil
- *     "hello".index(?e)              #=> 1
- *     "hello".index(101)             #=> 1
- *     "hello".index(/[aeiou]/, -3)   #=> 4
+ *     "hello".include? "lo"   #=> true
+ *     "hello".include? "ol"   #=> false
+ *     "hello".include? ?h     #=> true
  */
 
 static VALUE
-rb_str_index_m(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_includes(VALUE self, SEL sel, VALUE searched)
 {
-    VALUE sub;
-    VALUE initpos;
-    long pos;
+    return str_include_string(RSTR(self), str_need_string(searched))
+	? Qtrue : Qfalse;
+}
 
-    if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
-	pos = NUM2LONG(initpos);
-    }
-    else {
-	pos = 0;
-    }
-    if (pos < 0) {
-	pos += str_strlen(str, STR_ENC_GET(str));
-	if (pos < 0) {
-	    if (TYPE(sub) == T_REGEXP) {
-		rb_backref_set(Qnil);
-	    }
-	    return Qnil;
+/*
+ *  call-seq:
+ *     str.start_with?([prefix]+)   => true or false
+ *  
+ *  Returns true if <i>str</i> starts with the prefix given.
+ */
+
+static VALUE
+rstr_start_with(VALUE str, SEL sel, int argc, VALUE *argv)
+{
+    for (int i = 0; i < argc; i++) {
+	VALUE tmp = rb_check_string_type(argv[i]);
+	if (NIL_P(tmp)) {
+	    continue;
 	}
+	const long pos = str_index_for_string(RSTR(str), str_need_string(tmp),
+		0, rb_str_chars_len(tmp), false, false);
+	if (pos == 0) {
+	    return Qtrue;
+	}
     }
+    return Qfalse;
+}
 
-    switch (TYPE(sub)) {
-      case T_REGEXP:
-	pos = rb_reg_adjust_startpos(sub, str, pos, 0);
-	pos = rb_reg_search(sub, str, pos, 0);
-	pos = rb_str_sublen(str, pos);
-	break;
+/*
+ *  call-seq:
+ *     str.end_with?([suffix]+)   => true or false
+ *  
+ *  Returns true if <i>str</i> ends with the suffix given.
+ */
 
-      default: {
-	VALUE tmp;
-
-	tmp = rb_check_string_type(sub);
+static VALUE
+rstr_end_with(VALUE str, SEL sel, int argc, VALUE *argv)
+{
+    const long len = rb_str_chars_len(str);
+    for (int i = 0; i < argc; i++) {
+	VALUE tmp = rb_check_string_type(argv[i]);
 	if (NIL_P(tmp)) {
-	    rb_raise(rb_eTypeError, "type mismatch: %s given",
-		     rb_obj_classname(sub));
+	    continue;
 	}
-	sub = tmp;
-      }
-	/* fall through */
-      case T_STRING:
-	pos = rb_str_index(str, sub, pos);
-	pos = rb_str_sublen(str, pos);
-	break;
+	const long sublen = rb_str_chars_len(tmp);
+	if (sublen > len) {
+	    continue;
+	}
+	const long pos = str_index_for_string(RSTR(str), str_need_string(tmp),
+		len - sublen, len, false, false);
+	if (pos == len - sublen) {
+	    return Qtrue;
+	}
     }
+    return Qfalse;
+}
 
-    if (pos == -1) return Qnil;
-    return LONG2NUM(pos);
+static VALUE
+rstr_is_stored_in_uchars(VALUE self, SEL sel)
+{
+    return str_is_stored_in_uchars(RSTR(self)) ? Qtrue : Qfalse;
 }
 
-static long
-rb_str_rindex(VALUE str, VALUE sub, long pos)
+/*
+ *  call-seq:
+ *     str.to_s     => str
+ *     str.to_str   => str
+ *  
+ *  Returns the receiver.
+ */
+
+static VALUE
+rstr_to_s(VALUE self, SEL sel)
 {
-    CFRange r;
-    long sublen, strlen;
-    sublen = RSTRING_LEN(sub);
-    strlen = RSTRING_LEN(str);
-    if (sublen == 0 && strlen == 0)
-	return 0;
-    if (pos <= sublen) {
-	pos = strlen < sublen ? strlen : sublen;
+    if (CLASS_OF(self) != rb_cRubyString) {
+	VALUE dup = (VALUE)str_dup(RSTR(self));
+	if (OBJ_TAINTED(self)) {
+	    OBJ_TAINT(dup);
+	}
+	return dup;
     }
-    return (CFStringFindWithOptions((CFStringRef)str, 
-		(CFStringRef)sub,
-		CFRangeMake(0, pos+1),
-		kCFCompareBackwards,
-		&r))
-	? r.location : -1;
+    return self;
 }
 
-
 /*
  *  call-seq:
- *     str.rindex(substring [, fixnum])   => fixnum or nil
- *     str.rindex(fixnum [, fixnum])   => fixnum or nil
- *     str.rindex(regexp [, fixnum])   => fixnum or nil
+ *     str.intern   => symbol
+ *     str.to_sym   => symbol
  *  
- *  Returns the index of the last occurrence of the given <i>substring</i>,
- *  character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
- *  <code>nil</code> if not found. If the second parameter is present, it
- *  specifies the position in the string to end the search---characters beyond
- *  this point will not be considered.
+ *  Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
+ *  symbol if it did not previously exist. See <code>Symbol#id2name</code>.
  *     
- *     "hello".rindex('e')             #=> 1
- *     "hello".rindex('l')             #=> 3
- *     "hello".rindex('a')             #=> nil
- *     "hello".rindex(?e)              #=> 1
- *     "hello".rindex(101)             #=> 1
- *     "hello".rindex(/[aeiou]/, -2)   #=> 1
+ *     "Koala".intern         #=> :Koala
+ *     s = 'cat'.to_sym       #=> :cat
+ *     s == :cat              #=> true
+ *     s = '@cat'.to_sym      #=> :@cat
+ *     s == :@cat             #=> true
+ *
+ *  This can also be used to create symbols that cannot be represented using the
+ *  <code>:xxx</code> notation.
+ *     
+ *     'cat and dog'.to_sym   #=> :"cat and dog"
  */
 
 static VALUE
-rb_str_rindex_m(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_intern(VALUE self, SEL sel)
 {
-    VALUE sub;
-    VALUE vpos;
-    rb_encoding *enc = STR_ENC_GET(str);
-    long pos, len = str_strlen(str, enc);
+    if (OBJ_TAINTED(self) && rb_safe_level() >= 1) {
+	rb_raise(rb_eSecurityError, "Insecure: can't intern tainted string");
+    }
+    return rb_str_intern_fast(self);
+}
 
-    if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
-	pos = NUM2LONG(vpos);
-	if (pos < 0) {
-	    pos += len;
-	    if (pos < 0) {
-		if (TYPE(sub) == T_REGEXP) {
-		    rb_backref_set(Qnil);
-		}
-		return Qnil;
-	    }
-	}
-	if (pos > len) pos = len;
+/*
+ * call-seq:
+ *   str.inspect   => string
+ *
+ * Returns a printable version of _str_, surrounded by quote marks,
+ * with special characters escaped.
+ *
+ *    str = "hello"
+ *    str[3] = "\b"
+ *    str.inspect       #=> "\"hel\\bo\""
+ */
+
+static void
+inspect_append(VALUE result, UChar c, bool escape)
+{
+    if (escape) {
+	str_append_uchar(RSTR(result), '\\');
     }
-    else {
-	pos = len;
+    str_append_uchar(RSTR(result), c);
+}
+
+static VALUE
+str_inspect(rb_str_t *str, bool dump)
+{
+    const bool uchars = str_try_making_data_uchars(str);
+    const long len = uchars
+	? str_length(str, true) : str->length_in_bytes;
+
+    VALUE result;
+    if (len == 0) {
+	result = rb_str_new2("\"\"");
+	goto bail;
     }
 
-    switch (TYPE(sub)) {
-      case T_REGEXP:
-	/* enc = rb_get_check(str, sub); */
-	if (RREGEXP(sub)->len) {
-	    pos = rb_reg_adjust_startpos(sub, str, pos, 1);
-	    pos = rb_reg_search(sub, str, pos, 1);
-	    pos = rb_str_sublen(str, pos);
+    // Allocate an UTF-8 string with a good initial capacity.
+    // Binary strings will likely have most bytes escaped.
+    const long result_init_len =
+	BINARY_ENC(str->encoding) ? (len * 5) + 2 : len + 2;
+    result = rb_unicode_str_new(NULL, result_init_len);
+
+#define GET_UCHAR(pos) \
+    ((uchars \
+      ? str->data.uchars[pos] : (unsigned char)str->data.bytes[pos]))
+
+    inspect_append(result, '"', false);
+    for (long i = 0; i < len; i++) {
+	const UChar c = GET_UCHAR(i);
+
+	bool print;
+	if (uchars) {
+	    print = iswprint(c);
 	}
-	if (pos >= 0) return LONG2NUM(pos);
-	break;
+	else { // ASCII printable characters
+	    print = ((c >= 0x20) && (c <= 0x7E));
+	}
+	if (print) {
+	    if (c == '"' || c == '\\') {
+		inspect_append(result, c, true);
+	    }
+	    else if (dump && c == '#' && i + 1 < len) {
+		const UChar c2 = GET_UCHAR(i + 1);
+		const bool need_escape = c2 == '$' || c2 == '@' || c2 == '{';
+		inspect_append(result, c, need_escape);
+	    }
+	    else {
+		inspect_append(result, c, false);
+	    }
+	}
+	else if (c == '\n') {
+	    inspect_append(result, 'n', true);
+	} 
+	else if (c == '\r') {
+	    inspect_append(result, 'r', true);
+	} 
+	else if (c == '\t') {
+	    inspect_append(result, 't', true);
+	} 
+	else if (c == '\f') {
+	    inspect_append(result, 'f', true);
+	}
+	else if (c == '\013') {
+	    inspect_append(result, 'v', true);
+	}
+	else if (c == '\010') {
+	    inspect_append(result, 'b', true);
+	}
+	else if (c == '\007') {
+	    inspect_append(result, 'a', true);
+	}
+	else if (c == 033) {
+	    inspect_append(result, 'e', true);
+	}
+	else {
+	    char buf[10];
+	    snprintf(buf, sizeof buf, "\\x%02X", c);
+	    char *p = buf;
+	    while (*p != '\0') {
+		inspect_append(result, *p, false);
+		p++;
+	    }
+	}
+    }
+    inspect_append(result, '"', false);
+   
+#undef GET_UCHAR
 
-      default:
-	StringValue(sub);
-	/* fall through */
-      case T_STRING:
-	pos = rb_str_rindex(str, sub, pos);
-	if (pos >= 0) return LONG2NUM(pos);
-	break;
+bail:
+    if (OBJ_TAINTED(str)) {
+	OBJ_TAINT(result);
     }
-    return Qnil;
+    return result; 
 }
 
+static VALUE
+rstr_inspect(VALUE self, SEL sel)
+{
+    return str_inspect(RSTR(self), false);
+}
+
 /*
  *  call-seq:
- *     str =~ obj   => fixnum or nil
+ *     str.dump   => new_str
  *  
- *  Match---If <i>obj</i> is a <code>Regexp</code>, use it as a pattern to match
- *  against <i>str</i>,and returns the position the match starts, or 
- *  <code>nil</code> if there is no match. Otherwise, invokes
- *  <i>obj.=~</i>, passing <i>str</i> as an argument. The default
- *  <code>=~</code> in <code>Object</code> returns <code>false</code>.
- *     
- *     "cat o' 9 tails" =~ /\d/   #=> 7
- *     "cat o' 9 tails" =~ 9      #=> nil
+ *  Produces a version of <i>str</i> with all nonprinting characters replaced by
+ *  <code>\nnn</code> notation and all special characters escaped.
  */
 
 static VALUE
-rb_str_match(VALUE x, SEL sel, VALUE y)
+rstr_dump(VALUE self, SEL sel)
 {
-    switch (TYPE(y)) {
-      case T_STRING:
-	rb_raise(rb_eTypeError, "type mismatch: String given");
-
-      case T_REGEXP:
-	return rb_reg_match(y, x);
-
-      default:
-	return rb_funcall(y, rb_intern("=~"), 1, x);
-    }
+    VALUE res = str_inspect(RSTR(self), true);
+    *(VALUE *)res = *(VALUE *)self;
+    return res;
 }
 
-
-static VALUE get_pat(VALUE, int);
-
-
 /*
  *  call-seq:
  *     str.match(pattern)   => matchdata or nil
@@ -1336,17 +2507,41 @@
  *  The return value is a value from block execution in this case.
  */
 
-VALUE rb_reg_match_m(VALUE re, SEL sel, int argc, VALUE *argv);
+static VALUE
+get_pat(VALUE pat, bool quote)
+{
+    switch (TYPE(pat)) {
+	case T_REGEXP:
+	    return pat;
 
+	case T_STRING:
+	    break;
+
+	default:
+	    {
+		VALUE val = rb_check_string_type(pat);
+		if (NIL_P(val)) {
+		    Check_Type(pat, T_REGEXP);
+		}
+		pat = val;
+	    }
+    }
+
+    if (quote) {
+	pat = rb_reg_quote(pat);
+    }
+    return rb_reg_regcomp(pat);
+}
+
 static VALUE
-rb_str_match_m(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_match2(VALUE self, SEL sel, int argc, VALUE *argv)
 {
     if (argc < 1) {
 	rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc);
     }
-    VALUE re = argv[0];
-    argv[0] = str;
-    VALUE result = rb_reg_match_m(get_pat(re, 0), 0, argc, argv);
+    VALUE re = get_pat(argv[0], false);
+    argv[0] = self;
+    VALUE result = regexp_match2(re, 0, argc, argv);
     if (!NIL_P(result) && rb_block_given_p()) {
 	return rb_yield(result);
     }
@@ -1355,567 +2550,603 @@
 
 /*
  *  call-seq:
- *     str.succ   => new_str
- *     str.next   => new_str
+ *     str =~ obj   => fixnum or nil
  *  
- *  Returns the successor to <i>str</i>. The successor is calculated by
- *  incrementing characters starting from the rightmost alphanumeric (or
- *  the rightmost character if there are no alphanumerics) in the
- *  string. Incrementing a digit always results in another digit, and
- *  incrementing a letter results in another letter of the same case.
- *  Incrementing nonalphanumerics uses the underlying character set's
- *  collating sequence.
+ *  Match---If <i>obj</i> is a <code>Regexp</code>, use it as a pattern to match
+ *  against <i>str</i>,and returns the position the match starts, or 
+ *  <code>nil</code> if there is no match. Otherwise, invokes
+ *  <i>obj.=~</i>, passing <i>str</i> as an argument. The default
+ *  <code>=~</code> in <code>Object</code> returns <code>false</code>.
  *     
- *  If the increment generates a ``carry,'' the character to the left of
- *  it is incremented. This process repeats until there is no carry,
- *  adding an additional character if necessary.
- *     
- *     "abcd".succ        #=> "abce"
- *     "THX1138".succ     #=> "THX1139"
- *     "<<koala>>".succ   #=> "<<koalb>>"
- *     "1999zzz".succ     #=> "2000aaa"
- *     "ZZZ9999".succ     #=> "AAAA0000"
- *     "***".succ         #=> "**+"
+ *     "cat o' 9 tails" =~ /\d/   #=> 7
+ *     "cat o' 9 tails" =~ 9      #=> nil
  */
 
 static VALUE
-rb_str_succ(VALUE orig, SEL sel)
+rstr_match(VALUE self, SEL sel, VALUE other)
 {
-    UniChar *buf;
-    UniChar carry;
-    long i, len;
-    bool modified;
+    switch (TYPE(other)) {
+	case T_STRING:
+	    rb_raise(rb_eTypeError, "type mismatch: String given");
 
-    len = CFStringGetLength((CFStringRef)orig);
-    if (len == 0)
-	return orig;
+	case T_REGEXP:
+	    return regexp_match(other, 0, self);
 
-    buf = (UniChar *)alloca(sizeof(UniChar) * (len + 1));
-    buf++;
-    
-    CFStringGetCharacters((CFStringRef)orig, CFRangeMake(0, len), buf);
-    modified = false;
-    carry = 0;
-
-    for (i = len - 1; i >= 0; i--) {
-	UniChar c = buf[i];
-	if (iswdigit(c)) {
-	    modified = true;
-	    if (c != '9') {
-		buf[i]++;
-		carry = 0;
-		break;
-	    }
-	    else {
-		buf[i] = '0';
-		carry = '1';
-	    }
-	}
-	else if (iswalpha(c)) {
-	    bool lower = islower(c);
-	    UniChar e = lower ? 'z' : 'Z';
-	    modified = true;
-	    if (c != e) {
-		buf[i]++;
-		carry = 0;
-		break;
-	    }
-	    else {
-		carry = buf[i] = lower ? 'a' : 'A';
-	    }
-	}
+	default:
+	    return rb_vm_call(other, selEqTilde, 1, &self, false);
     }
-
-    if (!modified) {
-	buf[len-1]++;
-    }
-    else if (carry != 0) {
-	buf--;
-	*buf = carry;
-	len++;
-    }
-
-    CFMutableStringRef newstr;
-
-    newstr = CFStringCreateMutable(NULL, 0);
-    CFStringAppendCharacters(newstr, buf, len);
-    CFMakeCollectable(newstr);
-
-    return (VALUE)newstr;
 }
 
-
 /*
  *  call-seq:
- *     str.succ!   => str
- *     str.next!   => str
+ *     str.scan(pattern)                         => array
+ *     str.scan(pattern) {|match, ...| block }   => str
  *  
- *  Equivalent to <code>String#succ</code>, but modifies the receiver in
- *  place.
+ *  Both forms iterate through <i>str</i>, matching the pattern (which may be a
+ *  <code>Regexp</code> or a <code>String</code>). For each match, a result is
+ *  generated and either added to the result array or passed to the block. If
+ *  the pattern contains no groups, each individual result consists of the
+ *  matched string, <code>$&</code>.  If the pattern contains groups, each
+ *  individual result is itself an array containing one entry per group.
+ *     
+ *     a = "cruel world"
+ *     a.scan(/\w+/)        #=> ["cruel", "world"]
+ *     a.scan(/.../)        #=> ["cru", "el ", "wor"]
+ *     a.scan(/(...)/)      #=> [["cru"], ["el "], ["wor"]]
+ *     a.scan(/(..)(..)/)   #=> [["cr", "ue"], ["l ", "wo"]]
+ *     
+ *  And the block form:
+ *     
+ *     a.scan(/\w+/) {|w| print "<<#{w}>> " }
+ *     print "\n"
+ *     a.scan(/(.)(.)/) {|x,y| print y, x }
+ *     print "\n"
+ *     
+ *  <em>produces:</em>
+ *     
+ *     <<cruel>> <<world>>
+ *     rceu lowlr
  */
 
 static VALUE
-rb_str_succ_bang(VALUE str, SEL sel)
+rstr_scan(VALUE self, SEL sel, VALUE pat)
 {
-    rb_str_shared_replace(str, rb_str_succ(str, 0));
+    const bool block_given = rb_block_given_p();
 
-    return str;
+    pat = get_pat(pat, true);
+    long start = 0;
+
+    VALUE ary = 0;
+    if (!block_given) {
+	ary = rb_ary_new();
+    }
+
+    while (rb_reg_search(pat, self, start, false) >= 0) {
+	VALUE match = rb_backref_get();
+
+	int count = 0;
+	rb_match_result_t *results = rb_reg_match_results(match, &count);
+	assert(count > 0);
+
+	if (results[0].beg == results[0].end) {
+	    start = results[0].end + 1;
+	}
+	else {
+	    start = results[0].end;
+	}
+
+	VALUE scan_result;
+	if (count == 1) {
+	    scan_result = rb_reg_nth_match(0, match);
+	}
+	else {
+	    scan_result = rb_ary_new2(count);
+	    for (int i = 1; i < count; i++) {
+		rb_ary_push(scan_result, rb_reg_nth_match(i, match));
+	    }
+	}
+
+	if (block_given) {
+	    rb_match_busy(match);
+	    rb_yield(scan_result);
+	    rb_backref_set(match);
+	    RETURN_IF_BROKEN();
+	}
+	else {
+	    rb_ary_push(ary, scan_result);
+	}
+
+	rb_backref_set(match);
+    }
+
+    return block_given ? self : ary;
 }
 
-
 /*
  *  call-seq:
- *     str.upto(other_str, exclusive=false) {|s| block }   => str
+ *     str.split(pattern=$;, [limit])   => anArray
  *  
- *  Iterates through successive values, starting at <i>str</i> and
- *  ending at <i>other_str</i> inclusive, passing each value in turn to
- *  the block. The <code>String#succ</code> method is used to generate
- *  each value.  If optional second argument exclusive is omitted or is <code>false</code>,
- *  the last value will be included; otherwise it will be excluded.
+ *  Divides <i>str</i> into substrings based on a delimiter, returning an array
+ *  of these substrings.
  *     
- *     "a8".upto("b6") {|s| print s, ' ' }
- *     for s in "a8".."b6"
- *       print s, ' '
- *     end
+ *  If <i>pattern</i> is a <code>String</code>, then its contents are used as
+ *  the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
+ *  space, <i>str</i> is split on whitespace, with leading whitespace and runs
+ *  of contiguous whitespace characters ignored.
  *     
- *  <em>produces:</em>
+ *  If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
+ *  pattern matches. Whenever the pattern matches a zero-length string,
+ *  <i>str</i> is split into individual characters. If <i>pattern</i> contains
+ *  groups, the respective matches will be returned in the array as well.
  *     
- *     a8 a9 b0 b1 b2 b3 b4 b5 b6
- *     a8 a9 b0 b1 b2 b3 b4 b5 b6
+ *  If <i>pattern</i> is omitted, the value of <code>$;</code> is used.  If
+ *  <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
+ *  split on whitespace as if ` ' were specified.
+ *     
+ *  If the <i>limit</i> parameter is omitted, trailing null fields are
+ *  suppressed. If <i>limit</i> is a positive number, at most that number of
+ *  fields will be returned (if <i>limit</i> is <code>1</code>, the entire
+ *  string is returned as the only entry in an array). If negative, there is no
+ *  limit to the number of fields returned, and trailing null fields are not
+ *  suppressed.
+ *     
+ *     " now's  the time".split        #=> ["now's", "the", "time"]
+ *     " now's  the time".split(' ')   #=> ["now's", "the", "time"]
+ *     " now's  the time".split(/ /)   #=> ["", "now's", "", "the", "time"]
+ *     "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
+ *     "hello".split(//)               #=> ["h", "e", "l", "l", "o"]
+ *     "hello".split(//, 3)            #=> ["h", "e", "llo"]
+ *     "hi mom".split(%r{\s*})         #=> ["h", "i", "m", "o", "m"]
+ *     
+ *     "mellow yellow".split("ello")   #=> ["m", "w y", "w"]
+ *     "1,2,,3,4,,".split(',')         #=> ["1", "2", "", "3", "4"]
+ *     "1,2,,3,4,,".split(',', 4)      #=> ["1", "2", "", "3,4,,"]
+ *     "1,2,,3,4,,".split(',', -4)     #=> ["1", "2", "", "3", "4", "", ""]
  */
 
+static VALUE str_strip(VALUE str, int direction);
+
 static VALUE
-rb_str_upto(VALUE beg, SEL sel, int argc, VALUE *argv)
+rstr_split(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    VALUE end, exclusive;
-    VALUE current, after_end;
-    ID succ;
-    int n, excl;
+    const long len = str_length(RSTR(str), false);
+    int lim = 0;
 
-    rb_scan_args(argc, argv, "11", &end, &exclusive);
-    excl = RTEST(exclusive);
-    succ = rb_intern("succ");
-    StringValue(end);
-    if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
-	UniChar c = CFStringGetCharacterAtIndex((CFStringRef)beg, 0);
-	UniChar e = CFStringGetCharacterAtIndex((CFStringRef)end, 0);
-	
-	if (c > e || (excl && c == e)) 
-	    return beg;
-	for (;;) {
-	    CFMutableStringRef substr;
-	    substr = CFStringCreateMutable(NULL, 0);
-	    CFStringAppendCharacters(substr, &c, 1);
-	    CFMakeCollectable(substr);
-	    rb_yield((VALUE)substr);
-	    RETURN_IF_BROKEN();
-	    if (!excl && c == e) 
-		break;
-	    c++;
-	    if (excl && c == e) 
-		break;
+    VALUE spat, limit;
+    if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
+	lim = NUM2INT(limit);
+	if (lim <= 0) {
+	    limit = Qnil;
 	}
-	return beg;
+	else if (lim == 1) {
+	    if (len == 0) {
+		return rb_ary_new2(0);
+	    }
+	    return rb_ary_new3(1, str);
+	}
     }
-    n = rb_str_cmp(beg, end);
-    if (n > 0 || (excl && n == 0)) return beg;
-	
-    after_end = rb_funcall(end, succ, 0, 0);
-    current = beg;
-    while (!rb_str_equal(current, after_end)) {
-	rb_yield(current);
-	RETURN_IF_BROKEN();
-	if (!excl && rb_str_equal(current, end)) break;
-	current = rb_funcall(current, succ, 0, 0);
-	StringValue(current);
-	if (excl && rb_str_equal(current, end)) break;
-	if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
-	    break;
+
+    VALUE result = rb_ary_new();
+    if (len == 0) {
+	return result;
     }
 
-    return beg;
-}
-
-static VALUE
-rb_str_subpat(VALUE str, VALUE re, int nth)
-{
-    if (rb_reg_search(re, str, 0, 0) >= 0) {
-	return rb_reg_nth_match(nth, rb_backref_get());
+    bool awk_split = false, spat_string = false;
+    long spat_len = 0;
+    if (NIL_P(spat)) {
+	if (!NIL_P(rb_fs)) {
+	    spat = rb_fs;
+	    goto fs_set;
+	}
+	awk_split = true;
     }
-    return Qnil;
-}
+    else {
+fs_set:
+	if (TYPE(spat) == T_STRING) {
+	    spat_string = true;
+	    spat_len = rb_str_chars_len(spat);
+	    if (spat_len == 1 && rb_str_get_uchar(spat, 0) == ' ') {
+		awk_split = true;
+	    }
+	}
+	else {
+	    spat = get_pat(spat, true);
+	}
+    }
 
-static VALUE
-rb_str_aref(VALUE str, VALUE indx)
-{
-    long idx;
+    long beg = 0;
+    if (awk_split || spat_string) {
+	if (spat != Qnil) {
+	    if (spat_len == 0) {
+		do {
+		    VALUE substr = rstr_substr(str, beg, 1);
+		    rb_ary_push(result, substr);
+		    beg++;
+		    if (beg >= len) {
+			break;
+		    }
+		}
+		while (limit == Qnil || --lim > 1);
+	    }
+	    else {
+		rb_str_t *spat_str = str_need_string(spat);
+		do {
+		    const long pos = str_index_for_string(RSTR(str), spat_str,
+			    beg, -1, false, false);
+		    if (pos == -1) {
+			break;
+		    }
+		    rb_ary_push(result, rstr_substr(str, beg, pos - beg));
+		    beg = pos + 1;
+		}
+		while (limit == Qnil || --lim > 1);
+	    }
+	}
+	else {
+	    UChar *chars = NULL;
+	    long chars_len = 0;
+	    bool need_free = false;
 
-    switch (TYPE(indx)) {
-      case T_FIXNUM:
-	idx = FIX2LONG(indx);
+	    rb_str_get_uchars(str, &chars, &chars_len, &need_free);
 
-      num_index:
-	str = rb_str_substr(str, idx, 1);
-	if (!NIL_P(str) && RSTRING_LEN(str) == 0) return Qnil;
-	return str;
+	    for (long i = 0; i < chars_len; i++) {
+		UChar c = chars[i];
+		if (c == ' ' || c == '\t' || c == '\n') {
+		    VALUE substr = rstr_substr(str, beg, i - beg);
+		    str_strip(substr, 0);
+		    if (rb_str_chars_len(substr) > 0) {
+			rb_ary_push(result, substr); 
+		    }
+		    beg = i + 1;
+		}
+		if (limit != Qnil && --lim <= 0) {
+		    break;
+		}
+	    }
 
-      case T_REGEXP:
-	return rb_str_subpat(str, indx, 0);
+	    if (need_free) {
+		free(chars);
+	    }
+	}
+    }
+    else {
+	long start = beg;
+	bool last_null = false;
+	do {
+	    const long pos = rb_reg_search(spat, str, beg, false);
+	    if (pos < 0) {
+		break;
+	    }
+	    VALUE match = rb_backref_get();
 
-      case T_STRING:
-	if (rb_str_index(str, indx, 0) != -1)
-	    return rb_str_dup(indx);
-	return Qnil;
+	    int count = 0;
+	    rb_match_result_t *results = rb_reg_match_results(match, &count);
+	    assert(count > 0);
 
-      default:
-	/* check if indx is Range */
-	{
-	    long beg, len;
-	    VALUE tmp;
+	    if (beg == pos && results[0].beg == results[0].end) {
+		if (last_null) {
+		    if (beg + 1 <= len) {
+			rb_ary_push(result, rstr_substr(str, beg, 1));
+		    }
+		    beg = start;
+		}
+		else {
+		    start++;
+		    last_null = true;
+		    continue;
+		}
+	    }
+	    else {
+		rb_ary_push(result, rstr_substr(str, beg, pos - beg));
+		beg = results[0].end;
+	    }
+	    last_null = false;
 
-	    len = str_strlen(str, STR_ENC_GET(str));
-	    switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
-	      case Qfalse:
+	    for (int i = 1; i < count; i++) {
+		VALUE match_str = rb_reg_nth_match(i, match);
+		if (match_str != Qnil) {
+		    rb_ary_push(result, rb_reg_nth_match(i, match));
+		}
+	    }
+	}
+	while (limit == Qnil || --lim > 1);
+    }
+
+    if (len > 0 && (!NIL_P(limit) || len > beg || lim < 0)) {
+	VALUE tmp;
+	if (len == beg) {
+	    tmp = rb_str_new(NULL, 0);
+	}
+	else {
+	    tmp = rb_str_subseq(str, beg, len - beg);
+	}
+	rb_ary_push(result, tmp);
+    }
+
+    if (NIL_P(limit) && lim == 0) {
+	while (true) {
+	    const long n = RARRAY_LEN(result);
+	    if (n > 0 && rb_str_chars_len(RARRAY_AT(result, n - 1)) == 0) {
+		rb_ary_pop(result);
+	    }
+	    else {
 		break;
-	      case Qnil:
-		return Qnil;
-	      default:
-		tmp = rb_str_substr(str, beg, len);
-		return tmp;
 	    }
 	}
-	idx = NUM2LONG(indx);
-	goto num_index;
     }
-    return Qnil;		/* not reached */
+
+    return result;
 }
 
-
 /*
  *  call-seq:
- *     str[fixnum]                 => new_str or nil
- *     str[fixnum, fixnum]         => new_str or nil
- *     str[range]                  => new_str or nil
- *     str[regexp]                 => new_str or nil
- *     str[regexp, fixnum]         => new_str or nil
- *     str[other_str]              => new_str or nil
- *     str.slice(fixnum)           => new_str or nil
- *     str.slice(fixnum, fixnum)   => new_str or nil
- *     str.slice(range)            => new_str or nil
- *     str.slice(regexp)           => new_str or nil
- *     str.slice(regexp, fixnum)   => new_str or nil
- *     str.slice(other_str)        => new_str or nil
+ *     str.to_i(base=10)   => integer
  *  
- *  Element Reference---If passed a single <code>Fixnum</code>, returns a
- *  substring of one character at that position. If passed two <code>Fixnum</code>
- *  objects, returns a substring starting at the offset given by the first, and
- *  a length given by the second. If given a range, a substring containing
- *  characters at offsets given by the range is returned. In all three cases, if
- *  an offset is negative, it is counted from the end of <i>str</i>. Returns
- *  <code>nil</code> if the initial offset falls outside the string, the length
- *  is negative, or the beginning of the range is greater than the end.
+ *  Returns the result of interpreting leading characters in <i>str</i> as an
+ *  integer base <i>base</i> (between 2 and 36). Extraneous characters past the
+ *  end of a valid number are ignored. If there is not a valid number at the
+ *  start of <i>str</i>, <code>0</code> is returned. This method never raises an
+ *  exception.
  *     
- *  If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
- *  returned. If a numeric parameter follows the regular expression, that
- *  component of the <code>MatchData</code> is returned instead. If a
- *  <code>String</code> is given, that string is returned if it occurs in
- *  <i>str</i>. In both cases, <code>nil</code> is returned if there is no
- *  match.
- *     
- *     a = "hello there"
- *     a[1]                   #=> "e"
- *     a[1,3]                 #=> "ell"
- *     a[1..3]                #=> "ell"
- *     a[-3,2]                #=> "er"
- *     a[-4..-2]              #=> "her"
- *     a[12..-1]              #=> nil
- *     a[-2..-4]              #=> ""
- *     a[/[aeiou](.)\1/]      #=> "ell"
- *     a[/[aeiou](.)\1/, 0]   #=> "ell"
- *     a[/[aeiou](.)\1/, 1]   #=> "l"
- *     a[/[aeiou](.)\1/, 2]   #=> nil
- *     a["lo"]                #=> "lo"
- *     a["bye"]               #=> nil
+ *     "12345".to_i             #=> 12345
+ *     "99 red balloons".to_i   #=> 99
+ *     "0a".to_i                #=> 0
+ *     "0a".to_i(16)            #=> 10
+ *     "hello".to_i             #=> 0
+ *     "1100101".to_i(2)        #=> 101
+ *     "1100101".to_i(8)        #=> 294977
+ *     "1100101".to_i(10)       #=> 1100101
+ *     "1100101".to_i(16)       #=> 17826049
  */
 
 static VALUE
-rb_str_aref_m(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_to_i(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    if (argc == 2) {
-	if (TYPE(argv[0]) == T_REGEXP) {
-	    return rb_str_subpat(str, argv[0], NUM2INT(argv[1]));
-	}
-	return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
-    }
-    if (argc != 1) {
-	rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc);
-    }
-    return rb_str_aref(str, argv[0]);
-}
+    int base = 10;
 
-static void
-rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
-{
-    rb_str_modify(str);
-    CFStringReplace((CFMutableStringRef)str, CFRangeMake(beg, len), 
-	(CFStringRef)val);
-}
+    if (argc > 0) {
+	VALUE b;
+	rb_scan_args(argc, argv, "01", &b);
 
-static void
-rb_str_splice(VALUE str, long beg, long len, VALUE val)
-{
-    long slen;
-
-    if (len < 0) {
-	rb_raise(rb_eIndexError, "negative length %ld", len);
-    }
-
-    StringValue(val);
-    rb_str_modify(str);
-    slen = CFStringGetLength((CFStringRef)str);
-
-    if (slen < beg) {
-out_of_range:
-	rb_raise(rb_eIndexError, "index %ld out of string", beg);
-    }
-    if (beg < 0) {
-	if (-beg > slen) {
-	    goto out_of_range;
+	base = NUM2INT(b);
+	if (base < 0) {
+	    rb_raise(rb_eArgError, "invalid radix %d", base);
 	}
-	beg += slen;
     }
-    if (slen < len || slen < beg + len) {
-	len = slen - beg;
-    }
-    rb_str_splice_0(str, beg, len, val);
 
-    if (OBJ_TAINTED(val)) {
-	OBJ_TAINT(str);
-    }
+    return rb_str_to_inum(str, base, Qfalse);
 }
 
-void
-rb_str_update(VALUE str, long beg, long len, VALUE val)
+/*
+ *  call-seq:
+ *     str.hex   => integer
+ *  
+ *  Treats leading characters from <i>str</i> as a string of hexadecimal digits
+ *  (with an optional sign and an optional <code>0x</code>) and returns the
+ *  corresponding number. Zero is returned on error.
+ *     
+ *     "0x0a".hex     #=> 10
+ *     "-1234".hex    #=> -4660
+ *     "0".hex        #=> 0
+ *     "wombat".hex   #=> 0
+ */
+
+static VALUE
+rstr_hex(VALUE str, SEL sel)
 {
-    rb_str_splice(str, beg, len, val);
+    return rb_str_to_inum(str, 16, Qfalse);
 }
 
-static void
-rb_str_subpat_set(VALUE str, VALUE re, int nth, VALUE val)
+/*
+ *  call-seq:
+ *     str.oct   => integer
+ *  
+ *  Treats leading characters of <i>str</i> as a string of octal digits (with an
+ *  optional sign) and returns the corresponding number.  Returns 0 if the
+ *  conversion fails.
+ *     
+ *     "123".oct       #=> 83
+ *     "-377".oct      #=> -255
+ *     "bad".oct       #=> 0
+ *     "0377bad".oct   #=> 255
+ */
+
+static VALUE
+rstr_oct(VALUE str, SEL sel)
 {
-    VALUE match;
-    long start, end, len;
-    struct re_registers *regs;
+    return rb_str_to_inum(str, -8, Qfalse);
+}
 
-    if (rb_reg_search(re, str, 0, 0) < 0) {
-	rb_raise(rb_eIndexError, "regexp not matched");
-    }
-    match = rb_backref_get();
-    regs = RMATCH_REGS(match);
-    if (nth >= regs->num_regs) {
-      out_of_range:
-	rb_raise(rb_eIndexError, "index %d out of regexp", nth);
-    }
-    if (nth < 0) {
-	if (-nth >= regs->num_regs) {
-	    goto out_of_range;
-	}
-	nth += regs->num_regs;
-    }
+/*
+ *  call-seq:
+ *     str.ord   => integer
+ *  
+ *  Return the <code>Integer</code> ordinal of a one-character string.
+ *     
+ *     "a".ord         #=> 97
+ */
 
-    start = BEG(nth);
-    if (start == -1) {
-	rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
+static VALUE
+rstr_ord(VALUE str, SEL sel)
+{
+    if (RSTR(str)->length_in_bytes == 0) {
+	rb_raise(rb_eArgError, "empty string");
     }
-    end = END(nth);
-    len = end - start;
-    StringValue(val);
-    rb_str_splice_0(str, start, len, val);
+    return INT2NUM(rb_str_get_uchar(str, 0));
 }
 
+/*
+ *  call-seq:
+ *     string.chr    ->  string
+ *
+ *  Returns a one-character string at the beginning of the string.
+ *
+ *     a = "abcde"
+ *     a.chr    #=> "a"
+ */
+
 static VALUE
-rb_str_aset(VALUE str, VALUE indx, VALUE val)
+rstr_chr(VALUE str, SEL sel)
 {
-    long idx, beg;
+    return rstr_substr(str, 0, 1);
+}
 
-    switch (TYPE(indx)) {
-	case T_FIXNUM:
-	    idx = FIX2LONG(indx);
-num_index:
-	    rb_str_splice(str, idx, 1, val);
-	    return val;
+/*
+ *  call-seq:
+ *     str.to_f   => float
+ *  
+ *  Returns the result of interpreting leading characters in <i>str</i> as a
+ *  floating point number. Extraneous characters past the end of a valid number
+ *  are ignored. If there is not a valid number at the start of <i>str</i>,
+ *  <code>0.0</code> is returned. This method never raises an exception.
+ *     
+ *     "123.45e1".to_f        #=> 1234.5
+ *     "45.67 degrees".to_f   #=> 45.67
+ *     "thx1138".to_f         #=> 0.0
+ */
 
-	case T_REGEXP:
-	    rb_str_subpat_set(str, indx, 0, val);
-	    return val;
-
-	case T_STRING:
-	    beg = rb_str_index(str, indx, 0);
-	    if (beg < 0) {
-		rb_raise(rb_eIndexError, "string not matched");
-	    }
-	    beg = rb_str_sublen(str, beg);
-	    rb_str_splice(str, beg, str_strlen(indx, 0), val);
-	    return val;
-
-	default:
-	    /* check if indx is Range */
-	    {
-		long beg, len;
-		if (rb_range_beg_len(indx, &beg, &len, str_strlen(str, 0), 2)) {
-		    rb_str_splice(str, beg, len, val);
-		    return val;
-		}
-	    }
-	    idx = NUM2LONG(indx);
-	    goto num_index;
-    }
+static VALUE
+rstr_to_f(VALUE str, SEL sel)
+{
+    return DOUBLE2NUM(rb_str_to_dbl(str, Qfalse));
 }
 
 /*
  *  call-seq:
- *     str[fixnum] = new_str
- *     str[fixnum, fixnum] = new_str
- *     str[range] = aString
- *     str[regexp] = new_str
- *     str[regexp, fixnum] = new_str
- *     str[other_str] = new_str
+ *     str.chomp!(separator=$/)   => str or nil
  *  
- *  Element Assignment---Replaces some or all of the content of <i>str</i>. The
- *  portion of the string affected is determined using the same criteria as
- *  <code>String#[]</code>. If the replacement string is not the same length as
- *  the text it is replacing, the string will be adjusted accordingly. If the
- *  regular expression or string is used as the index doesn't match a position
- *  in the string, <code>IndexError</code> is raised. If the regular expression
- *  form is used, the optional second <code>Fixnum</code> allows you to specify
- *  which portion of the match to replace (effectively using the
- *  <code>MatchData</code> indexing rules. The forms that take a
- *  <code>Fixnum</code> will raise an <code>IndexError</code> if the value is
- *  out of range; the <code>Range</code> form will raise a
- *  <code>RangeError</code>, and the <code>Regexp</code> and <code>String</code>
- *  forms will silently ignore the assignment.
+ *  Modifies <i>str</i> in place as described for <code>String#chomp</code>,
+ *  returning <i>str</i>, or <code>nil</code> if no modifications were made.
  */
 
 static VALUE
-rb_str_aset_m(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_chomp_bang(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    if (argc == 3) {
-	if (TYPE(argv[0]) == T_REGEXP) {
-	    rb_str_subpat_set(str, argv[0], NUM2INT(argv[1]), argv[2]);
+    VALUE rs;
+    if (rb_scan_args(argc, argv, "01", &rs) == 0) {
+	rs = rb_rs;
+    }
+    rstr_modify(str);
+    if (rs == Qnil) {
+	return Qnil;
+    }
+    StringValue(rs);
+
+    const long len = rb_str_chars_len(str);
+    if (len == 0) {
+	return Qnil;
+    }
+
+    const long rslen = rb_str_chars_len(rs);
+    long to_del = 0;
+
+    if (rs == rb_default_rs
+	|| rslen == 0
+	|| (rslen == 1 && rb_str_get_uchar(rs, 0) == '\n')) {
+	UChar c = str_get_uchar(RSTR(str), len - 1, false);
+	if (c == '\n') {
+	    to_del++;
+	    c = len > 1 ? str_get_uchar(RSTR(str), len - 2, false) : 0;
 	}
-	else {
-	    rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
+	if (c == '\r' && (rslen > 0 || to_del != 0)) {
+	    to_del++;
 	}
-	return argv[2];
     }
-    if (argc != 2) {
-	rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
+    else if (rslen <= len) {
+	if (str_index_for_string(RSTR(str), str_need_string(rs),
+		    len - rslen, -1, false, false) >= 0) {
+	    to_del += rslen;
+	}
     }
-    return rb_str_aset(str, argv[0], argv[1]);
+
+    if (to_del == 0) {
+	return Qnil;
+    }
+    str_delete(RSTR(str), len - to_del, to_del, false);
+    return str;
 }
 
 /*
  *  call-seq:
- *     str.insert(index, other_str)   => str
+ *     str.chomp(separator=$/)   => new_str
  *  
- *  Inserts <i>other_str</i> before the character at the given
- *  <i>index</i>, modifying <i>str</i>. Negative indices count from the
- *  end of the string, and insert <em>after</em> the given character.
- *  The intent is insert <i>aString</i> so that it starts at the given
- *  <i>index</i>.
+ *  Returns a new <code>String</code> with the given record separator removed
+ *  from the end of <i>str</i> (if present). If <code>$/</code> has not been
+ *  changed from the default Ruby record separator, then <code>chomp</code> also
+ *  removes carriage return characters (that is it will remove <code>\n</code>,
+ *  <code>\r</code>, and <code>\r\n</code>).
  *     
- *     "abcd".insert(0, 'X')    #=> "Xabcd"
- *     "abcd".insert(3, 'X')    #=> "abcXd"
- *     "abcd".insert(4, 'X')    #=> "abcdX"
- *     "abcd".insert(-3, 'X')   #=> "abXcd"
- *     "abcd".insert(-1, 'X')   #=> "abcdX"
+ *     "hello".chomp            #=> "hello"
+ *     "hello\n".chomp          #=> "hello"
+ *     "hello\r\n".chomp        #=> "hello"
+ *     "hello\n\r".chomp        #=> "hello\n"
+ *     "hello\r".chomp          #=> "hello"
+ *     "hello \n there".chomp   #=> "hello \n there"
+ *     "hello".chomp("llo")     #=> "he"
  */
 
 static VALUE
-rb_str_insert(VALUE str, SEL sel, VALUE idx, VALUE str2)
+rstr_chomp(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    long pos = NUM2LONG(idx);
-
-    if (pos == -1) {
-	return rb_str_append(str, str2);
-    }
-    else if (pos < 0) {
-	pos++;
-    }
-    rb_str_splice(str, pos, 0, str2);
+    str = rb_str_new3(str);
+    rstr_chomp_bang(str, 0, argc, argv);
     return str;
 }
 
-
 /*
  *  call-seq:
- *     str.slice!(fixnum)           => fixnum or nil
- *     str.slice!(fixnum, fixnum)   => new_str or nil
- *     str.slice!(range)            => new_str or nil
- *     str.slice!(regexp)           => new_str or nil
- *     str.slice!(other_str)        => new_str or nil
+ *     str.chop!   => str or nil
  *  
- *  Deletes the specified portion from <i>str</i>, and returns the portion
- *  deleted.
- *     
- *     string = "this is a string"
- *     string.slice!(2)        #=> "i"
- *     string.slice!(3..6)     #=> " is "
- *     string.slice!(/s.*t/)   #=> "sa st"
- *     string.slice!("r")      #=> "r"
- *     string                  #=> "thing"
+ *  Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
+ *  or <code>nil</code> if <i>str</i> is the empty string.  See also
+ *  <code>String#chomp!</code>.
  */
 
 static VALUE
-rb_str_slice_bang(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_chop_bang(VALUE str, SEL sel)
 {
-    VALUE result;
-    VALUE buf[3];
-    int i;
+    rstr_modify(str);
 
-    if (argc < 1 || 2 < argc) {
-	rb_raise(rb_eArgError, "wrong number of arguments (%d for 1)", argc);
+    const long len = str_length(RSTR(str), false);
+    if (len == 0) {
+	return Qnil;
     }
-    for (i=0; i<argc; i++) {
-	buf[i] = argv[i];
+
+    long to_del = 1;
+    if (len >= 2 && rb_str_get_uchar(str, len - 1) == '\n'
+	    && rb_str_get_uchar(str, len - 2) == '\r') {
+	to_del++;
     }
-    rb_str_modify(str);
-    buf[i] = rb_str_new(0,0);
-    result = rb_str_aref_m(str, 0, argc, buf);
-    if (!NIL_P(result)) {
-	rb_str_aset_m(str, 0, argc+1, buf);
-    }
-    return result;
+
+    str_delete(RSTR(str), len - to_del, to_del, false);
+    return str;
 }
 
+/*
+ *  call-seq:
+ *     str.chop   => new_str
+ *  
+ *  Returns a new <code>String</code> with the last character removed.  If the
+ *  string ends with <code>\r\n</code>, both characters are removed. Applying
+ *  <code>chop</code> to an empty string returns an empty
+ *  string. <code>String#chomp</code> is often a safer alternative, as it leaves
+ *  the string unchanged if it doesn't end in a record separator.
+ *     
+ *     "string\r\n".chop   #=> "string"
+ *     "string\n\r".chop   #=> "string\n"
+ *     "string\n".chop     #=> "string"
+ *     "string".chop       #=> "strin"
+ *     "x".chop.chop       #=> ""
+ */
+
 static VALUE
-get_pat(VALUE pat, int quote)
+rstr_chop(VALUE str, SEL sel)
 {
-    VALUE val;
-
-    switch (TYPE(pat)) {
-      case T_REGEXP:
-	return pat;
-
-      case T_STRING:
-	break;
-
-      default:
-	val = rb_check_string_type(pat);
-	if (NIL_P(val)) {
-	    Check_Type(pat, T_REGEXP);
-	}
-	pat = val;
-    }
-
-    if (quote) {
-	pat = rb_reg_quote(pat);
-    }
-
-    return rb_reg_regcomp(pat);
+    str = rb_str_new3(str);
+    rstr_chop_bang(str, 0);
+    return str;
 }
 
-
 /*
  *  call-seq:
  *     str.sub!(pattern, replacement)          => str or nil
@@ -1927,14 +3158,121 @@
  */
 
 static VALUE
-rb_str_sub_bang(VALUE str, SEL sel, int argc, VALUE *argv)
+rb_reg_regsub(VALUE str, VALUE src, VALUE regexp, rb_match_result_t *results,
+	int results_count)
 {
+    VALUE val = 0;
+
+    UChar *str_chars = NULL;
+    long str_chars_len = 0;
+    bool str_chars_need_free = false;
+
+    rb_str_get_uchars(str, &str_chars, &str_chars_len,
+	    &str_chars_need_free);
+
+    UChar *src_chars = NULL;
+    long src_chars_len = 0;
+    bool src_chars_need_free = false;
+
+    rb_str_get_uchars(src, &src_chars, &src_chars_len,
+	    &src_chars_need_free);
+
+    long pos = 0;
+
+    for (long i = 0; i < str_chars_len; i++) {
+	UChar c = str_chars[i];
+	if (c != '\\') {
+	    continue;
+	}
+
+	if (val == 0) {
+	    val = rb_unicode_str_new(NULL, 0);
+	}
+	str_concat_uchars(RSTR(val), &str_chars[pos], i - pos);
+
+	i++;
+	if (i == str_chars_len) {
+	    break;
+	}
+	pos = i + 1;
+
+	int no = -1;
+	c = str_chars[i];
+	switch (c) {
+	    case '1': case '2': case '3':
+	    case '4': case '5': case '6':
+	    case '7': case '8': case '9':
+		no = c - '0';
+		break;
+
+	    case '0':
+	    case '&':
+		no = 0;
+		break;
+
+	    case '`':
+		str_concat_uchars(RSTR(val), src_chars, results[0].beg);
+		break;
+
+	    case '\'':
+		str_concat_uchars(RSTR(val), &src_chars[results[0].end],
+			src_chars_len - results[0].end);
+		break;
+
+	    case '+':
+		no = results_count - 1;
+		while (results[no].beg == -1 && no > 0) {
+		    no--;
+		}
+		if (no == 0) {
+		    no = -1;
+		}
+		break;
+
+	    case '\\':
+	    default:
+		str_append_uchar(RSTR(val), c);
+		break;
+	}
+
+	if (no >= 0) {
+	    if (no >= results_count) {
+		continue;
+	    }
+	    if (results[no].beg == -1) {
+		continue;
+	    }
+	    str_concat_uchars(RSTR(val), &src_chars[results[no].beg],
+		    results[no].end - results[no].beg);
+	}
+    }
+
+    if (str_chars_need_free) {
+	free(str_chars);
+    }
+    if (src_chars_need_free) {
+	free(src_chars);
+    }
+
+    if (val == 0) {
+	return str;
+    }
+
+    if (pos < str_chars_len) {
+	str_concat_uchars(RSTR(val), &str_chars[pos], str_chars_len - pos);
+    }
+    return val;
+}
+
+static VALUE
+rstr_sub_bang(VALUE str, SEL sel, int argc, VALUE *argv)
+{
     VALUE repl, hash = Qnil;
-    bool iter = false;
+    bool block_given = false;
     bool tainted = false;
 
     if (argc == 1 && rb_block_given_p()) {
-	iter = true;
+	block_given = true;
     }
     else if (argc == 2) {
 	repl = argv[1];
@@ -1950,32 +3288,36 @@
 	rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
     }
 
-    VALUE pat = get_pat(argv[0], 1);
-    if (rb_reg_search(pat, str, 0, 0) >= 0) {
+    VALUE pat = get_pat(argv[0], true);
+    if (rb_reg_search(pat, str, 0, false) >= 0) {
 	VALUE match = rb_backref_get();
-	struct re_registers *regs = RMATCH_REGS(match);
+	int count = 0;
+	rb_match_result_t *results = rb_reg_match_results(match, &count);
+	assert(count > 0);
 
-	if (iter || !NIL_P(hash)) {
-            if (iter) {
-                rb_match_busy(match);
-                repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
+	if (block_given || !NIL_P(hash)) {
+            if (block_given) {
+		rb_match_busy(match);
+		repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
             }
             else {
-                repl = rb_hash_aref(hash, rb_str_subseq(str, BEG(0),
-			    END(0) - BEG(0)));
+                repl = rb_hash_aref(hash, rstr_substr(str, results[0].beg,
+			    results[0].end - results[0].beg));
                 repl = rb_obj_as_string(repl);
             }
-	    str_frozen_check(str);
-	    if (iter) {
+	    rstr_frozen_check(str);
+	    if (block_given) {
 		rb_backref_set(match);
+		RETURN_IF_BROKEN();
 	    }
 	}
 	else {
-	    repl = rb_reg_regsub(repl, str, regs, pat);
+	    repl = rb_reg_regsub(repl, str, pat, results, count);
 	}
 
-	rb_str_modify(str);
-	rb_str_splice_0(str, BEG(0), END(0) - BEG(0), repl);
+	rstr_modify(str);
+	str_splice(RSTR(str), results[0].beg, results[0].end - results[0].beg,
+		str_need_string(repl), false);
 	if (OBJ_TAINTED(repl)) {
 	    tainted = true;
 	}
@@ -2020,24 +3362,33 @@
  */
 
 static VALUE
-rb_str_sub(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_sub(VALUE str, SEL sel, int argc, VALUE *argv)
 {
     str = rb_str_new3(str);
-    rb_str_sub_bang(str, 0, argc, argv);
+    rstr_sub_bang(str, 0, argc, argv);
     return str;
 }
 
+/*
+ *  call-seq:
+ *     str.gsub!(pattern, replacement)        => str or nil
+ *     str.gsub!(pattern) {|match| block }    => str or nil
+ *  
+ *  Performs the substitutions of <code>String#gsub</code> in place, returning
+ *  <i>str</i>, or <code>nil</code> if no substitutions were performed.
+ */
+
 static VALUE
 str_gsub(SEL sel, int argc, VALUE *argv, VALUE str, bool bang)
 {
-    bool iter = false;
+    bool block_given = false;
     bool tainted = false;
     VALUE hash = Qnil, repl = Qnil;
  
     switch (argc) {
 	case 1:
 	    RETURN_ENUMERATOR(str, argc, argv);
-	    iter = true;
+	    block_given = true;
 	    break;
 
 	case 2:
@@ -2057,92 +3408,72 @@
     }
 
     VALUE pat = get_pat(argv[0], 1);
-    long offset = 0;
-    long beg = rb_reg_search(pat, str, 0, 0);
-    if (beg < 0) {
-	if (bang) {
-	    return Qnil;	/* no match, no substitution */
+    VALUE dest = rb_str_new5(str, NULL, 0);
+    long offset = 0, last = 0;
+    bool changed = false;
+    const long len = str_length(RSTR(str), false);
+
+    while (true) {
+        const long pos = rb_reg_search(pat, str, offset, false);
+	if (pos < 0) {
+	    if (!changed) {
+		return bang ? Qnil : rstr_dup(str, 0);
+	    }
+	    if (last < len) {
+		str_concat_string(RSTR(dest),
+			RSTR(rstr_substr(str, last, len - last)));
+	    }
+	    break;
 	}
-	return rb_str_new3(str);
-    }
 
-    VALUE dest = rb_str_new5(str, NULL, 0);
-    long slen = RSTRING_LEN(str);
-    VALUE match;
+	VALUE match = rb_backref_get();
+	int count = 0;
+	rb_match_result_t *results = rb_reg_match_results(match, &count);
+	assert(count > 0);
 
-    do {
-	match = rb_backref_get();
-	struct re_registers *regs = RMATCH_REGS(match);
-        VALUE val;
-
-	if (iter || !NIL_P(hash)) {
-            if (iter) {
-                rb_match_busy(match);
-                val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
+	VALUE val;
+	if (block_given || !NIL_P(hash)) {
+            if (block_given) {
+		rb_match_busy(match);
+		val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
             }
             else {
-                val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0),
-			    END(0) - BEG(0)));
+                val = rb_hash_aref(hash, rstr_substr(str, results[0].beg,
+			    results[0].end - results[0].beg));
                 val = rb_obj_as_string(val);
             }
-	    str_mod_check(str, sp, slen);
-	    if (bang) {
-		str_frozen_check(str);
-	    }
-	    if (val == dest) { 	/* paranoid check [ruby-dev:24827] */
-		rb_raise(rb_eRuntimeError, "block should not cheat");
-	    }
-	    if (iter) {
+	    rstr_frozen_check(str);
+	    if (block_given) {
 		rb_backref_set(match);
 		RETURN_IF_BROKEN();
 	    }
 	}
 	else {
-	    val = rb_reg_regsub(repl, str, regs, pat);
+	    val = rb_reg_regsub(repl, str, pat, results, count);
 	}
 
+	if (pos - offset > 0) {
+	    str_concat_string(RSTR(dest),
+		    RSTR(rstr_substr(str, offset, pos - offset)));
+	}
+	str_concat_string(RSTR(dest), str_need_string(val));
 
 	if (OBJ_TAINTED(val)) {
 	    tainted = true;
 	}
+	changed = true;
 
-	long len = beg - offset;  /* copy pre-match substr */
-        if (len > 0) {
-	    rb_str_buf_append(dest, rb_str_subseq(str, offset, len));
-	    //rb_enc_str_buf_cat(dest, cp, len, str_enc);
-        }
+	offset = last = results[0].end;
+	if (results[0].beg == offset) {
+	    offset++;
+	}
 
-        rb_str_buf_append(dest, val);
-
-	offset = END(0);
-	if (BEG(0) == END(0)) {
-	    /*
-	     * Always consume at least one character of the input string
-	     * in order to prevent infinite loops.
-	     */
-	    if (slen <= END(0)) {
-		break;
-	    }
-	    len = 1;
-	    rb_str_buf_append(dest, rb_str_subseq(str, END(0), len));
-            //rb_enc_str_buf_cat(dest, sp+END(0), len, str_enc);
-	    offset = END(0) + len;
-	}
-	if (offset > slen) {
-	    break;
-	}
-	beg = rb_reg_search(pat, str, offset, 0);
+	rb_backref_set(match);
     }
-    while (beg >= 0);
 
-    if (slen > offset) {
-	rb_str_buf_append(dest, rb_str_subseq(str, offset, slen - offset));
-        //rb_enc_str_buf_cat(dest, cp, slen - offset, str_enc);
-    }
-    rb_backref_set(match);
     if (bang) {
-	rb_str_modify(str);
-	CFStringReplaceAll((CFMutableStringRef)str, (CFStringRef)dest);
+	rstr_modify(str);
+	str_replace(RSTR(str), dest);
     }
     else {
     	if (!tainted && OBJ_TAINTED(str)) {
@@ -2157,26 +3488,12 @@
     return str;
 }
 
-
-/*
- *  call-seq:
- *     str.gsub!(pattern, replacement)        => str or nil
- *     str.gsub!(pattern) {|match| block }    => str or nil
- *  
- *  Performs the substitutions of <code>String#gsub</code> in place, returning
- *  <i>str</i>, or <code>nil</code> if no substitutions were performed.
- */
-
 static VALUE
-rb_str_gsub_bang(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_gsub_bang(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    // XXX This rb_str_modify() call is disabled because it breaks mkmf.rb for
-    // a reason, must investigate.
-    //rb_str_modify(str);
     return str_gsub(sel, argc, argv, str, true);
 }
 
-
 /*
  *  call-seq:
  *     str.gsub(pattern, replacement)       => new_str
@@ -2211,463 +3528,81 @@
  */
 
 static VALUE
-rb_str_gsub(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_gsub(VALUE str, SEL sel, int argc, VALUE *argv)
 {
     return str_gsub(sel, argc, argv, str, false);
 }
 
-
 /*
  *  call-seq:
- *     str.replace(other_str)   => str
+ *     str.downcase!   => str or nil
  *  
- *  Replaces the contents and taintedness of <i>str</i> with the corresponding
- *  values in <i>other_str</i>.
- *     
- *     s = "hello"         #=> "hello"
- *     s.replace "world"   #=> "world"
+ *  Downcases the contents of <i>str</i>, returning <code>nil</code> if no
+ *  changes were made.
+ *  Note: case replacement is effective only in ASCII region.
  */
 
-static VALUE
-rb_str_replace_imp(VALUE str, SEL sel, VALUE str2)
-{
-    rb_str_modify(str);
-    if (str == str2) {
-	return str;
+#define CHAR_ITERATE(str, code) \
+    if (str_try_making_data_uchars(RSTR(str))) { \
+	for (long i = 0, count = BYTES_TO_UCHARS(RSTR(str)->length_in_bytes); \
+		i < count; i++) { \
+	    UChar __tmp, c; \
+	    __tmp = c = RSTR(str)->data.uchars[i]; \
+	    code; \
+	    if (__tmp != c) { \
+		RSTR(str)->data.uchars[i] = c; \
+	    } \
+	} \
+    } \
+    else { \
+	for (long i = 0, count = RSTR(str)->length_in_bytes; \
+		i < count; i++) { \
+	    char __tmp, c; \
+	    __tmp = c = RSTR(str)->data.bytes[i]; \
+	    code; \
+	    if (__tmp != c) { \
+		RSTR(str)->data.bytes[i] = c; \
+	    } \
+	} \
     }
-    StringValue(str2);
-    CFStringReplaceAll((CFMutableStringRef)str, (CFStringRef)str2);
-    if (OBJ_TAINTED(str2)) {
-	OBJ_TAINT(str);
-    }
-    return str;
-}
 
-VALUE
-rb_str_replace(VALUE str, VALUE str2)
-{
-    return rb_str_replace_imp(str, 0, str2);
-}
-
-/*
- *  call-seq:
- *     string.clear    ->  string
- *
- *  Makes string empty.
- *
- *     a = "abcde"
- *     a.clear    #=> ""
- */
-
 static VALUE
-rb_str_clear(VALUE str, SEL sel)
+rstr_downcase_bang(VALUE str, SEL sel)
 {
-    rb_str_modify(str);
-    CFStringDelete((CFMutableStringRef)str, 
-	CFRangeMake(0, CFStringGetLength((CFStringRef)str)));
-    return str;
-}
+    rstr_modify(str);
 
-/*
- *  call-seq:
- *     string.chr    ->  string
- *
- *  Returns a one-character string at the beginning of the string.
- *
- *     a = "abcde"
- *     a.chr    #=> "a"
- */
+    bool changed = false;
+    CHAR_ITERATE(str,
+	if (c >= 'A' && c <= 'Z') {
+	    c = 'a' + (c - 'A');
+	    changed = true; 
+	});
 
-static VALUE
-rb_str_chr(VALUE str, SEL sel)
-{
-    return rb_str_substr(str, 0, 1);
+    return changed ? str : Qnil;
 }
 
 /*
  *  call-seq:
- *     str.getbyte(index)          => 0 .. 255
- *
- *  returns the <i>index</i>th byte as an integer.
- */
-static VALUE
-rb_str_getbyte(VALUE str, SEL sel, VALUE index)
-{
-    if (*(VALUE *)str != rb_cByteString) {
-	rb_raise(rb_eArgError,
-		"#getbyte is only implemented for ByteString objects");
-    }
-
-    long pos = NUM2LONG(index);
-    const long n = rb_bytestring_length(str);
-    if (pos < 0) {
-	pos += n;
-    }
-    if (pos < 0 || pos >= n) {
-	return Qnil;
-    }
-    return INT2FIX(rb_bytestring_byte_pointer(str)[pos]);
-}
-
-/*
- *  call-seq:
- *     str.setbyte(index, int) => int
- *
- *  modifies the <i>index</i>th byte as <i>int</i>.
- */
-static VALUE
-rb_str_setbyte(VALUE str, SEL sel, VALUE index, VALUE value)
-{
-    if (*(VALUE *)str != rb_cByteString) {
-	rb_raise(rb_eArgError,
-		"#setbyte is only implemented for ByteString objects");
-    }
-
-    long pos = NUM2LONG(index);
-    const int byte = NUM2INT(value);
-    const long n = rb_bytestring_length(str);
-
-    rb_str_modify(str);
-
-    if (pos < -n || n <= pos) {
-        rb_raise(rb_eIndexError, "index %ld out of string", pos);
-    }
-    if (pos < 0) {
-        pos += n;
-    }
-
-    rb_bytestring_byte_pointer(str)[pos] = byte;
-
-    return value;
-}
-
-
-/*
- *  call-seq:
- *     str.reverse!   => str
+ *     str.downcase   => new_str
  *  
- *  Reverses <i>str</i> in place.
- */
-
-static VALUE
-rb_str_reverse_bang(VALUE str, SEL sel)
-{
-    rb_str_modify(str);
-
-    const long n = CFStringGetLength((CFStringRef)str);
-    if (n <= 1) {
-	return rb_str_dup(str);
-    }
-
-#if 1
-    // HACK
-    const char *ptr = CFStringGetCStringPtr((CFStringRef)str, 0);
-    if (ptr != NULL) {
-	char *beg = (char *)ptr;
-	char *end = (char *)&ptr[n - 1];
-	while (beg < end) {
-	    char c = *beg;
-	    *beg++ = *end;
-	    *end-- = c;
-	}
-	return str;
-    }
-    const UniChar *ptr2 = CFStringGetCharactersPtr((CFStringRef)str);
-    if (ptr2 != NULL) {
-	UniChar *beg = (UniChar *)ptr2;
-	UniChar *end = (UniChar *)&ptr2[n - 1];
-	while (beg < end) {
-	    UniChar c = *beg;
-	    *beg++ = *end;
-	    *end-- = c;
-	}
-	return str;
-    }
-#endif
- 
-    UniChar *buffer = (UniChar *)alloca(sizeof(UniChar) * n);
-    CFStringGetCharacters((CFStringRef)str, CFRangeMake(0, n), buffer);
-    UniChar *beg = buffer;
-    UniChar *end = &buffer[n - 1];
-    while (beg < end) {
-	UniChar c = *beg;
-	*beg++ = *end;
-	*end-- = c;
-    }
-
-#if 0
-    CFStringDelete((CFMutableStringRef)str, CFRangeMake(0, n));
-    CFStringAppendCharacters((CFMutableStringRef)str, (const UniChar *)buffer, n);
-#else
-    CFStringRef tmp = CFStringCreateWithCharactersNoCopy(kCFAllocatorMalloc,
-	    buffer, n, kCFAllocatorNull);
-    CFStringReplaceAll((CFMutableStringRef)str, tmp);
-    CFRelease(tmp);
-#endif
-
-    return str;
-}
-
-/*
- *  call-seq:
- *     str.reverse   => new_str
- *  
- *  Returns a new string with the characters from <i>str</i> in reverse order.
+ *  Returns a copy of <i>str</i> with all uppercase letters replaced with their
+ *  lowercase counterparts. The operation is locale insensitive---only
+ *  characters ``A'' to ``Z'' are affected.
+ *  Note: case replacement is effective only in ASCII region.
  *     
- *     "stressed".reverse   #=> "desserts"
+ *     "hEllO".downcase   #=> "hello"
  */
 
 static VALUE
-rb_str_reverse(VALUE str, SEL sel)
+rstr_downcase(VALUE str, SEL sel)
 {
-    VALUE obj = rb_str_dup(str);
-    rb_str_reverse_bang(obj, 0);
-    return obj;
-}
-
-/*
- *  call-seq:
- *     str.include? other_str   => true or false
- *     str.include? fixnum      => true or false
- *  
- *  Returns <code>true</code> if <i>str</i> contains the given string or
- *  character.
- *     
- *     "hello".include? "lo"   #=> true
- *     "hello".include? "ol"   #=> false
- *     "hello".include? ?h     #=> true
- */
-
-static VALUE
-rb_str_include(VALUE str, SEL sel, VALUE arg)
-{
-    long i;
-
-    StringValue(arg);
-    i = rb_str_index(str, arg, 0);
-
-    return (i == -1) ? Qfalse : Qtrue;
-}
-
-
-/*
- *  call-seq:
- *     str.to_i(base=10)   => integer
- *  
- *  Returns the result of interpreting leading characters in <i>str</i> as an
- *  integer base <i>base</i> (between 2 and 36). Extraneous characters past the
- *  end of a valid number are ignored. If there is not a valid number at the
- *  start of <i>str</i>, <code>0</code> is returned. This method never raises an
- *  exception.
- *     
- *     "12345".to_i             #=> 12345
- *     "99 red balloons".to_i   #=> 99
- *     "0a".to_i                #=> 0
- *     "0a".to_i(16)            #=> 10
- *     "hello".to_i             #=> 0
- *     "1100101".to_i(2)        #=> 101
- *     "1100101".to_i(8)        #=> 294977
- *     "1100101".to_i(10)       #=> 1100101
- *     "1100101".to_i(16)       #=> 17826049
- */
-
-static VALUE
-rb_str_to_i(VALUE str, SEL sel, int argc, VALUE *argv)
-{
-    int base;
-
-    if (argc == 0) base = 10;
-    else {
-	VALUE b;
-
-	rb_scan_args(argc, argv, "01", &b);
-	base = NUM2INT(b);
-    }
-    if (base < 0) {
-	rb_raise(rb_eArgError, "invalid radix %d", base);
-    }
-    return rb_str_to_inum(str, base, Qfalse);
-}
-
-
-/*
- *  call-seq:
- *     str.to_f   => float
- *  
- *  Returns the result of interpreting leading characters in <i>str</i> as a
- *  floating point number. Extraneous characters past the end of a valid number
- *  are ignored. If there is not a valid number at the start of <i>str</i>,
- *  <code>0.0</code> is returned. This method never raises an exception.
- *     
- *     "123.45e1".to_f        #=> 1234.5
- *     "45.67 degrees".to_f   #=> 45.67
- *     "thx1138".to_f         #=> 0.0
- */
-
-static VALUE
-rb_str_to_f(VALUE str, SEL sel)
-{
-    return DOUBLE2NUM(rb_str_to_dbl(str, Qfalse));
-}
-
-
-/*
- *  call-seq:
- *     str.to_s     => str
- *     str.to_str   => str
- *  
- *  Returns the receiver.
- */
-
-static VALUE
-rb_str_to_s(VALUE str, SEL sel)
-{
-    if (!rb_objc_str_is_pure(str) && *(VALUE *)str != rb_cByteString) {
-	VALUE dup = str_alloc(rb_cString);
-	CFStringReplaceAll((CFMutableStringRef)dup, (CFStringRef)str);
-	if (OBJ_TAINTED(str)) {
-	    OBJ_TAINT(dup);
-	}
-	return dup;
-    }
+    str = rb_str_new3(str);
+    rstr_downcase_bang(str, 0);
     return str;
 }
 
-#if 0
-static void
-str_cat_char(VALUE str, int c, rb_encoding *enc)
-{
-    char buf[2];
-    buf[0] = (char)c;
-    buf[1] = '\0';
-    CFStringAppendCString((CFMutableStringRef)str, buf, kCFStringEncodingUTF8);
-}
-
-static void
-prefix_escape(VALUE str, int c, rb_encoding *enc)
-{
-    str_cat_char(str, '\\', enc);
-    str_cat_char(str, c, enc);
-}
-#endif
-
 /*
- * call-seq:
- *   str.inspect   => string
- *
- * Returns a printable version of _str_, surrounded by quote marks,
- * with special characters escaped.
- *
- *    str = "hello"
- *    str[3] = "\b"
- *    str.inspect       #=> "\"hel\\bo\""
- */
-
-static inline void
-__append(CFMutableStringRef out, UniChar c)
-{
-    CFStringAppendCharacters(out, &c, 1);
-}
-
-static inline void
-__append_escape(CFMutableStringRef out, UniChar c)
-{
-    __append(out, '\\');
-    __append(out, c);
-}
-
-static VALUE
-__rb_str_inspect(VALUE str, bool dump)
-{
-    const long len = CFStringGetLength((CFStringRef)str);
-    CFStringInlineBuffer buf; 
-    CFStringInitInlineBuffer((CFStringRef)str, &buf, CFRangeMake(0, len));
-
-    CFMutableStringRef out = CFStringCreateMutable(NULL, 0);
-    __append(out, '"');
-
-    for (long i = 0; i < len; i++) {
-	UniChar c = CFStringGetCharacterFromInlineBuffer(&buf, i);
-	if (iswprint(c)) {
-	    if (c == '"'|| c == '\\') {
-		__append_escape(out, c);
-	    }
-	    else if (dump && c == '#' && i + 1 < len) {
-		UniChar c2 = CFStringGetCharacterFromInlineBuffer(&buf, i + 1);
-		if (c2 == '$' || c2 == '@' || c2 == '{') {
-		    __append_escape(out, c);
-		}
-		else {
-		    __append(out, c);
-		}
-	    }
-	    else {
-		__append(out, c);
-	    }
-	}
-	else if (c == '\n') {
-	    __append_escape(out, 'n');
-	}
-	else if (c == '\r') {
-	    __append_escape(out, 'r');
-	}
-	else if (c == '\t') {
-	    __append_escape(out, 't');
-	}
-	else if (c == '\f') {
-	    __append_escape(out, 'f');
-	}
-	else if (c == '\013') {
-	    __append_escape(out, 'v');
-	}
-	else if (c == '\010') {
-	    __append_escape(out, 'b');
-	}
-	else if (c == '\007') {
-	    __append_escape(out, 'a');
-	}
-	else if (c == 033) {
-	    __append_escape(out, 'e');
-	}
-	else {
-	    CFStringAppendFormat(out, NULL, CFSTR("\\x%02X"), c);
-	}
-    }
-    __append(out, '"');
-
-    VALUE res = (VALUE)CFMakeCollectable(out);
-    if (OBJ_TAINTED(str)) {
-	OBJ_TAINT(res);
-    }
-    return res;
-}
-
-VALUE
-rb_str_inspect(VALUE str, SEL sel)
-{
-    return __rb_str_inspect(str, false);
-}
-
-/*
  *  call-seq:
- *     str.dump   => new_str
- *  
- *  Produces a version of <i>str</i> with all nonprinting characters replaced by
- *  <code>\nnn</code> notation and all special characters escaped.
- */
-
-static VALUE
-rb_str_dump(VALUE str, SEL sel)
-{
-    VALUE res = __rb_str_inspect(str, true);
-    if (*(VALUE *)str != rb_cByteString) {
-	*(VALUE *)res = *(VALUE *)str;
-    }
-    return res;
-}
-
-/*
- *  call-seq:
  *     str.upcase!   => str or nil
  *  
  *  Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
@@ -2676,19 +3611,20 @@
  */
 
 static VALUE
-rb_str_upcase_bang(VALUE str, SEL sel)
+rstr_upcase_bang(VALUE str, SEL sel)
 {
-    CFHashCode h;
-    rb_str_modify(str);
-    h = CFHash((CFTypeRef)str);
-    CFStringUppercase((CFMutableStringRef)str, NULL);
-    if (h == CFHash((CFTypeRef)str)) {
-	return Qnil;
-    }
-    return str;
+    rstr_modify(str);
+
+    bool changed = false;
+    CHAR_ITERATE(str,
+	if (c >= 'a' && c <= 'z') {
+	    c = 'A' + (c - 'a');
+	    changed = true; 
+	});
+
+    return changed ? str : Qnil;
 }
 
-
 /*
  *  call-seq:
  *     str.upcase   => new_str
@@ -2702,58 +3638,61 @@
  */
 
 static VALUE
-rb_str_upcase(VALUE str, SEL sel)
+rstr_upcase(VALUE str, SEL sel)
 {
     str = rb_str_new3(str);
-    rb_str_upcase_bang(str, 0);
+    rstr_upcase_bang(str, 0);
     return str;
 }
 
-
 /*
- *  call-seq:
- *     str.downcase!   => str or nil
+ *  call-seq: 
+ *     str.swapcase!   => str or nil
  *  
- *  Downcases the contents of <i>str</i>, returning <code>nil</code> if no
- *  changes were made.
- *  Note: case replacement is effective only in ASCII region.
+ *  Equivalent to <code>String#swapcase</code>, but modifies the receiver in
+ *  place, returning <i>str</i>, or <code>nil</code> if no changes were made.
+ *  Note: case conversion is effective only in ASCII region.
  */
 
 static VALUE
-rb_str_downcase_bang(VALUE str, SEL sel)
+rstr_swapcase_bang(VALUE str, SEL sel)
 {
-    CFHashCode h;
-    rb_str_modify(str);
-    h = CFHash((CFTypeRef)str);
-    CFStringLowercase((CFMutableStringRef)str, NULL);
-    if (h == CFHash((CFTypeRef)str)) {
-	return Qnil;
-    }
-    return str;
+    rstr_modify(str);
+
+    bool changed = false;
+    CHAR_ITERATE(str,
+	if (c >= 'A' && c <= 'Z') {
+	    c = 'a' + (c - 'A');
+	    changed = true; 
+	}
+        else if (c >= 'a' && c <= 'z') {
+	    c = 'A' + (c - 'a');
+	    changed = true;
+	});
+
+    return changed ? str : Qnil;
 }
 
-
 /*
  *  call-seq:
- *     str.downcase   => new_str
+ *     str.swapcase   => new_str
  *  
- *  Returns a copy of <i>str</i> with all uppercase letters replaced with their
- *  lowercase counterparts. The operation is locale insensitive---only
- *  characters ``A'' to ``Z'' are affected.
- *  Note: case replacement is effective only in ASCII region.
+ *  Returns a copy of <i>str</i> with uppercase alphabetic characters converted
+ *  to lowercase and lowercase characters converted to uppercase.
+ *  Note: case conversion is effective only in ASCII region.
  *     
- *     "hEllO".downcase   #=> "hello"
+ *     "Hello".swapcase          #=> "hELLO"
+ *     "cYbEr_PuNk11".swapcase   #=> "CyBeR_pUnK11"
  */
 
 static VALUE
-rb_str_downcase(VALUE str, SEL sel)
+rstr_swapcase(VALUE str, SEL sel)
 {
     str = rb_str_new3(str);
-    rb_str_downcase_bang(str, 0);
+    rstr_swapcase_bang(str, 0);
     return str;
 }
 
-
 /*
  *  call-seq:
  *     str.capitalize!   => str or nil
@@ -2769,41 +3708,26 @@
  */
 
 static VALUE
-rb_str_capitalize_bang(VALUE str, SEL sel)
+rstr_capitalize_bang(VALUE str, SEL sel)
 {
-    CFStringRef tmp;
-    long i, n;
-    bool changed;
-    UniChar *buffer;
+    rstr_modify(str);
 
-    rb_str_modify(str);
-    n = CFStringGetLength((CFStringRef)str);
-    if (n == 0) {
-	return Qnil;
-    }
-    buffer = (UniChar *)alloca(sizeof(UniChar) * n);
-    CFStringGetCharacters((CFStringRef)str, CFRangeMake(0, n), buffer);
-    changed = false;
-    if (iswascii(buffer[0]) && iswlower(buffer[0])) {
-	buffer[0] = towupper(buffer[0]);
-	changed = true;
-    }
-    for (i = 1; i < n; i++) {
-	if (iswascii(buffer[0]) && iswupper(buffer[i])) {
-	    buffer[i] = towlower(buffer[i]);
-	    changed = true;
+    bool changed = false;
+    CHAR_ITERATE(str,
+        if (i == 0) {
+	    if (c >= 'a' && c <= 'z') {
+		c = 'A' + (c - 'a');
+		changed = true;
+	    }
 	}
-    }
-    if (!changed) {
-	return Qnil;
-    }
-    tmp = CFStringCreateWithCharacters(NULL, buffer, n);
-    CFStringReplaceAll((CFMutableStringRef)str, tmp);
-    CFRelease(tmp);
-    return str;
+	else if (c >= 'A' && c <= 'Z') {
+	    c = 'a' + (c - 'A');
+	    changed = true; 
+	});
+
+    return changed ? str : Qnil;
 }
 
-
 /*
  *  call-seq:
  *     str.capitalize   => new_str
@@ -2818,938 +3742,287 @@
  */
 
 static VALUE
-rb_str_capitalize(VALUE str, SEL sel)
+rstr_capitalize(VALUE str, SEL sel)
 {
     str = rb_str_new3(str);
-    rb_str_capitalize_bang(str, 0);
+    rstr_capitalize_bang(str, 0);
     return str;
 }
 
-
 /*
- *  call-seq: 
-*     str.swapcase!   => str or nil
+ *  call-seq:
+ *     str.ljust(integer, padstr=' ')   => new_str
  *  
- *  Equivalent to <code>String#swapcase</code>, but modifies the receiver in
- *  place, returning <i>str</i>, or <code>nil</code> if no changes were made.
- *  Note: case conversion is effective only in ASCII region.
+ *  If <i>integer</i> is greater than the length of <i>str</i>, returns a new
+ *  <code>String</code> of length <i>integer</i> with <i>str</i> left justified
+ *  and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
+ *     
+ *     "hello".ljust(4)            #=> "hello"
+ *     "hello".ljust(20)           #=> "hello               "
+ *     "hello".ljust(20, '1234')   #=> "hello123412341234123"
  */
 
-static VALUE
-rb_str_swapcase_bang(VALUE str, SEL sel)
+static void
+rstr_justify_part(rb_str_t *str, rb_str_t *pad, long width, long padwidth,
+	long index)
 {
-    CFIndex i, n;
-    UniChar *buffer;
-    bool changed;
-
-    rb_str_modify(str);
-
-    n = CFStringGetLength((CFStringRef)str);
-    if (n == 0) {
-	return Qnil;
-    }
-   
-    buffer = (UniChar *)CFStringGetCharactersPtr((CFStringRef)str);
-    if (buffer == NULL) {
-	buffer = (UniChar *)alloca(sizeof(UniChar) * n);
-    	CFStringGetCharacters((CFStringRef)str, CFRangeMake(0, n), buffer);
-    }
-    for (i = 0, changed = false; i < n; i++) {
-	UniChar c = buffer[i];
-	if (!iswascii(c)) {
-	    continue;
+    do {
+	if (padwidth > width) {
+	    pad = RSTR(rstr_substr((VALUE)pad, 0, width));
 	}
-	if (iswlower(c)) {
-	    c = towupper(c);
-	}
-	else if (iswupper(c)) {
-	    c = towlower(c);
-	}
-	else {
-	    continue;
-	}
-	changed = true;
-	buffer[i] = c;
+	str_insert(str, index, pad, false);
+	width -= padwidth;
+	index += padwidth;
     }
-    if (!changed) {
-	return Qnil;
-    }
-    CFStringDelete((CFMutableStringRef)str, CFRangeMake(0, n));
-    CFStringAppendCharacters((CFMutableStringRef)str,
-	    (const UniChar *)buffer, n);
-    return str;
+    while (width > 0);
 }
 
-
-/*
- *  call-seq:
- *     str.swapcase   => new_str
- *  
- *  Returns a copy of <i>str</i> with uppercase alphabetic characters converted
- *  to lowercase and lowercase characters converted to uppercase.
- *  Note: case conversion is effective only in ASCII region.
- *     
- *     "Hello".swapcase          #=> "hELLO"
- *     "cYbEr_PuNk11".swapcase   #=> "CyBeR_pUnK11"
- */
-
 static VALUE
-rb_str_swapcase(VALUE str, SEL sel)
+rstr_justify(int argc, VALUE *argv, VALUE str, char mode)
 {
-    str = rb_str_new3(str);
-    rb_str_swapcase_bang(str, 0);
-    return str;
-}
+    VALUE w, pad;
+    rb_scan_args(argc, argv, "11", &w, &pad);
 
-typedef void str_charset_find_cb
-(CFRange *, const CFRange *, CFStringRef, UniChar, void *);
-
-static void
-str_charset_find(CFStringRef str, VALUE *charsets, int charset_count,
-		 bool squeeze_mode, str_charset_find_cb *cb, void *ctx)
-{
-    int i;
-    long n;
-    CFMutableCharacterSetRef charset;
-    CFRange search_range, result_range; 
-
-    if (charset_count == 0)
-	return;
-
-    n = CFStringGetLength((CFStringRef)str);
-    if (n == 0)
-    	return;
-
-    charset = NULL;
-    for (i = 0; i < charset_count; i++) {
-	VALUE s = charsets[i];
-	bool exclude;
-	const char *sptr, *p;
-
-	StringValue(s);
-
-	sptr = RSTRING_PTR(s);
-	exclude = sptr[0] == '^';
-
-	p = NULL;
-	if (exclude || (p = strchr(sptr, '-')) != NULL) {
-	    CFMutableCharacterSetRef subset;
-	    const char *b, *e;
-
-	    b = exclude ? sptr + 1 : sptr;
-	    e = sptr + strlen(sptr) - 1;
-	    subset = CFCharacterSetCreateMutable(NULL);
-	    if (p == NULL) {
-		p = strchr(b, '-');
-	    }
-	    while (p != NULL) {
-		if (p > b && *(p - 1) != '\\' && *(p + 1) != '\0') {
-		    CFCharacterSetAddCharactersInRange(subset,
-			    CFRangeMake(*(p - 1), *(p + 1) - *(p - 1) + 1));
-		}
-		if (p > b) {
-		    CFStringRef substr;
-		    substr = CFStringCreateWithBytes(NULL,
-			    (const UInt8 *)b,
-			    (CFIndex)p - (CFIndex)b,
-			    kCFStringEncodingUTF8,
-			    false);
-		    assert(substr != NULL);
-		    CFCharacterSetAddCharactersInString(subset, substr);
-		    CFRelease(substr);
-		}
-		if (p == b) {
-		    p = NULL; 
-		}
-		else {
-		    b = p + 2;
-		    p = strchr(b, '-');
-		}
-	    }
-	    if (b <= e) {
-		CFStringRef substr;
-		substr = CFStringCreateWithBytes(NULL,
-			(const UInt8 *)b,
-			(CFIndex)e - (CFIndex)b + 1,
-			kCFStringEncodingUTF8,
-			false);
-		assert(substr != NULL);
-		CFCharacterSetAddCharactersInString(subset, substr);
-		CFRelease(substr);
-	    }
-
-	    if (exclude) {
-		CFCharacterSetInvert(subset);
-	    }
-
-	    if (charset == NULL) {
-		charset = subset;
-	    }
-	    else {
-		CFCharacterSetIntersect(charset, subset);
-		CFRelease(subset);
-	    }
-	}
-	else {
-	    if (charset == NULL) {
-		charset = CFCharacterSetCreateMutable(NULL);
-		CFCharacterSetAddCharactersInString(charset, (CFStringRef)s);
-	    }
-	    else {
-		CFCharacterSetRef subset;
-		subset = CFCharacterSetCreateWithCharactersInString(NULL,
-			(CFStringRef)s);
-		CFCharacterSetIntersect(charset, subset);
-		CFRelease(subset);	
-	    }
-	}
+    if (NIL_P(pad)) {
+	pad = rb_str_new(" ", 1);
     }
+    else {
+	StringValue(pad);
+    }
 
-    search_range = CFRangeMake(0, n);
-#if 0 
-    while (search_range.length != 0 
-	    && CFStringFindCharacterFromSet(
-		(CFStringRef)str,
-		(CFCharacterSetRef)charset,
-		search_range,
-		0,
-		&result_range)) {
-	(*cb)(&search_range, (const CFRange *)&result_range, str, ctx);
+    rb_str_t *padstr = str_need_string(pad);
+    const long padwidth = str_length(RSTR(padstr), false);
+    if (padwidth == 0) {
+	rb_raise(rb_eArgError, "zero width padding");
     }
-#else
-    CFStringInlineBuffer buf;
-    UniChar previous_char = 0;
-    CFStringInitInlineBuffer((CFStringRef)str, &buf, search_range);
-    do {
-        long i;
-	bool mutated = false;
 
-	if (search_range.location + search_range.length < n) {
-	    n = search_range.location + search_range.length;
-	    CFStringInitInlineBuffer((CFStringRef)str, &buf, CFRangeMake(0, n));
-	}
-
-	result_range.length = 0;
-
-	for (i = search_range.location;
-	     i < search_range.location + search_range.length; 
-	     i++) {
-
-	    UniChar c;
-
-	    c = CFStringGetCharacterFromInlineBuffer(&buf, i);
-	    if (CFCharacterSetIsCharacterMember((CFCharacterSetRef)charset, 
-						c)) {
-		if (result_range.length == 0) {
-		    result_range.location = i;
-		    result_range.length = 1;
-		    previous_char = c;
-		}
-		else {
-		    if (result_range.location + result_range.length == i
-			&& (!squeeze_mode || previous_char == c)) {
-			result_range.length++;
-		    }
-		    else {
-			(*cb)(&search_range, (const CFRange *)&result_range, 
-			    str, previous_char, ctx);
-			result_range.location = i;
-			result_range.length = 1;
-			previous_char = c;
-			if (search_range.location + search_range.length < n) {
-			    result_range.location -= n 
-				- (search_range.location + search_range.length);
-			    mutated = true;
-			    break;
-			}
-		    }
-		}
-	    }
-	}
-	if (!mutated) {
-	    if (result_range.length != 0) {
-		(*cb)(&search_range, (const CFRange *)&result_range, str, 
-			previous_char, ctx);
-		result_range.length = 0;
-		previous_char = 0;
-	    }
-	}
+    const long len = str_length(RSTR(str), false);
+    long width = NUM2LONG(w);
+    str = rb_str_new3(str);
+    if (width < 0 || width <= len) {
+	return str;
     }
-    while (search_range.length != 0 && result_range.length != 0); 
-#endif
+    width -= len;
 
-    CFRelease(charset);	
-}
-
-struct tr_trans_cb_ctx {
-    VALUE orepl;
-    const char *src;
-    long src_len;
-    const char *repl;
-    long repl_len;
-    int sflag;
-    bool changed;
-    CFStringRef opt;
-};
-
-static inline void
-trans_replace(CFMutableStringRef str, const CFRange *result_range, 
-	      CFStringRef substr, CFRange *search_range, int sflag)
-{
-    assert(result_range->location + result_range->length 
-	<= CFStringGetLength((CFStringRef)str));
-    if (sflag == 0) {
-	long n;
-	for (n = result_range->location; 
-	     n < result_range->location + result_range->length; 
-	     n++)
-	    CFStringReplace(str, CFRangeMake(n, 1), substr);
+    if (mode == 'c') {
+	rstr_justify_part(RSTR(str), padstr, ceil(width / 2.0), padwidth, len);
+	rstr_justify_part(RSTR(str), padstr, floor(width / 2.0), padwidth, 0);
     }
+    else if (mode == 'l') {
+	rstr_justify_part(RSTR(str), padstr, width, padwidth, len);
+    }
+    else if (mode == 'r') {
+	rstr_justify_part(RSTR(str), padstr, width, padwidth, 0);
+    }
     else {
-	CFStringReplace(str, *result_range, substr);
-	search_range->location = result_range->location + 1;
-	search_range->length = RSTRING_LEN(str) - search_range->location;
-    }	    
-}
+	rb_bug("invalid mode");
+    }
 
-static void
-rb_str_trans_cb(CFRange *search_range, const CFRange *result_range, 
-    CFStringRef str, UniChar character, void *ctx)
-{
-    struct tr_trans_cb_ctx *_ctx;
-
-    _ctx = (struct tr_trans_cb_ctx *)ctx;
-    if (_ctx->repl_len == 0) {
-	CFStringDelete((CFMutableStringRef)str, *result_range);
-	search_range->length -= result_range->length 
-	    + (result_range->location - search_range->location);
-	search_range->location = result_range->location;
+    if (OBJ_TAINTED(pad)) {
+	OBJ_TAINT(str);
     }
-    else if (_ctx->repl_len == 1) {
-	trans_replace((CFMutableStringRef)str, result_range, 
-	    (CFStringRef)_ctx->orepl, search_range, _ctx->sflag);
-    }
-    else if (_ctx->repl_len > 1) {
-	if (_ctx->src_len == 1) {
-	    if (_ctx->opt == NULL) {
-		_ctx->opt = CFStringCreateWithBytes(NULL, 
-		    (const UInt8 *)_ctx->repl, 1, kCFStringEncodingUTF8,
-		    false);
-	    }
-	    trans_replace((CFMutableStringRef)str, result_range, 
-	        (CFStringRef)_ctx->opt, search_range, _ctx->sflag);
-	}
-	else {
-	    /* TODO: support all syntaxes */
-	    char sb, se, rb, re;
-	    bool s_is_range, r_is_range;
-	    CFStringRef substr;
-	    bool release_substr;
-	    long delta;
 
-	    sb = se = rb = re = 0;
-
-	    if (_ctx->src_len == 3 && _ctx->src[1] == '-') {
-		sb = _ctx->src[0];
-		se = _ctx->src[2];
-		s_is_range = true;
-	    }
-	    else {
-		s_is_range = false;
-		if (_ctx->src[0] == '^' || strchr(_ctx->src, '-') != NULL)
-		    rb_raise(rb_eRuntimeError, "src argument value (%s) not " \
-			    "supported yet", _ctx->src);
-	    }
-
-	    if (_ctx->repl_len == 3 && _ctx->repl[1] == '-') {
-		rb = _ctx->repl[0];
-		re = _ctx->repl[2];
-		r_is_range = true;
-	    }
-	    else {
-		r_is_range = false;
-		if (_ctx->repl[0] == '^' || strchr(_ctx->repl, '-') != NULL)
-		    rb_raise(rb_eRuntimeError, "repl argument value (%s) not " \
-			    "supported yet", _ctx->repl);
-	    }
-
-	    if (s_is_range) {
-		assert(sb <= character && se >= character);
-		delta = character - sb;
-	    }
-	    else {
-		char *p;
-		p = strchr(_ctx->src, character);
-		assert(p != NULL);
-		delta = (long)p - (long)_ctx->src;
-	    }
-
-	    if ((r_is_range && delta > (re - rb))
-		    || (!r_is_range && delta > _ctx->repl_len)) {
-		if (_ctx->opt == NULL) {
-		    _ctx->opt = CFStringCreateWithBytes(NULL, 
-			    (const UInt8 *)&_ctx->repl[_ctx->repl_len - 1], 
-			    1, 
-			    kCFStringEncodingUTF8,
-			    false);
-		}
-		substr = _ctx->opt;
-		release_substr = false;
-	    }
-	    else {
-		const char r = r_is_range
-		    ? rb + delta : _ctx->repl[delta];
-		substr = CFStringCreateWithBytes(NULL, (const UInt8 *)&r, 1, 
-			kCFStringEncodingUTF8, false);
-		release_substr = true;
-	    }
-
-	    trans_replace((CFMutableStringRef)str, result_range, 
-	        (CFStringRef)substr, search_range, _ctx->sflag);
-
-	    if (release_substr)
-		CFRelease(substr);
-	}
-    }
-    _ctx->changed = true;
+    return str;
 }
 
 static VALUE
-tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
+rstr_ljust(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    struct tr_trans_cb_ctx _ctx;
-
-    StringValue(src);
-    StringValue(repl);
-    
-    if (RSTRING_LEN(str) == 0)
-       return Qnil;
-  
-    rb_str_modify(str);
-
-    _ctx.orepl = repl; 
-    _ctx.src = RSTRING_PTR(src);
-    _ctx.repl = RSTRING_PTR(repl);
-
-    /* TODO: support non-8-bit src/repl */
-    assert(_ctx.src != NULL && _ctx.repl != NULL);
-
-    _ctx.src_len = strlen(_ctx.src);
-    _ctx.repl_len = strlen(_ctx.repl);
-    _ctx.sflag = sflag;
-    _ctx.changed = false;
-    _ctx.opt = NULL;
-
-    str_charset_find((CFStringRef)str, &src, 1, _ctx.repl_len > 1,
-	rb_str_trans_cb, &_ctx); 
-
-    if (_ctx.opt != NULL)
-	CFRelease(_ctx.opt);
-
-    return _ctx.changed ? str : Qnil;
+    return rstr_justify(argc, argv, str, 'l');
 }
 
 /*
  *  call-seq:
- *     str.tr!(from_str, to_str)   => str or nil
+ *     str.rjust(integer, padstr=' ')   => new_str
  *  
- *  Translates <i>str</i> in place, using the same rules as
- *  <code>String#tr</code>. Returns <i>str</i>, or <code>nil</code> if no
- *  changes were made.
+ *  If <i>integer</i> is greater than the length of <i>str</i>, returns a new
+ *  <code>String</code> of length <i>integer</i> with <i>str</i> right justified
+ *  and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
+ *     
+ *     "hello".rjust(4)            #=> "hello"
+ *     "hello".rjust(20)           #=> "               hello"
+ *     "hello".rjust(20, '1234')   #=> "123412341234123hello"
  */
 
 static VALUE
-rb_str_tr_bang(VALUE str, SEL sel, VALUE src, VALUE repl)
+rstr_rjust(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    return tr_trans(str, src, repl, 0);
+    return rstr_justify(argc, argv, str, 'r');
 }
 
-
 /*
  *  call-seq:
- *     str.tr(from_str, to_str)   => new_str
+ *     str.center(integer, padstr)   => new_str
  *  
- *  Returns a copy of <i>str</i> with the characters in <i>from_str</i> replaced
- *  by the corresponding characters in <i>to_str</i>. If <i>to_str</i> is
- *  shorter than <i>from_str</i>, it is padded with its last character. Both
- *  strings may use the c1--c2 notation to denote ranges of characters, and
- *  <i>from_str</i> may start with a <code>^</code>, which denotes all
- *  characters except those listed.
+ *  If <i>integer</i> is greater than the length of <i>str</i>, returns a new
+ *  <code>String</code> of length <i>integer</i> with <i>str</i> centered and
+ *  padded with <i>padstr</i>; otherwise, returns <i>str</i>.
  *     
- *     "hello".tr('aeiou', '*')    #=> "h*ll*"
- *     "hello".tr('^aeiou', '*')   #=> "*e**o"
- *     "hello".tr('el', 'ip')      #=> "hippo"
- *     "hello".tr('a-y', 'b-z')    #=> "ifmmp"
+ *     "hello".center(4)         #=> "hello"
+ *     "hello".center(20)        #=> "       hello        "
+ *     "hello".center(20, '123') #=> "1231231hello12312312"
  */
 
 static VALUE
-rb_str_tr(VALUE str, SEL sel, VALUE src, VALUE repl)
+rstr_center(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    str = rb_str_new3(str);
-    rb_str_tr_bang(str, 0, src, repl);
-    return str;
+    return rstr_justify(argc, argv, str, 'c');
 }
 
 /*
  *  call-seq:
- *     str.delete!([other_str]+)   => str or nil
+ *     str.strip!   => str or nil
  *  
- *  Performs a <code>delete</code> operation in place, returning <i>str</i>, or
- *  <code>nil</code> if <i>str</i> was not modified.
+ *  Removes leading and trailing whitespace from <i>str</i>. Returns
+ *  <code>nil</code> if <i>str</i> was not altered.
  */
 
-static void
-rb_str_delete_bang_cb(CFRange *search_range, const CFRange *result_range, 
-    CFStringRef str, UniChar character, void *ctx)
+static VALUE
+str_strip(VALUE str, int direction)
 {
-    CFStringDelete((CFMutableStringRef)str, *result_range);
-    search_range->length -= result_range->length 
-	+ (result_range->location - search_range->location);
-    search_range->location = result_range->location;
-    *(bool *)ctx = true;
-}
+    rstr_modify(str);
 
-static VALUE
-rb_str_delete_bang(VALUE str, SEL sel, int argc, VALUE *argv)
-{
-    bool changed;
-    if (argc < 1) {
-	rb_raise(rb_eArgError, "wrong number of arguments");
+    long len = str_length(RSTR(str), false);
+    if (len == 0) {
+	return Qnil;
     }
-    rb_str_modify(str);
-    changed = false;
-    str_charset_find((CFStringRef)str, argv, argc, false,
-	rb_str_delete_bang_cb, &changed);
-    if (!changed) {
-    	return Qnil;
-    }
-    return str;
-}
 
-/*
- *  call-seq:
- *     str.delete([other_str]+)   => new_str
- *  
- *  Returns a copy of <i>str</i> with all characters in the intersection of its
- *  arguments deleted. Uses the same rules for building the set of characters as
- *  <code>String#count</code>.
- *     
- *     "hello".delete "l","lo"        #=> "heo"
- *     "hello".delete "lo"            #=> "he"
- *     "hello".delete "aeiou", "^e"   #=> "hell"
- *     "hello".delete "ej-m"          #=> "ho"
- */
+    bool changed = false;
 
-static VALUE
-rb_str_delete(VALUE str, SEL sel, int argc, VALUE *argv)
-{
-    str = rb_str_new3(str);
-    rb_str_delete_bang(str, 0, argc, argv);
-    return str;
-}
+    if (direction <= 0) {
+	// Strip left side.
+	long pos = 0;
+	while (pos < len) {
+	    if (!iswspace(rb_str_get_uchar(str, pos))) {
+		break;
+	    }
+	    pos++;
+	}
 
+	if (pos > 0) {
+	    str_delete(RSTR(str), 0, pos, false);
+	    len -= pos;
+	    changed = true;
+	}
+    }
 
-/*
- *  call-seq:
- *     str.squeeze!([other_str]*)   => str or nil
- *  
- *  Squeezes <i>str</i> in place, returning either <i>str</i>, or
- *  <code>nil</code> if no changes were made.
- */
+    if (direction >= 0) {
+	// Strip right side.
+	long pos = len - 1;
+	while (pos >= 0) {
+	    if (!iswspace(rb_str_get_uchar(str, pos))) {
+		break;
+	    }
+	    pos--;
+	}
 
-static void
-rb_str_squeeze_bang_cb(CFRange *search_range, const CFRange *result_range, 
-    CFStringRef str, UniChar character, void *ctx)
-{
-    if (result_range->length > 1) {
-	CFRange to_delete = *result_range;
-	to_delete.length--;
-	CFStringDelete((CFMutableStringRef)str, to_delete);
-	search_range->length -= result_range->length 
-	    + (result_range->location - search_range->location);
-	search_range->location = result_range->location + 1;
-	*(bool *)ctx = true;
+	if (pos < len - 1 && pos >= 0) {
+	    str_delete(RSTR(str), pos + 1, len - pos - 1, false);
+	    changed = true;
+	}
     }
+
+    return changed ? str : Qnil;
 }
 
 static VALUE
-rb_str_squeeze_bang(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_strip_bang(VALUE str, SEL sel)
 {
-    bool changed;
-    VALUE all_chars;
-    if (argc == 0) {
-	argc = 1;
-	all_chars = (VALUE)CFSTR("a-z");
-	argv = &all_chars;
-    }
-    rb_str_modify(str);
-    changed = false;
-    str_charset_find((CFStringRef)str, argv, argc, true,
-	rb_str_squeeze_bang_cb, &changed);
-    if (!changed)
-    	return Qnil;
-    return str;
+    return str_strip(str, 0);
 }
 
-
 /*
  *  call-seq:
- *     str.squeeze([other_str]*)    => new_str
+ *     str.strip   => new_str
  *  
- *  Builds a set of characters from the <i>other_str</i> parameter(s) using the
- *  procedure described for <code>String#count</code>. Returns a new string
- *  where runs of the same character that occur in this set are replaced by a
- *  single character. If no arguments are given, all runs of identical
- *  characters are replaced by a single character.
+ *  Returns a copy of <i>str</i> with leading and trailing whitespace removed.
  *     
- *     "yellow moon".squeeze                  #=> "yelow mon"
- *     "  now   is  the".squeeze(" ")         #=> " now is the"
- *     "putters shoot balls".squeeze("m-z")   #=> "puters shot balls"
+ *     "    hello    ".strip   #=> "hello"
+ *     "\tgoodbye\r\n".strip   #=> "goodbye"
  */
 
 static VALUE
-rb_str_squeeze(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_strip(VALUE str, SEL sel)
 {
-    str = rb_str_new3(str);
-    rb_str_squeeze_bang(str, 0, argc, argv);
+    str = rstr_dup(str, 0);
+    rstr_strip_bang(str, 0);
     return str;
 }
 
-
 /*
  *  call-seq:
- *     str.tr_s!(from_str, to_str)   => str or nil
+ *     str.lstrip!   => self or nil
  *  
- *  Performs <code>String#tr_s</code> processing on <i>str</i> in place,
- *  returning <i>str</i>, or <code>nil</code> if no changes were made.
+ *  Removes leading whitespace from <i>str</i>, returning <code>nil</code> if no
+ *  change was made. See also <code>String#rstrip!</code> and
+ *  <code>String#strip!</code>.
+ *     
+ *     "  hello  ".lstrip   #=> "hello  "
+ *     "hello".lstrip!      #=> nil
  */
 
 static VALUE
-rb_str_tr_s_bang(VALUE str, SEL sel, VALUE src, VALUE repl)
+rstr_lstrip_bang(VALUE str, SEL sel)
 {
-    return tr_trans(str, src, repl, 1);
+    return str_strip(str, -1);
 }
 
-
 /*
  *  call-seq:
- *     str.tr_s(from_str, to_str)   => new_str
+ *     str.lstrip   => new_str
  *  
- *  Processes a copy of <i>str</i> as described under <code>String#tr</code>,
- *  then removes duplicate characters in regions that were affected by the
- *  translation.
+ *  Returns a copy of <i>str</i> with leading whitespace removed. See also
+ *  <code>String#rstrip</code> and <code>String#strip</code>.
  *     
- *     "hello".tr_s('l', 'r')     #=> "hero"
- *     "hello".tr_s('el', '*')    #=> "h*o"
- *     "hello".tr_s('el', 'hx')   #=> "hhxo"
+ *     "  hello  ".lstrip   #=> "hello  "
+ *     "hello".lstrip       #=> "hello"
  */
 
 static VALUE
-rb_str_tr_s(VALUE str, SEL sel, VALUE src, VALUE repl)
+rstr_lstrip(VALUE str, SEL sel)
 {
-    str = rb_str_new3(str);
-    rb_str_tr_s_bang(str, 0, src, repl);
+    str = rstr_dup(str, 0);
+    rstr_lstrip_bang(str, 0);
     return str;
 }
 
-
 /*
  *  call-seq:
- *     str.count([other_str]+)   => fixnum
+ *     str.rstrip!   => self or nil
  *  
- *  Each <i>other_str</i> parameter defines a set of characters to count.  The
- *  intersection of these sets defines the characters to count in
- *  <i>str</i>. Any <i>other_str</i> that starts with a caret (^) is
- *  negated. The sequence c1--c2 means all characters between c1 and c2.
+ *  Removes trailing whitespace from <i>str</i>, returning <code>nil</code> if
+ *  no change was made. See also <code>String#lstrip!</code> and
+ *  <code>String#strip!</code>.
  *     
- *     a = "hello world"
- *     a.count "lo"            #=> 5
- *     a.count "lo", "o"       #=> 2
- *     a.count "hello", "^l"   #=> 4
- *     a.count "ej-m"          #=> 4
+ *     "  hello  ".rstrip   #=> "  hello"
+ *     "hello".rstrip!      #=> nil
  */
 
-static void
-rb_str_count_cb(CFRange *search_range, const CFRange *result_range, 
-    CFStringRef str, UniChar character, void *ctx)
-{
-    (*(int *)ctx) += result_range->length;
-}
-
 static VALUE
-rb_str_count(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_rstrip_bang(VALUE str, SEL sel)
 {
-    int count;
-    if (argc < 1) {
-	rb_raise(rb_eArgError, "wrong number of arguments");
-    }
-    count = 0;
-    str_charset_find((CFStringRef)str, argv, argc, false,
-	rb_str_count_cb, &count); 
-    return INT2NUM(count);
+    return str_strip(str, 1);
 }
 
 /*
  *  call-seq:
- *     str.split(pattern=$;, [limit])   => anArray
+ *     str.rstrip   => new_str
  *  
- *  Divides <i>str</i> into substrings based on a delimiter, returning an array
- *  of these substrings.
+ *  Returns a copy of <i>str</i> with trailing whitespace removed. See also
+ *  <code>String#lstrip</code> and <code>String#strip</code>.
  *     
- *  If <i>pattern</i> is a <code>String</code>, then its contents are used as
- *  the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
- *  space, <i>str</i> is split on whitespace, with leading whitespace and runs
- *  of contiguous whitespace characters ignored.
- *     
- *  If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
- *  pattern matches. Whenever the pattern matches a zero-length string,
- *  <i>str</i> is split into individual characters. If <i>pattern</i> contains
- *  groups, the respective matches will be returned in the array as well.
- *     
- *  If <i>pattern</i> is omitted, the value of <code>$;</code> is used.  If
- *  <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
- *  split on whitespace as if ` ' were specified.
- *     
- *  If the <i>limit</i> parameter is omitted, trailing null fields are
- *  suppressed. If <i>limit</i> is a positive number, at most that number of
- *  fields will be returned (if <i>limit</i> is <code>1</code>, the entire
- *  string is returned as the only entry in an array). If negative, there is no
- *  limit to the number of fields returned, and trailing null fields are not
- *  suppressed.
- *     
- *     " now's  the time".split        #=> ["now's", "the", "time"]
- *     " now's  the time".split(' ')   #=> ["now's", "the", "time"]
- *     " now's  the time".split(/ /)   #=> ["", "now's", "", "the", "time"]
- *     "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
- *     "hello".split(//)               #=> ["h", "e", "l", "l", "o"]
- *     "hello".split(//, 3)            #=> ["h", "e", "llo"]
- *     "hi mom".split(%r{\s*})         #=> ["h", "i", "m", "o", "m"]
- *     
- *     "mellow yellow".split("ello")   #=> ["m", "w y", "w"]
- *     "1,2,,3,4,,".split(',')         #=> ["1", "2", "", "3", "4"]
- *     "1,2,,3,4,,".split(',', 4)      #=> ["1", "2", "", "3,4,,"]
- *     "1,2,,3,4,,".split(',', -4)     #=> ["1", "2", "", "3", "4", "", ""]
+ *     "  hello  ".rstrip   #=> "  hello"
+ *     "hello".rstrip       #=> "hello"
  */
 
-#if MAC_OS_X_VERSION_MAX_ALLOWED < 1060
-static inline bool
-CFStringIsSurrogateHighCharacter(UniChar character)
-{
-    return ((character >= 0xD800UL) && (character <= 0xDBFFUL) ? true : false);
-}
-#endif
-
 static VALUE
-rb_str_split_m(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_rstrip(VALUE str, SEL sel)
 {
-    rb_encoding *enc;
-    VALUE spat;
-    VALUE limit;
-    int awk_split = Qfalse;
-    int spat_string = Qfalse;
-    long beg, end, i = 0;
-    int lim = 0;
-    VALUE result, tmp;
-    long clen;
-
-    clen = RSTRING_LEN(str);
-
-    if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
-	lim = NUM2INT(limit);
-	if (lim <= 0) {
-	    limit = Qnil;
-	}
-	else if (lim == 1) {
-	    if (clen == 0) {
-		return rb_ary_new2(0);
-	    }
-	    return rb_ary_new3(1, str);
-	}
-	i = 1;
-    }
-
-    enc = STR_ENC_GET(str);
-    result = rb_ary_new();
-    if (NIL_P(spat)) {
-	if (!NIL_P(rb_fs)) {
-	    spat = rb_fs;
-	    goto fs_set;
-	}
-	awk_split = Qtrue;
-    }
-    else {
-      fs_set:
-	if (TYPE(spat) == T_STRING) {
-	    spat_string = Qtrue;
-	    if (RSTRING_LEN(spat) == 1
-		&& CFStringGetCharacterAtIndex((CFStringRef)spat, 0) == ' ') {
-		awk_split = Qtrue;
-	    }
-	}
-	else {
-	    spat = get_pat(spat, 1);
-	}
-    }
-
-    beg = 0;
-    if (awk_split || spat_string) {
-	CFRange search_range;
-	CFCharacterSetRef charset = NULL;
-	long spat_len = 0;
-	if (spat == Qnil) {
-	    charset = CFCharacterSetGetPredefined(
-		    kCFCharacterSetWhitespaceAndNewline);
-	}
-	else {
-            spat_len = RSTRING_LEN(spat);
-	}
-	search_range = CFRangeMake(0, clen);
-	do {
-	    CFRange result_range;
-	    CFRange substr_range;
-	    if (spat != Qnil) {
-	        if (spat_len == 0) {
-                    if (search_range.location + 1 < clen && search_range.length > 0) {
-                        result_range.location = search_range.location + 1;
-                        result_range.length = 0;
-                        UniChar c = CFStringGetCharacterAtIndex((CFStringRef)str,
-                                        search_range.location);
-                        if (CFStringIsSurrogateHighCharacter(c)) {
-                            if (result_range.location + 1 < clen) {
-                                ++result_range.location;
-                            }
-                            else {
-                                break;
-                            }
-                        }
-                    }
-                    else {
-                        break;
-                    }
-	        }
-	        else if (!CFStringFindWithOptions((CFStringRef)str, 
-			    (CFStringRef)spat,
-			    search_range,
-			    0,
-			    &result_range)) {
-		    break;
-		}
-	    }
-	    else {
-		if (!CFStringFindCharacterFromSet((CFStringRef)str,
-			    charset, 
-			    search_range,
-			    0,
-			    &result_range)) {
-		    break;
-		}
-	    }
-
-	    substr_range.location = search_range.location;
-	    substr_range.length = result_range.location 
-		- search_range.location;
-
-	    if (awk_split == Qfalse || substr_range.length > 0) {
-		VALUE substr = rb_str_subseq(str, substr_range.location,
-		    substr_range.length);
-
-		if (awk_split == Qtrue) {
-		    CFStringTrimWhitespace((CFMutableStringRef)substr);
-		    if (CFStringGetLength((CFStringRef)substr) > 0) {
-			rb_ary_push(result, substr);
-		    }
-		}
-		else {
-		    rb_ary_push(result, substr);
-		}
-	    }
-
-	    search_range.location = result_range.location + result_range.length;
-	    search_range.length = clen - search_range.location;
-	}
-	while ((limit == Qnil || --lim > 1));
-	beg = search_range.location;
-    }
-    else {
-	long start = beg;
-	bool last_null = false;
-	struct re_registers *regs;
-
-	while ((end = rb_reg_search2(spat, str, start, 0, false)) >= 0) {
-	    regs = RMATCH_REGS(rb_backref_get());
-	    if (start == end && BEG(0) == END(0)) {
-		if (0) {
-		    break;
-		}
-		else if (last_null) {
-		    rb_ary_push(result, rb_str_subseq(str, beg, 1));
-		    beg = start;
-		}
-		else {
-		    start++;
-		    last_null = true;
-		    continue;
-		}
-	    }
-	    else {
-		rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
-		beg = start = END(0);
-	    }
-	    last_null = false;
-
-	    for (long idx = 1; idx < regs->num_regs; idx++) {
-		if (BEG(idx) == -1) {
-		    continue;
-		}
-		if (BEG(idx) == END(idx)) {
-		    tmp = rb_str_new5(str, 0, 0);
-		}
-		else {
-		    tmp = rb_str_subseq(str, BEG(idx), END(idx) - BEG(idx));
-		}
-		rb_ary_push(result, tmp);
-	    }
-	    if (!NIL_P(limit) && lim <= ++i) {
-		break;
-	    }
-	}
-    }
-    if (clen > 0 && (!NIL_P(limit) || clen > beg || lim < 0)) {
-	if (clen == beg) {
-	    tmp = rb_str_new5(str, 0, 0);
-	}
-	else {
-	    tmp = rb_str_subseq(str, beg, clen-beg);
-	}
-	rb_ary_push(result, tmp);
-    }
-    if (NIL_P(limit) && lim == 0) {
-	while (RARRAY_LEN(result) > 0 &&
-	       RSTRING_LEN(RARRAY_AT(result, RARRAY_LEN(result)-1)) == 0)
-	    rb_ary_pop(result);
-    }
-
-    return result;
+    str = rstr_dup(str, 0);
+    rstr_rstrip_bang(str, 0);
+    return str;
 }
 
-VALUE
-rb_str_split(VALUE str, const char *sep0)
-{
-    VALUE sep;
-
-    StringValue(str);
-    sep = rb_str_new2(sep0);
-    return rb_str_split_m(str, 0, 1, &sep);
-}
-
-VALUE
-rb_str_split2(VALUE str, VALUE sep)
-{
-    StringValue(str);
-    StringValue(sep);
-    return rb_str_split_m(str, 0, 1, &sep);
-}
-
 /*
  *  Document-method: lines
  *  call-seq:
@@ -3796,139 +4069,63 @@
  */
 
 static VALUE
-rb_str_each_line(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_each_line(VALUE str, SEL sel, int argc, VALUE *argv)
 {
     VALUE rs;
-    long n;
-    CFStringRef substr;
-    CFRange sub_range, search_range, res_range;
-    bool zero_sep;
-
     if (rb_scan_args(argc, argv, "01", &rs) == 0) {
 	rs = rb_rs;
     }
+
     RETURN_ENUMERATOR(str, argc, argv);
+
     if (NIL_P(rs)) {
 	rb_yield(str);
 	return str;
     }
+
     StringValue(rs);
-    zero_sep = CFStringGetLength((CFStringRef)rs) == 0;
-    if (zero_sep) {
-	rs = rb_default_rs;
+
+    rb_str_t *rs_str = str_need_string(rs);
+    if (rs_str->length_in_bytes == 0) {
+	rs_str = str_need_string(rb_default_rs);
     }
-    n = CFStringGetLength((CFStringRef)str);
-    search_range = CFRangeMake(0, n);
-    sub_range = CFRangeMake(0, 0);
 
+    const long len = str_length(RSTR(str), false);
     const bool tainted = OBJ_TAINTED(str);
 
-#define YIELD_SUBSTR(range) \
-    do { \
-	VALUE mcopy; \
-	substr = CFStringCreateWithSubstring(NULL, (CFStringRef)str,  \
-	    range); \
-	mcopy = (VALUE)CFStringCreateMutableCopy(NULL, 0, \
-	    (CFStringRef)substr); \
-	CFMakeCollectable((CFTypeRef)mcopy); \
-	if (tainted) { \
-	    OBJ_TAINT(mcopy); \
-	} \
-	rb_yield(mcopy); \
-	RETURN_IF_BROKEN(); \
-    } \
-    while (0)
+    long pos = 0;
+    do {
+	const long off = str_index_for_string(RSTR(str), rs_str, pos, -1,
+		false, false);
 
-    while (CFStringFindWithOptions((CFStringRef)str, (CFStringRef)rs,
-	search_range, 0, &res_range)) {
-	if (zero_sep
-	    && sub_range.length > 0 
-	    && sub_range.location + sub_range.length 
-	       == res_range.location) {
-	    sub_range.length += res_range.length;
-	}		
+	long substr_len = 0;
+	if (off < 0) {
+	    if (pos == len) {
+		break;
+	    }
+	    substr_len = len - pos;
+	}
 	else {
-	    if (sub_range.length > 0)
-		YIELD_SUBSTR(sub_range);
-	    sub_range = CFRangeMake(search_range.location, 
-		res_range.location - search_range.location + res_range.length);
+	    substr_len = off - pos + 1;
 	}
-	search_range.location = res_range.location + res_range.length;
-	search_range.length = n - search_range.location;
-    }
 
-    if (sub_range.length != 0)
-	YIELD_SUBSTR(sub_range);
+	VALUE substr = rstr_substr(str, pos, substr_len);
+	if (tainted) {
+	    OBJ_TAINT(substr);
+	}
+	rb_yield(substr);
+	RETURN_IF_BROKEN();
 
-    if (search_range.location < n)
-	YIELD_SUBSTR(CFRangeMake(search_range.location, 
-	    n - search_range.location));
-
-#undef YIELD_SUBSTR
-
-    return str;
-}
-
-/*
- *  Document-method: bytes
- *  call-seq:
- *     str.bytes   => anEnumerator
- *     str.bytes {|fixnum| block }    => str
- *  
- *  Returns an enumerator that gives each byte in the string.  If a block is
- *  given, it iterates over each byte in the string.
- *     
- *     "hello".bytes.to_a        #=> [104, 101, 108, 108, 111]
- */
-
-/*
- *  Document-method: each_byte
- *  call-seq:
- *     str.each_byte {|fixnum| block }    => str
- *  
- *  Passes each byte in <i>str</i> to the given block.
- *     
- *     "hello".each_byte {|c| print c, ' ' }
- *     
- *  <em>produces:</em>
- *     
- *     104 101 108 108 111
- */
-
-static VALUE
-rb_str_each_byte(VALUE str, SEL sel)
-{
-    RETURN_ENUMERATOR(str, 0, 0);
-
-    long n = RSTRING_LEN(str);
-    if (n == 0) {
-	return str;
+	if (off < 0) {
+	    break;
+	}
+	pos = off + 1;
     }
+    while (true);
 
-    CFStringEncoding encoding = CFStringGetSmallestEncoding((CFStringRef)str);
-    const long buflen = CFStringGetMaximumSizeForEncoding(n, encoding);
-    UInt8 *buffer = (UInt8 *)alloca(buflen + 1);
-    long used_buflen = 0;
-
-    CFStringGetBytes((CFStringRef)str,
-	    CFRangeMake(0, n),
-	    encoding,
-	    0,
-	    false,
-	    buffer,
-	    buflen+1,
-	    &used_buflen);
-
-    long i;
-    for (i = 0; i < used_buflen; i++) {
-	rb_yield(INT2FIX(buffer[i]));
-	RETURN_IF_BROKEN();
-    }
-
     return str;
 }
 
-
 /*
  *  Document-method: chars
  *  call-seq:
@@ -3956,795 +4153,912 @@
  */
 
 static VALUE
-rb_str_each_char(VALUE str, SEL sel)
+rstr_each_char(VALUE str, SEL sel)
 {
-    CFStringInlineBuffer buf;
-    long i, n;
-
     RETURN_ENUMERATOR(str, 0, 0);
-    n = CFStringGetLength((CFStringRef)str);
-    CFStringInitInlineBuffer((CFStringRef)str, &buf, CFRangeMake(0, n));
-    for (i = 0; i < n; i++) {
-	UniChar c;
-	VALUE s;
 
-	c = CFStringGetCharacterFromInlineBuffer(&buf, i);
-	s = rb_str_new(NULL, 0);
-	CFStringAppendCharacters((CFMutableStringRef)s, &c, 1);
-	rb_yield(s);
-	RETURN_IF_BROKEN();
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(str, &chars, &chars_len, &need_free);
+
+    for (long i = 0; i < chars_len; i++) {
+	VALUE charstr = rb_unicode_str_new(&chars[i], 1);
+	rb_yield(charstr);
+	ENSURE_AND_RETURN_IF_BROKEN(
+	    if (need_free) free(chars)
+	);
     }
+
+    if (need_free) {
+	free(chars);
+    }
+
     return str;
 }
 
 /*
+ *  Document-method: each_byte
  *  call-seq:
- *     str.chop!   => str or nil
+ *     str.each_byte {|fixnum| block }    => str
  *  
- *  Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
- *  or <code>nil</code> if <i>str</i> is the empty string.  See also
- *  <code>String#chomp!</code>.
+ *  Passes each byte in <i>str</i> to the given block.
+ *     
+ *     "hello".each_byte {|c| print c, ' ' }
+ *     
+ *  <em>produces:</em>
+ *     
+ *     104 101 108 108 111
  */
 
 static VALUE
-rb_str_chop_bang(VALUE str, SEL sel)
+rstr_each_byte(VALUE str, SEL sel)
 {
-    long n;
-    const char *p;
-    CFRange r;
+    RETURN_ENUMERATOR(str, 0, 0);
 
-    n = CFStringGetLength((CFStringRef)str);
-    if (n == 0)
-	return Qnil;
-    rb_str_modify(str);
-    p = RSTRING_PTR(str);
-    r = CFRangeMake(n - 1, 1);
-    if (n >= 2 && p[n - 1] == '\n' && p[n - 2] == '\r') {
-	/* We need this to pass the tests, but this is most probably 
-	 * unnecessary.
-	 */
-	r.location--;
-	r.length++;
+    for (long i = 0; i < RSTR(str)->length_in_bytes; i++) {
+	rb_yield(INT2FIX(RSTR(str)->data.bytes[i]));
+	RETURN_IF_BROKEN();
     }
-    CFStringDelete((CFMutableStringRef)str, r);
     return str;
 }
 
-
 /*
  *  call-seq:
- *     str.chop   => new_str
+ *     str.succ   => new_str
+ *     str.next   => new_str
  *  
- *  Returns a new <code>String</code> with the last character removed.  If the
- *  string ends with <code>\r\n</code>, both characters are removed. Applying
- *  <code>chop</code> to an empty string returns an empty
- *  string. <code>String#chomp</code> is often a safer alternative, as it leaves
- *  the string unchanged if it doesn't end in a record separator.
+ *  Returns the successor to <i>str</i>. The successor is calculated by
+ *  incrementing characters starting from the rightmost alphanumeric (or
+ *  the rightmost character if there are no alphanumerics) in the
+ *  string. Incrementing a digit always results in another digit, and
+ *  incrementing a letter results in another letter of the same case.
+ *  Incrementing nonalphanumerics uses the underlying character set's
+ *  collating sequence.
  *     
- *     "string\r\n".chop   #=> "string"
- *     "string\n\r".chop   #=> "string\n"
- *     "string\n".chop     #=> "string"
- *     "string".chop       #=> "strin"
- *     "x".chop.chop       #=> ""
+ *  If the increment generates a ``carry,'' the character to the left of
+ *  it is incremented. This process repeats until there is no carry,
+ *  adding an additional character if necessary.
+ *     
+ *     "abcd".succ        #=> "abce"
+ *     "THX1138".succ     #=> "THX1139"
+ *     "<<koala>>".succ   #=> "<<koalb>>"
+ *     "1999zzz".succ     #=> "2000aaa"
+ *     "ZZZ9999".succ     #=> "AAAA0000"
+ *     "***".succ         #=> "**+"
  */
 
 static VALUE
-rb_str_chop(VALUE str, SEL sel)
+rstr_succ(VALUE str, SEL sel)
 {
-    VALUE str2 = rb_str_new3(str);
-    rb_str_chop_bang(str2, 0);
-    return str2;
-}
+    if (rb_str_chars_len(str) == 0) {
+	return str;
+    }
 
-/*
- *  call-seq:
- *     str.chomp!(separator=$/)   => str or nil
- *  
- *  Modifies <i>str</i> in place as described for <code>String#chomp</code>,
- *  returning <i>str</i>, or <code>nil</code> if no modifications were made.
- */
+    if (!str_try_making_data_uchars(RSTR(str))) {
+	rb_raise(rb_eArgError,
+		"cannot make receiver data as Unicode characters");
+    }
 
-static VALUE
-rb_str_chomp_bang(VALUE str, SEL sel, int argc, VALUE *argv)
-{
-    VALUE rs;
-    long len, rslen;
-    CFRange range_result;
+    UChar *chars_buf = (UChar *)malloc(RSTR(str)->length_in_bytes
+	    + sizeof(UChar));
+    UChar *chars_ptr = &chars_buf[1];
 
-    if (rb_scan_args(argc, argv, "01", &rs) == 0) {
-	rs = rb_rs;
-    }
-    rb_str_modify(str);
-    if (rs == Qnil) {
-	return Qnil;
-    }
-    len = CFStringGetLength((CFStringRef)str);
-    if (len == 0) {
-	return Qnil;
-    }
-    StringValue(rs);
-    rslen = CFStringGetLength((CFStringRef)rs);
-    range_result = CFRangeMake(len, 0);
-    if (rs == rb_default_rs
-	|| rslen == 0
-	|| (rslen == 1 
-	    && CFStringGetCharacterAtIndex((CFStringRef)rs, 0) == '\n')) {
-	UniChar c;
-	c = CFStringGetCharacterAtIndex((CFStringRef)str, 
-		range_result.location - 1);
-	if (c == '\n') {
-	    range_result.location--;
-	    range_result.length++;
-	    c = CFStringGetCharacterAtIndex((CFStringRef)str, 
-		    range_result.location - 1);
+    memcpy(chars_ptr, RSTR(str)->data.uchars, RSTR(str)->length_in_bytes);
+
+    long len = BYTES_TO_UCHARS(RSTR(str)->length_in_bytes);
+    UChar carry = 0;
+    bool modified = false;
+
+    for (long i = len - 1; i >= 0; i--) {
+	UChar c = chars_ptr[i];
+	if (isdigit(c)) {
+	    modified = true;
+	    if (c != '9') {
+		chars_ptr[i]++;
+		carry = 0;
+		break;
+	    }
+	    else {
+		chars_ptr[i] = '0';
+		carry = '1';
+	    }
 	}
-	if (c == '\r' && (rslen > 0 || range_result.location != len)) {
-	    /* MS is the devil */
-	    range_result.location--;
-	    range_result.length++;
+	else if (isalpha(c)) {
+	    const bool lower = islower(c);
+	    UChar e = lower ? 'z' : 'Z';
+	    modified = true;
+	    if (c != e) {
+		chars_ptr[i]++;
+		carry = 0;
+		break;
+	    }
+	    else {
+		carry = chars_ptr[i] = lower ? 'a' : 'A';
+	    }
 	}
+#if 0 // TODO: this requires more love
+	else if (!isascii(c)) {
+	    modified = true;
+	    chars_ptr[i]++;
+	    carry = 0;
+	    break;
+	}
+#endif
     }
-    else {
-	StringValue(rs);
-	CFStringFindWithOptions((CFStringRef)str, (CFStringRef)rs,
-		CFRangeMake(len - rslen, rslen), 0, &range_result);
+
+    if (!modified) {
+	chars_ptr[len - 1]++;
     }
-    if (range_result.length == 0 
-	|| range_result.location + range_result.length > len) {
-	return Qnil;
+    else if (carry != 0) {
+	chars_ptr = chars_buf;
+	chars_ptr[0] = carry;
+	len++;
     }
-    CFStringDelete((CFMutableStringRef)str, range_result);
-    return str;
+
+    VALUE newstr = rb_unicode_str_new(chars_ptr, len);
+    free(chars_buf);
+    return newstr;
 }
 
-
 /*
  *  call-seq:
- *     str.chomp(separator=$/)   => new_str
+ *     str.succ!   => str
+ *     str.next!   => str
  *  
- *  Returns a new <code>String</code> with the given record separator removed
- *  from the end of <i>str</i> (if present). If <code>$/</code> has not been
- *  changed from the default Ruby record separator, then <code>chomp</code> also
- *  removes carriage return characters (that is it will remove <code>\n</code>,
- *  <code>\r</code>, and <code>\r\n</code>).
- *     
- *     "hello".chomp            #=> "hello"
- *     "hello\n".chomp          #=> "hello"
- *     "hello\r\n".chomp        #=> "hello"
- *     "hello\n\r".chomp        #=> "hello\n"
- *     "hello\r".chomp          #=> "hello"
- *     "hello \n there".chomp   #=> "hello \n there"
- *     "hello".chomp("llo")     #=> "he"
+ *  Equivalent to <code>String#succ</code>, but modifies the receiver in
+ *  place.
  */
 
 static VALUE
-rb_str_chomp(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_succ_bang(VALUE str, SEL sel)
 {
-    str = rb_str_new3(str);
-    rb_str_chomp_bang(str, 0, argc, argv);
+    rstr_replace(str, 0, rstr_succ(str, 0));
     return str;
 }
 
 /*
  *  call-seq:
- *     str.lstrip!   => self or nil
+ *     str.upto(other_str, exclusive=false) {|s| block }   => str
  *  
- *  Removes leading whitespace from <i>str</i>, returning <code>nil</code> if no
- *  change was made. See also <code>String#rstrip!</code> and
- *  <code>String#strip!</code>.
+ *  Iterates through successive values, starting at <i>str</i> and
+ *  ending at <i>other_str</i> inclusive, passing each value in turn to
+ *  the block. The <code>String#succ</code> method is used to generate
+ *  each value.  If optional second argument exclusive is omitted or is <code>false</code>,
+ *  the last value will be included; otherwise it will be excluded.
  *     
- *     "  hello  ".lstrip   #=> "hello  "
- *     "hello".lstrip!      #=> nil
+ *     "a8".upto("b6") {|s| print s, ' ' }
+ *     for s in "a8".."b6"
+ *       print s, ' '
+ *     end
+ *     
+ *  <em>produces:</em>
+ *     
+ *     a8 a9 b0 b1 b2 b3 b4 b5 b6
+ *     a8 a9 b0 b1 b2 b3 b4 b5 b6
  */
 
 static VALUE
-rb_str_strip_bang2(VALUE str, int direction)
+rstr_upto(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    long i, n, orig_n;
-    CFStringInlineBuffer buf;
-    CFCharacterSetRef charset;
-    bool changed;
+    VALUE beg = str;
+    VALUE end, exclusive;
+    rb_scan_args(argc, argv, "11", &end, &exclusive);
 
-    rb_str_modify(str);
-    n = orig_n = CFStringGetLength((CFStringRef)str);
-    if (n == 0)
-	return Qnil;
-    CFStringInitInlineBuffer((CFStringRef)str, &buf, CFRangeMake(0, n));
-    charset = CFCharacterSetGetPredefined(kCFCharacterSetWhitespaceAndNewline);
-    changed = false;
+    bool excl = RTEST(exclusive);
+    StringValue(end);
 
-    if (direction >= 0) {
-	for (i = n - 1; i >= 0; i--) {
-	    UniChar c = CFStringGetCharacterFromInlineBuffer(&buf, i);
-	    if (!CFCharacterSetIsCharacterMember(charset, c))
+    if (rb_str_chars_len(beg) == 1 && rb_str_chars_len(end) == 1) {
+	UChar begc = rb_str_get_uchar(beg, 0);
+	UChar endc = rb_str_get_uchar(end, 0);
+
+	if (begc > endc || (excl && begc == endc)) {
+	    return beg;
+	}
+	while (true) {
+	    rb_yield(rb_unicode_str_new(&begc, 1));
+	    RETURN_IF_BROKEN();
+	    if (!excl && begc == endc) {
 		break;
+	    }
+	    begc++;
+	    if (excl && begc == endc) {
+		break;
+	    }
 	}
-	if (i < n - 1) {
-	    CFRange range = CFRangeMake(i + 1, n - i - 1);
-	    CFStringDelete((CFMutableStringRef)str, range);
-	    n -= range.length;	    
-	}
+	return beg;
     }
 
-    if (direction <= 0) {
-	for (i = 0; i < n; i++) {
-	    UniChar c = CFStringGetCharacterFromInlineBuffer(&buf, i);
-	    if (!CFCharacterSetIsCharacterMember(charset, c))
-		break;
+    const int cmp = rb_str_cmp(beg, end);
+    if (cmp > 0 || (excl && cmp == 0)) {
+	return beg;
+    }
+
+    SEL succ_sel = sel_registerName("succ");
+
+    VALUE current = beg;
+    VALUE after_end = rb_vm_call(end, succ_sel, 0, NULL, false);
+    StringValue(after_end);
+    while (!rb_str_equal(current, after_end)) {
+	rb_yield(current);
+	RETURN_IF_BROKEN();
+	if (!excl && rb_str_equal(current, end)) {
+	    break;
 	}
-	if (i > 0) {
-	    CFRange range = CFRangeMake(0, i);
-	    CFStringDelete((CFMutableStringRef)str, range);
+	current = rb_vm_call(current, succ_sel, 0, NULL, false);
+	StringValue(current);
+	if (excl && rb_str_equal(current, end)) {
+	    break;
 	}
+	if (rb_str_chars_len(current) > rb_str_chars_len(end)
+		|| rb_str_chars_len(current) == 0) {
+	    break;
+	}
     }
-
-    return orig_n != n ? str : Qnil;
+    return beg;
 }
 
+// :nodoc
 static VALUE
-rb_str_lstrip_bang(VALUE str, SEL sel)
+rstr_transform(VALUE str, SEL sel, VALUE transform_pat)
 {
-    return rb_str_strip_bang2(str, -1);
+    StringValue(transform_pat);
+
+    UChar *new_chars = NULL;
+    long new_chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(str, &new_chars, &new_chars_len, &need_free);
+
+    if (new_chars_len == 0) {
+	return Qnil;
+    }
+
+    if (!need_free) {
+	UChar *tmp = (UChar *)malloc(sizeof(UChar) * new_chars_len);
+	memcpy(tmp, new_chars, sizeof(UChar) * new_chars_len);
+	new_chars = tmp;
+    }
+
+    UChar *transform_chars = NULL;
+    long transform_chars_len = 0;
+    need_free = false;
+    rb_str_get_uchars(transform_pat, &transform_chars, &transform_chars_len,
+	    &need_free);
+
+    UErrorCode status = U_ZERO_ERROR;
+    UTransliterator *trans = utrans_openU(transform_chars, transform_chars_len,
+	    UTRANS_FORWARD, NULL, 0, NULL, &status);
+
+    if (trans == NULL) {
+	if (need_free) {
+	    free(transform_chars);
+	}
+	rb_raise(rb_eArgError, "cannot create transliterator");
+    }
+
+    int32_t capacity = (int32_t)new_chars_len;
+    int32_t limit = capacity;
+    utrans_transUChars(trans, new_chars, &capacity, capacity,
+	    0, &limit, &status);
+
+    new_chars_len = (long)capacity;
+
+    VALUE newstr = rb_unicode_str_new(new_chars, new_chars_len);
+
+    if (need_free) {
+	free(transform_chars);
+    }
+    free(new_chars);
+
+    return newstr;
 }
 
-
 /*
  *  call-seq:
- *     str.lstrip   => new_str
+ *     str.reverse!   => str
  *  
- *  Returns a copy of <i>str</i> with leading whitespace removed. See also
- *  <code>String#rstrip</code> and <code>String#strip</code>.
- *     
- *     "  hello  ".lstrip   #=> "hello  "
- *     "hello".lstrip       #=> "hello"
+ *  Reverses <i>str</i> in place.
  */
 
 static VALUE
-rb_str_lstrip(VALUE str)
+rstr_reverse_bang(VALUE str, SEL sel)
 {
-    str = rb_str_dup(str);
-    rb_str_lstrip_bang(str, 0);
+    rstr_modify(str);
+
+    if (str_try_making_data_uchars(RSTR(str))) {
+	const long len = BYTES_TO_UCHARS(RSTR(str)->length_in_bytes);
+	if (len <= 1) {
+	    return str;
+	}
+	for (long i = 0; i < (len / 2); i++) {
+	    UChar c = RSTR(str)->data.uchars[i];
+	    RSTR(str)->data.uchars[i] = RSTR(str)->data.uchars[len - i - 1];
+	    RSTR(str)->data.uchars[len - i - 1] = c; 
+	}
+    }
+    else {
+	const long len = RSTR(str)->length_in_bytes;
+	if (len <= 1) {
+	    return str;
+	}
+	for (long i = 0; i < (len / 2); i++) {
+	    char c = RSTR(str)->data.bytes[i];
+	    RSTR(str)->data.bytes[i] = RSTR(str)->data.bytes[len - i - 1];
+	    RSTR(str)->data.bytes[len - i - 1] = c; 
+	}
+    }
+
     return str;
 }
 
-
 /*
  *  call-seq:
- *     str.rstrip!   => self or nil
+ *     str.reverse   => new_str
  *  
- *  Removes trailing whitespace from <i>str</i>, returning <code>nil</code> if
- *  no change was made. See also <code>String#lstrip!</code> and
- *  <code>String#strip!</code>.
+ *  Returns a new string with the characters from <i>str</i> in reverse order.
  *     
- *     "  hello  ".rstrip   #=> "  hello"
- *     "hello".rstrip!      #=> nil
+ *     "stressed".reverse   #=> "desserts"
  */
 
 static VALUE
-rb_str_rstrip_bang(VALUE str, SEL sel)
+rstr_reverse(VALUE str, SEL sel)
 {
-    return rb_str_strip_bang2(str, 1);
+    VALUE obj = rb_str_new3(str);
+    rstr_reverse_bang(obj, 0);
+    return obj;
 }
 
-
 /*
  *  call-seq:
- *     str.rstrip   => new_str
+ *     str.count([other_str]+)   => fixnum
  *  
- *  Returns a copy of <i>str</i> with trailing whitespace removed. See also
- *  <code>String#lstrip</code> and <code>String#strip</code>.
+ *  Each <i>other_str</i> parameter defines a set of characters to count.  The
+ *  intersection of these sets defines the characters to count in
+ *  <i>str</i>. Any <i>other_str</i> that starts with a caret (^) is
+ *  negated. The sequence c1--c2 means all characters between c1 and c2.
  *     
- *     "  hello  ".rstrip   #=> "  hello"
- *     "hello".rstrip       #=> "hello"
+ *     a = "hello world"
+ *     a.count "lo"            #=> 5
+ *     a.count "lo", "o"       #=> 2
+ *     a.count "hello", "^l"   #=> 4
+ *     a.count "ej-m"          #=> 4
  */
 
-static VALUE
-rb_str_rstrip(VALUE str)
+static void
+fill_linear_charset_buffer(char *buf, long bufsize, long *lenp, bool *negatep,
+	VALUE source)
 {
-    str = rb_str_dup(str);
-    rb_str_rstrip_bang(str, 0);
-    return str;
+    StringValue(source);
+
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(source, &chars, &chars_len, &need_free);
+
+    long pos = 0;
+    if (negatep != NULL) {
+	if (chars_len > 0 && chars[0] == '^') {
+	    *negatep = true;
+	    pos++;
+	} 
+	else {
+	    *negatep = false;
+	}
+    }
+
+    bool error = false;
+    long bufpos = 0;
+
+    while (pos < chars_len) {
+	UChar c = chars[pos];
+
+	if (pos + 2 < chars_len && chars[pos + 1] == '-') {
+	    // Range
+	    UChar e = chars[pos + 2];
+	    if (c > e) {
+		error = true;
+		goto bail;
+	    }
+
+	    if (c < 0xff && e < 0xff) {
+		while (c <= e) {
+		    if (bufpos >= bufsize) {
+			error = true;
+			goto bail;
+		    }
+		    buf[bufpos++] = (char)c;
+		    c++; 
+		}
+	    }
+	    pos += 2;
+	}
+	else {
+	    if (c < 0xff) {
+		if (bufpos >= bufsize) {
+		    error = true;
+		    goto bail;
+		}
+		buf[bufpos++] = (char)c;
+	    }
+	    pos++;
+	}
+    }
+
+    *lenp = bufpos;
+
+bail:
+    if (need_free) {
+	free(chars);
+    }
+
+    if (error) {
+	rb_raise(rb_eArgError, "invalid string transliteration");
+    }
 }
 
+static void
+intersect_charset_table(char *tbl, VALUE source)
+{
+    // Generate linear buffer based on source pattern.
+    char buf[0xff];
+    bool negate = false;
+    long buflen = 0;
+    fill_linear_charset_buffer(buf, sizeof buf, &buflen, &negate, source);
 
-/*
- *  call-seq:
- *     str.strip!   => str or nil
- *  
- *  Removes leading and trailing whitespace from <i>str</i>. Returns
- *  <code>nil</code> if <i>str</i> was not altered.
- */
+    // Create character table based on linear buffer.
+    char source_tbl[0xff];
+    char cflag = negate ? 1 : 0;
+    for (int i = 0; i < 0xff; i++) {
+	source_tbl[i] = cflag;
+    }
+    cflag = negate ? 0 : 1;
+    for (long i = 0; i < buflen; i++) {
+	char c = buf[i];
+	source_tbl[(int)c] = cflag;
+    }
 
-static VALUE
-rb_str_strip_bang(VALUE str, SEL sel)
-{
-    return rb_str_strip_bang2(str, 0);
+    // Intersect both tables.
+    for (int i = 0; i < 0xff; i++) {
+	tbl[i] = tbl[i] && source_tbl[i];
+    }
 }
 
+static void
+create_intersected_charset_table(char *tbl, int argc, VALUE *argv)
+{
+    if (argc < 1) {
+	rb_raise(rb_eArgError, "wrong number of arguments");
+    }
 
-/*
- *  call-seq:
- *     str.strip   => new_str
- *  
- *  Returns a copy of <i>str</i> with leading and trailing whitespace removed.
- *     
- *     "    hello    ".strip   #=> "hello"
- *     "\tgoodbye\r\n".strip   #=> "goodbye"
- */
+    // Fill the table with 1s before starting the intersections.
+    for (int i = 0; i < 0xff; i++) {
+	tbl[i] = 1;
+    }
 
-static VALUE
-rb_str_strip(VALUE str, SEL sel)
-{
-    str = rb_str_dup(str);
-    rb_str_strip_bang(str, 0);
-    return str;
+    for (int i = 0; i < argc; i++) {
+	intersect_charset_table(tbl, argv[i]);	
+    }
 }
 
-static VALUE
-scan_once(VALUE str, VALUE pat, long *start, long strlen, bool pat_is_string)
+static void
+create_translate_charset_table(char *tbl, VALUE source, VALUE repl)
 {
-    VALUE result, match;
-    struct re_registers *regs;
-    long i;
+    // Generate linear buffer based on source pattern.
+    char source_buf[0xff];
+    bool negate = false;
+    long source_buflen = 0;
+    fill_linear_charset_buffer(source_buf, sizeof source_buf, &source_buflen,
+	    &negate, source);
 
-    if (pat_is_string) {
-	/* XXX this is sometimes slower than the regexp search, especially for
-	 * long pattern strings 
-	 */
-	CFRange result_range;
-	if (CFStringFindWithOptions((CFStringRef)str, 
-	    (CFStringRef)pat,
-	    CFRangeMake(*start, strlen - *start),
-	    0,
-	    &result_range)) {
-	    CFStringRef substr = CFStringCreateWithSubstring(NULL, 
-		(CFStringRef)str, result_range);
-	    *start = result_range.location + result_range.length + 1;
-	    result = (VALUE)CFStringCreateMutableCopy(NULL, 0, substr);
-	    CFRelease(substr);
-	    CFMakeCollectable((CFTypeRef)result);
+    // Generate linear buffer based on repl pattern.
+    char repl_buf[0xff];
+    long repl_buflen = 0;
+    fill_linear_charset_buffer(repl_buf, sizeof repl_buf, &repl_buflen,
+	    NULL, repl);
+    assert(repl_buflen > 0);
+
+    // Fill the table based on the values from the linear buffers.
+    if (negate) {
+	for (int i = 0; i < 0xff; i++) {
+	    tbl[i] = 1;
 	}
-	else {
-	    result = Qnil;
+
+	long pos = 0;
+	while (pos < source_buflen) {
+	    const char source_c = source_buf[pos];
+	    tbl[(int)source_c] = 0;
+	    pos++;
 	}
-	return result;
-    }
 
-    if (rb_reg_search2(pat, str, *start, 0, false) >= 0) {
-	match = rb_backref_get();
-	GC_WB(&RMATCH(match)->str, str);
-	regs = RMATCH_REGS(match);
-	if (BEG(0) == END(0)) {
-	    /*
-	     * Always consume at least one character of the input string
-	     */
-	    *start = END(0)+1;
+	for (int i = 0, pos = 0; i < 0xff; i++) {
+	    if (tbl[i] == 1) {
+		const char repl_c = pos >= repl_buflen
+		    ? repl_buf[repl_buflen - 1] : repl_buf[pos];
+		tbl[i] = repl_c;
+		pos++;
+	    }
 	}
-	else {
-	    *start = END(0);
+    }
+    else {
+	for (int i = 0; i < 0xff; i++) {
+	    tbl[i] = 0;
 	}
-	if (regs->num_regs == 1) {
-	    return rb_reg_nth_match(0, match);
+
+	long pos = 0;
+	while (pos < source_buflen) {
+	    const char source_c = source_buf[pos];
+	    const char repl_c = pos >= repl_buflen
+		? repl_buf[repl_buflen - 1] : repl_buf[pos];
+	    tbl[(int)source_c] = repl_c;
+	    pos++;
 	}
-	result = rb_ary_new2(regs->num_regs);
-	for (i=1; i < regs->num_regs; i++) {
-	    rb_ary_push(result, rb_reg_nth_match(i, match));
-	}
-
-	return result;
-    }
-    return Qnil;
+    } 
 }
 
+#define INTERSECT_CHARSET_TABLE_CREATE() \
+	char __tbl__[0xff]; \
+	create_intersected_charset_table(__tbl__, argc, argv);
 
-/*
- *  call-seq:
- *     str.scan(pattern)                         => array
- *     str.scan(pattern) {|match, ...| block }   => str
- *  
- *  Both forms iterate through <i>str</i>, matching the pattern (which may be a
- *  <code>Regexp</code> or a <code>String</code>). For each match, a result is
- *  generated and either added to the result array or passed to the block. If
- *  the pattern contains no groups, each individual result consists of the
- *  matched string, <code>$&</code>.  If the pattern contains groups, each
- *  individual result is itself an array containing one entry per group.
- *     
- *     a = "cruel world"
- *     a.scan(/\w+/)        #=> ["cruel", "world"]
- *     a.scan(/.../)        #=> ["cru", "el ", "wor"]
- *     a.scan(/(...)/)      #=> [["cru"], ["el "], ["wor"]]
- *     a.scan(/(..)(..)/)   #=> [["cr", "ue"], ["l ", "wo"]]
- *     
- *  And the block form:
- *     
- *     a.scan(/\w+/) {|w| print "<<#{w}>> " }
- *     print "\n"
- *     a.scan(/(.)(.)/) {|x,y| print y, x }
- *     print "\n"
- *     
- *  <em>produces:</em>
- *     
- *     <<cruel>> <<world>>
- *     rceu lowlr
- */
+#define CHARSET_TABLE_INCLUDES(c) \
+	((c) < 0xff && __tbl__[(c) & 0xff] == 1)
 
 static VALUE
-rb_str_scan(VALUE str, SEL sel, VALUE pat)
+rstr_count(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    VALUE result;
-    long start = 0;
-    VALUE match = Qnil;
-    long len = CFStringGetLength((CFStringRef)str);
-    bool pat_is_string = TYPE(pat) == T_STRING;
-    
-    if (!pat_is_string) {
-	pat = get_pat(pat, 1);
-    }
-    if (!rb_block_given_p()) {
-	VALUE ary = rb_ary_new();
+    INTERSECT_CHARSET_TABLE_CREATE();
 
-	while (!NIL_P(result = scan_once(str, pat, &start, len, 
-					 pat_is_string))) {
-	    rb_ary_push(ary, result);
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(str, &chars, &chars_len, &need_free);
+
+    long count = 0;
+    for (long i = 0; i < chars_len; i++) {
+	if (CHARSET_TABLE_INCLUDES(chars[i])) {
+	    count++;
 	}
-	return ary;
     }
 
-    while (!NIL_P(result = scan_once(str, pat, &start, len, pat_is_string))) {
-	match = rb_backref_get();
-	rb_match_busy(match);
-	rb_yield(result);
-	RETURN_IF_BROKEN();
-	rb_backref_set(match);	/* restore $~ value */
+    if (need_free) {
+	free(chars);
     }
-    rb_backref_set(match);
-    return str;
+
+    return LONG2NUM(count); 
 }
 
-
 /*
  *  call-seq:
- *     str.hex   => integer
+ *     str.delete!([other_str]+)   => str or nil
  *  
- *  Treats leading characters from <i>str</i> as a string of hexadecimal digits
- *  (with an optional sign and an optional <code>0x</code>) and returns the
- *  corresponding number. Zero is returned on error.
- *     
- *     "0x0a".hex     #=> 10
- *     "-1234".hex    #=> -4660
- *     "0".hex        #=> 0
- *     "wombat".hex   #=> 0
+ *  Performs a <code>delete</code> operation in place, returning <i>str</i>, or
+ *  <code>nil</code> if <i>str</i> was not modified.
  */
 
 static VALUE
-rb_str_hex(VALUE str, SEL sel)
+rstr_delete_bang(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    rb_encoding *enc = rb_enc_get(str);
+    rstr_modify(str);
 
-    if (!rb_enc_asciicompat(enc)) {
-	rb_raise(rb_eArgError, "ASCII incompatible encoding: %s", rb_enc_name(enc));
+    INTERSECT_CHARSET_TABLE_CREATE();
+
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(str, &chars, &chars_len, &need_free);
+
+    bool modified = false;
+    for (long i = 0; i < chars_len; i++) {
+	while (i < chars_len && CHARSET_TABLE_INCLUDES(chars[i])) {
+	    for (long j = i; j < chars_len - 1; j++) {
+		chars[j] = chars[j + 1];
+	    }
+	    chars_len--;
+	    modified = true;
+	}
     }
-    return rb_str_to_inum(str, 16, Qfalse);
+
+    if (!modified) {
+	if (need_free) {
+	    free(chars);
+	}
+	return Qnil;
+    }
+
+    if (need_free) {
+	str_replace_with_uchars(RSTR(str), chars, chars_len);
+	free(chars);
+    }
+    else {
+	RSTR(str)->length_in_bytes = UCHARS_TO_BYTES(chars_len);
+    }
+
+    return str;
 }
 
-
 /*
  *  call-seq:
- *     str.oct   => integer
+ *     str.delete([other_str]+)   => new_str
  *  
- *  Treats leading characters of <i>str</i> as a string of octal digits (with an
- *  optional sign) and returns the corresponding number.  Returns 0 if the
- *  conversion fails.
+ *  Returns a copy of <i>str</i> with all characters in the intersection of its
+ *  arguments deleted. Uses the same rules for building the set of characters as
+ *  <code>String#count</code>.
  *     
- *     "123".oct       #=> 83
- *     "-377".oct      #=> -255
- *     "bad".oct       #=> 0
- *     "0377bad".oct   #=> 255
+ *     "hello".delete "l","lo"        #=> "heo"
+ *     "hello".delete "lo"            #=> "he"
+ *     "hello".delete "aeiou", "^e"   #=> "hell"
+ *     "hello".delete "ej-m"          #=> "ho"
  */
 
 static VALUE
-rb_str_oct(VALUE str, SEL sel)
+rstr_delete(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    rb_encoding *enc = rb_enc_get(str);
-
-    if (!rb_enc_asciicompat(enc)) {
-	rb_raise(rb_eArgError, "ASCII incompatible encoding: %s", rb_enc_name(enc));
-    }
-    return rb_str_to_inum(str, -8, Qfalse);
+    str = rb_str_new3(str);
+    rstr_delete_bang(str, 0, argc, argv);
+    return str;
 }
 
-
 /*
  *  call-seq:
- *     str.crypt(other_str)   => new_str
+ *     str.squeeze!([other_str]*)   => str or nil
  *  
- *  Applies a one-way cryptographic hash to <i>str</i> by invoking the standard
- *  library function <code>crypt</code>. The argument is the salt string, which
- *  should be two characters long, each character drawn from
- *  <code>[a-zA-Z0-9./]</code>.
+ *  Squeezes <i>str</i> in place, returning either <i>str</i>, or
+ *  <code>nil</code> if no changes were made.
  */
 
-extern char *crypt(const char *, const char *);
-
 static VALUE
-rb_str_crypt(VALUE str, SEL sel, VALUE salt)
+rstr_squeeze_bang(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    StringValue(salt);
-    if (RSTRING_LEN(salt) < 2) {
-	rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
-    }
+    rstr_modify(str);
 
-    size_t str_len = RSTRING_LEN(str);
-    char *s = alloca(str_len + 1);
-    strncpy(s, RSTRING_PTR(str), str_len + 1);
-
-    VALUE crypted = rb_str_new2(crypt(s, RSTRING_PTR(salt)));
-    if (OBJ_TAINTED(str) || OBJ_TAINTED(salt)) {
-	OBJ_TAINT(crypted);
+    // If no arguments are provided, we build a pattern string that contains
+    // the characters of the receiver itself.
+    VALUE tmp[1];
+    if (argc == 0) {
+	tmp[0] = str;
+	argv = tmp;
+	argc = 1;
     }
-    return crypted;
-}
 
+    INTERSECT_CHARSET_TABLE_CREATE();
 
-/*
- *  call-seq:
- *     str.intern   => symbol
- *     str.to_sym   => symbol
- *  
- *  Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
- *  symbol if it did not previously exist. See <code>Symbol#id2name</code>.
- *     
- *     "Koala".intern         #=> :Koala
- *     s = 'cat'.to_sym       #=> :cat
- *     s == :cat              #=> true
- *     s = '@cat'.to_sym      #=> :@cat
- *     s == :@cat             #=> true
- *
- *  This can also be used to create symbols that cannot be represented using the
- *  <code>:xxx</code> notation.
- *     
- *     'cat and dog'.to_sym   #=> :"cat and dog"
- */
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(str, &chars, &chars_len, &need_free);
 
-VALUE
-rb_str_intern_fast(VALUE s)
-{
-    char *cptr = (char *)CFStringGetCStringPtr((CFStringRef)s, 0);
-    if (cptr != NULL) {
-	return ID2SYM(rb_intern(cptr));
+    bool modified = false;
+    for (long i = 0; i < chars_len; i++) {
+	UChar c = chars[i];
+	if (CHARSET_TABLE_INCLUDES(c)) {
+	    while (i + 1 < chars_len && chars[i + 1] == c) {
+		for (long j = i + 1; j < chars_len - 1; j++) {
+		    chars[j] = chars[j + 1];
+		}
+		chars_len--;
+		modified = true;
+	    }
+	}
     }
 
-    char buf[200];
-    if (CFStringGetLength((CFStringRef)s) > sizeof(buf)) {
-	return ID2SYM(rb_intern(RSTRING_PTR(s)));
+    if (!modified) {
+	if (need_free) {
+	    free(chars);
+	}
+	return Qnil;
     }
 
-    if (!CFStringGetCString((CFStringRef)s, buf, sizeof buf,
-		kCFStringEncodingUTF8)) {
-	// Probably an UTF16 string...
-	rb_raise(rb_eRuntimeError, "can't intern string `%p'", (void *)s);
+    if (need_free) {
+	str_replace_with_uchars(RSTR(str), chars, chars_len);
+	free(chars);
     }
-    return ID2SYM(rb_intern(buf));
-}
+    else {
+	RSTR(str)->length_in_bytes = UCHARS_TO_BYTES(chars_len);
+    }
 
-static VALUE
-rb_str_intern(VALUE str, SEL sel)
-{
-    if (OBJ_TAINTED(str) && rb_safe_level() >= 1) {
-	rb_raise(rb_eSecurityError, "Insecure: can't intern tainted string");
-    }
-    return rb_str_intern_fast(str);
+    return str;
 }
 
 /*
  *  call-seq:
- *     str.ord   => integer
+ *     str.squeeze([other_str]*)    => new_str
  *  
- *  Return the <code>Integer</code> ordinal of a one-character string.
+ *  Builds a set of characters from the <i>other_str</i> parameter(s) using the
+ *  procedure described for <code>String#count</code>. Returns a new string
+ *  where runs of the same character that occur in this set are replaced by a
+ *  single character. If no arguments are given, all runs of identical
+ *  characters are replaced by a single character.
  *     
- *     "a".ord         #=> 97
+ *     "yellow moon".squeeze                  #=> "yelow mon"
+ *     "  now   is  the".squeeze(" ")         #=> " now is the"
+ *     "putters shoot balls".squeeze("m-z")   #=> "puters shot balls"
  */
 
 static VALUE
-rb_str_ord(VALUE s, SEL sel)
+rstr_squeeze(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    if (CFStringGetLength((CFStringRef)s) == 0) {
-	rb_raise(rb_eArgError, "empty string");
-    }
-    return INT2NUM(CFStringGetCharacterAtIndex((CFStringRef)s, 0));
+    str = rb_str_new3(str);
+    rstr_squeeze_bang(str, 0, argc, argv);
+    return str;
 }
 
 /*
  *  call-seq:
- *     str.sum(n=16)   => integer
+ *     str.tr!(from_str, to_str)   => str or nil
  *  
- *  Returns a basic <em>n</em>-bit checksum of the characters in <i>str</i>,
- *  where <em>n</em> is the optional <code>Fixnum</code> parameter, defaulting
- *  to 16. The result is simply the sum of the binary value of each character in
- *  <i>str</i> modulo <code>2n - 1</code>. This is not a particularly good
- *  checksum.
+ *  Translates <i>str</i> in place, using the same rules as
+ *  <code>String#tr</code>. Returns <i>str</i>, or <code>nil</code> if no
+ *  changes were made.
  */
 
 static VALUE
-rb_str_sum(VALUE str, SEL sel, int argc, VALUE *argv)
+translate(VALUE str, VALUE source, VALUE repl, bool sflag)
 {
-    VALUE vbits;
-    int bits;
-    const char *ptr, *p, *pend;
-    long len;
+    StringValue(source);
+    StringValue(repl);
 
-    if (argc == 0) {
-	bits = 16;
+    if (rb_str_chars_len(repl) == 0) {
+	return rstr_delete_bang(str, 0, 1, &source);
     }
-    else {
-	rb_scan_args(argc, argv, "01", &vbits);
-	bits = NUM2INT(vbits);
-    }
-    ptr = p = RSTRING_PTR(str);
-    len = RSTRING_LEN(str);
-    pend = p + len;
-    if (bits >= sizeof(long)*CHAR_BIT) {
-	VALUE sum = INT2FIX(0);
 
-	while (p < pend) {
-	    str_mod_check(str, ptr, len);
-	    sum = rb_funcall(sum, '+', 1, INT2FIX((unsigned char)*p));
-	    p++;
+    rstr_modify(str);
+
+    char tbl[0xff]; 
+    create_translate_charset_table(tbl, source, repl);
+
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(str, &chars, &chars_len, &need_free);
+
+    bool modified = false;
+    for (long i = 0; i < chars_len; i++) {
+	UChar c = chars[i];
+	if (c < 0xff) {
+	    char repl = tbl[(c & 0xff)];
+	    if (repl != 0) {
+		chars[i] = repl;
+		modified = true;
+// TODO
+//		if (sflag) {
+//		}
+	    }
 	}
-	if (bits != 0) {
-	    VALUE mod;
+    } 
 
-	    mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits));
-	    mod = rb_funcall(mod, '-', 1, INT2FIX(1));
-	    sum = rb_funcall(sum, '&', 1, mod);
+    if (!modified) {
+	if (need_free) {
+	    free(chars);
 	}
-	return sum;
+	return Qnil;
     }
-    else {
-       unsigned long sum = 0;
 
-	while (p < pend) {
-	    str_mod_check(str, ptr, len);
-	    sum += (unsigned char)*p;
-	    p++;
-	}
-	if (bits != 0) {
-           sum &= (((unsigned long)1)<<bits)-1;
-	}
-	return rb_int2inum(sum);
+    if (need_free) {
+	str_replace_with_uchars(RSTR(str), chars, chars_len);
+	free(chars);
     }
+//    else {
+//	RSTR(str)->length_in_bytes = UCHARS_TO_BYTES(chars_len);
+//    }
+
+    return str;
 }
 
-static inline void
-rb_str_justify0(VALUE str, VALUE pad, long width, long padwidth, long index)
+static VALUE
+rstr_tr_bang(VALUE str, SEL sel, VALUE src, VALUE repl)
 {
-    do {
-	if (padwidth > width) {
-	    pad = (VALUE)CFStringCreateWithSubstring(
-		    NULL,
-		    (CFStringRef)pad,
-		    CFRangeMake(0, width));
-	    CFMakeCollectable((CFTypeRef)pad);
-	}
-	CFStringInsert((CFMutableStringRef)str, index, (CFStringRef)pad);
-	width -= padwidth;
-	index += padwidth;
-    }
-    while (width > 0);
+    return translate(str, src, repl, false);
 }
 
+/*
+ *  call-seq:
+ *     str.tr(from_str, to_str)   => new_str
+ *  
+ *  Returns a copy of <i>str</i> with the characters in <i>from_str</i> replaced
+ *  by the corresponding characters in <i>to_str</i>. If <i>to_str</i> is
+ *  shorter than <i>from_str</i>, it is padded with its last character. Both
+ *  strings may use the c1--c2 notation to denote ranges of characters, and
+ *  <i>from_str</i> may start with a <code>^</code>, which denotes all
+ *  characters except those listed.
+ *     
+ *     "hello".tr('aeiou', '*')    #=> "h*ll*"
+ *     "hello".tr('^aeiou', '*')   #=> "*e**o"
+ *     "hello".tr('el', 'ip')      #=> "hippo"
+ *     "hello".tr('a-y', 'b-z')    #=> "ifmmp"
+ */
+
 static VALUE
-rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
+rstr_tr(VALUE str, SEL sel, VALUE src, VALUE repl)
 {
-    VALUE w, pad;
-    long n, width, padwidth;
-
-    rb_scan_args(argc, argv, "11", &w, &pad);
-    width = NUM2LONG(w);
-
-    if (NIL_P(pad)) {
-	pad = rb_str_new(" ", 1);
-	padwidth = 1;
-    }
-    else {
-	StringValue(pad);
-	padwidth = CFStringGetLength((CFStringRef)pad);
-    }
-
-    if (padwidth == 0) {
-	rb_raise(rb_eArgError, "zero width padding");
-    }
-
-    n = CFStringGetLength((CFStringRef)str);
-   
     str = rb_str_new3(str);
-    if (width < 0 || width <= n) {
-	return str;
-    }
-    width -= n;
-
-    if (jflag == 'c') {
-	rb_str_justify0(str, pad, ceil(width / 2.0), padwidth, n);
-	rb_str_justify0(str, pad, floor(width / 2.0), padwidth, 0);
-    }
-    else if (jflag == 'l') {
-	rb_str_justify0(str, pad, width, padwidth, n);
-    }
-    else if (jflag == 'r') {
-	rb_str_justify0(str, pad, width, padwidth, 0);
-    }
-    else {
-	rb_bug("invalid jflag");
-    }
-
-    if (OBJ_TAINTED(pad)) {
-	OBJ_TAINT(str);
-    }
-
+    rstr_tr_bang(str, 0, src, repl);
     return str;
 }
 
-
 /*
  *  call-seq:
- *     str.ljust(integer, padstr=' ')   => new_str
+ *     str.tr_s!(from_str, to_str)   => str or nil
  *  
- *  If <i>integer</i> is greater than the length of <i>str</i>, returns a new
- *  <code>String</code> of length <i>integer</i> with <i>str</i> left justified
- *  and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
- *     
- *     "hello".ljust(4)            #=> "hello"
- *     "hello".ljust(20)           #=> "hello               "
- *     "hello".ljust(20, '1234')   #=> "hello123412341234123"
+ *  Performs <code>String#tr_s</code> processing on <i>str</i> in place,
+ *  returning <i>str</i>, or <code>nil</code> if no changes were made.
  */
 
 static VALUE
-rb_str_ljust(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_tr_s_bang(VALUE str, SEL sel, VALUE src, VALUE repl)
 {
-    return rb_str_justify(argc, argv, str, 'l');
+    return translate(str, src, repl, true);
 }
 
-
 /*
  *  call-seq:
- *     str.rjust(integer, padstr=' ')   => new_str
+ *     str.tr_s(from_str, to_str)   => new_str
  *  
- *  If <i>integer</i> is greater than the length of <i>str</i>, returns a new
- *  <code>String</code> of length <i>integer</i> with <i>str</i> right justified
- *  and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
+ *  Processes a copy of <i>str</i> as described under <code>String#tr</code>,
+ *  then removes duplicate characters in regions that were affected by the
+ *  translation.
  *     
- *     "hello".rjust(4)            #=> "hello"
- *     "hello".rjust(20)           #=> "               hello"
- *     "hello".rjust(20, '1234')   #=> "123412341234123hello"
+ *     "hello".tr_s('l', 'r')     #=> "hero"
+ *     "hello".tr_s('el', '*')    #=> "h*o"
+ *     "hello".tr_s('el', 'hx')   #=> "hhxo"
  */
 
 static VALUE
-rb_str_rjust(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_tr_s(VALUE str, SEL sel, VALUE src, VALUE repl)
 {
-    return rb_str_justify(argc, argv, str, 'r');
+    str = rb_str_new3(str);
+    rstr_tr_s_bang(str, 0, src, repl);
+    return str;
 }
 
-
 /*
  *  call-seq:
- *     str.center(integer, padstr)   => new_str
+ *     str.sum(n=16)   => integer
  *  
- *  If <i>integer</i> is greater than the length of <i>str</i>, returns a new
- *  <code>String</code> of length <i>integer</i> with <i>str</i> centered and
- *  padded with <i>padstr</i>; otherwise, returns <i>str</i>.
- *     
- *     "hello".center(4)         #=> "hello"
- *     "hello".center(20)        #=> "       hello        "
- *     "hello".center(20, '123') #=> "1231231hello12312312"
+ *  Returns a basic <em>n</em>-bit checksum of the characters in <i>str</i>,
+ *  where <em>n</em> is the optional <code>Fixnum</code> parameter, defaulting
+ *  to 16. The result is simply the sum of the binary value of each character in
+ *  <i>str</i> modulo <code>2n - 1</code>. This is not a particularly good
+ *  checksum.
  */
 
 static VALUE
-rb_str_center(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_sum(VALUE str, SEL sel, int argc, VALUE *argv)
 {
-    return rb_str_justify(argc, argv, str, 'c');
+    int bits = 16;
+    if (argc > 0) {
+	VALUE vbits;
+	rb_scan_args(argc, argv, "01", &vbits);
+	bits = NUM2INT(vbits);
+    }
+
+    if (bits >= sizeof(long) * CHAR_BIT) {
+	rb_raise(rb_eArgError, "bits argument too big");
+    }
+
+    unsigned long sum = 0;
+    for (long i = 0; i < RSTR(str)->length_in_bytes; i++) {
+	sum += (unsigned char)RSTR(str)->data.bytes[i];
+    }
+    if (bits != 0) {
+	sum &= (((unsigned long)1) << bits) - 1;
+    }
+
+    return rb_int2inum(sum);
 }
 
 /*
+ * call-seq:
+ *    str.hash   => fixnum
+ *
+ * Return a hash based on the string's length and content.
+ */
+
+static VALUE
+rstr_hash(VALUE str, SEL sel)
+{
+    return LONG2NUM(rb_str_hash(str));
+}
+
+/*
  *  call-seq:
  *     str.partition(sep)              => [head, sep, tail]
  *  
@@ -4757,35 +5071,36 @@
  */
 
 static VALUE
-rb_str_partition(VALUE str, SEL sel, VALUE sep)
+rstr_partition(VALUE str, SEL sel, VALUE sep)
 {
-    long pos;
-    int regex = Qfalse;
-    long strlen, seplen = 0;
+    long pos = 0;
+    long seplen = 0;
+    bool regex = false;
 
     if (TYPE(sep) == T_REGEXP) {
-	pos = rb_reg_search(sep, str, 0, 0);
-	regex = Qtrue;
+	pos = rb_reg_search(sep, str, 0, false);
+	regex = true;
     }
     else {
 	StringValue(sep);
-	pos = rb_str_index(str, sep, 0);
-	seplen = CFStringGetLength((CFStringRef)sep);
+	seplen = rb_str_chars_len(sep);
+	pos = str_index_for_string(RSTR(str), str_need_string(sep),
+		0, -1, false, true);
     }
     if (pos < 0) {
-      failed:
-	return rb_ary_new3(3, str, rb_str_new(0,0),rb_str_new(0,0));
+failed:
+	return rb_ary_new3(3, str, rb_str_new(NULL,0), rb_str_new(NULL,0));
     }
     if (regex) {
 	sep = rb_str_subpat(str, sep, 0);
-	seplen = CFStringGetLength((CFStringRef)sep);
-	if (pos == 0 && seplen == 0) goto failed;
+	seplen = rb_str_chars_len(sep);
+	if (pos == 0 && seplen == 0) {
+	    goto failed;
+	}
     }
-    strlen = CFStringGetLength((CFStringRef)str);
-    return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
-		          sep,
-		          rb_str_subseq(str, pos+seplen,
-					     strlen-pos-seplen));
+    const long len = rb_str_chars_len(str);
+    return rb_ary_new3(3, rstr_substr(str, 0, pos), sep,
+	    rstr_substr(str, pos + seplen, len - pos - seplen));
 }
 
 /*
@@ -4802,1049 +5117,950 @@
  */
 
 static VALUE
-rb_str_rpartition(VALUE str, SEL sel, VALUE sep)
+rstr_rpartition(VALUE str, SEL sel, VALUE sep)
 {
-    long pos = RSTRING_LEN(str);
-    int regex = Qfalse;
-    long seplen;
+    const long len = rb_str_chars_len(str);
+    long pos = len;
+    bool regex = false;
 
     if (TYPE(sep) == T_REGEXP) {
-	pos = rb_reg_search(sep, str, pos, 1);
-	regex = Qtrue;
+	pos = rb_reg_search(sep, str, pos, true);
+	regex = true;
     }
     else {
-	VALUE tmp;
-
-	tmp = rb_check_string_type(sep);
-	if (NIL_P(tmp)) {
-	    rb_raise(rb_eTypeError, "type mismatch: %s given",
-		     rb_obj_classname(sep));
-	}
-	pos = rb_str_sublen(str, pos);
-	pos = rb_str_rindex(str, sep, pos);
+	StringValue(sep);
+	pos = str_index_for_string(RSTR(str), str_need_string(sep),
+		0, -1, true, true);
     }
     if (pos < 0) {
-	return rb_ary_new3(3, rb_str_new(0,0),rb_str_new(0,0), str);
+failed:
+	return rb_ary_new3(3, rb_str_new(NULL, 0), rb_str_new(NULL,0), str);
     }
     if (regex) {
 	sep = rb_reg_nth_match(0, rb_backref_get());
-	if (sep == Qnil)
-	    return rb_ary_new3(3, rb_str_new(0,0),rb_str_new(0,0), str);
+	if (sep == Qnil) {
+	    goto failed;
+	}
     }
-    seplen = RSTRING_LEN(sep);
-    return rb_ary_new3(3, rb_str_substr(str, 0, pos),
-		          sep,
-		          rb_str_substr(str, pos + seplen, seplen));
+    const long seplen = rb_str_chars_len(sep);
+    return rb_ary_new3(3, rstr_substr(str, 0, pos), sep,
+	    rstr_substr(str, pos + seplen, len - pos - seplen));
 }
 
 /*
  *  call-seq:
- *     str.start_with?([prefix]+)   => true or false
+ *     str.crypt(other_str)   => new_str
  *  
- *  Returns true if <i>str</i> starts with the prefix given.
+ *  Applies a one-way cryptographic hash to <i>str</i> by invoking the standard
+ *  library function <code>crypt</code>. The argument is the salt string, which
+ *  should be two characters long, each character drawn from
+ *  <code>[a-zA-Z0-9./]</code>.
  */
 
 static VALUE
-rb_str_start_with(VALUE str, SEL sel, int argc, VALUE *argv)
+rstr_crypt(VALUE str, SEL sel, VALUE salt)
 {
-    int i;
+    StringValue(salt);
+    if (RSTRING_LEN(salt) < 2) {
+	rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
+    }
 
-    for (i = 0; i < argc; i++) {
-	VALUE tmp = rb_check_string_type(argv[i]);
-	if (NIL_P(tmp)) {
-	    continue;
-	}
-	if (CFStringHasPrefix((CFStringRef)str, (CFStringRef)tmp)) {
-	    return Qtrue;
-	}
+    VALUE crypted = rb_str_new2(crypt(RSTRING_PTR(str), RSTRING_PTR(salt)));
+    if (OBJ_TAINTED(str) || OBJ_TAINTED(salt)) {
+	OBJ_TAINT(crypted);
     }
-    return Qfalse;
+    return crypted;
 }
 
-/*
- *  call-seq:
- *     str.end_with?([suffix]+)   => true or false
- *  
- *  Returns true if <i>str</i> ends with the suffix given.
- */
+// NSString primitives.
 
-static VALUE
-rb_str_end_with(VALUE str, SEL sel, int argc, VALUE *argv)
+static void
+check_bounds(void *rcv, long pos, bool can_be_end)
 {
-    int i;
-
-    for (i = 0; i < argc; i++) {
-	VALUE tmp = rb_check_string_type(argv[i]);
-	if (NIL_P(tmp)) {
-	    continue;
+    const long len = str_length(RSTR(rcv), true);
+    if (pos >= 0) {
+	if (can_be_end) {
+	    if (pos <= len) {
+		return;
+	    }
 	}
-	if (CFStringHasSuffix((CFStringRef)str, (CFStringRef)tmp)) {
-	    return Qtrue;
+	else if (pos < len) {
+	    return;
 	}
     }
-    return Qfalse;
+
+    char buf[100];
+    snprintf(buf, sizeof buf, "Position (%ld) out of bounds (%ld)",
+	    pos, len);
+    rb_objc_exception_raise("NSRangeException", buf);
 }
 
-void
-rb_str_setter(VALUE val, ID id, VALUE *var)
+static CFIndex
+rstr_imp_length(void *rcv, SEL sel)
 {
-    if (!NIL_P(val) && TYPE(val) != T_STRING) {
-	rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
-    }
-    *var = val;
+    return str_length(RSTR(rcv), true);
 }
 
+static UniChar
+rstr_imp_characterAtIndex(void *rcv, SEL sel, CFIndex idx)
+{
+    check_bounds(rcv, idx, false);
+    return str_get_uchar(RSTR(rcv), idx, true);
+}
 
-/*
- *  call-seq:
- *     str.force_encoding(encoding)   => str
- *
- *  Changes the encoding to +encoding+ and returns self.
- */
-
-static VALUE
-rb_str_force_encoding(VALUE str, SEL sel, VALUE enc)
+static void
+rstr_imp_getCharactersRange(void *rcv, SEL sel, UniChar *buffer, CFRange range)
 {
-    // TODO
-    str_modifiable(str);
-    return str;
+    check_bounds(rcv, range.location + range.length, true);
+    if (range.length > 0) {
+	if (str_try_making_data_uchars(RSTR(rcv))) {
+	    memcpy(buffer, &RSTR(rcv)->data.uchars[range.location],
+		    sizeof(UniChar) * range.length);
+	}
+	else {
+	    for (long i = range.location, j = 0;
+		    i < range.location + range.length;
+		    i++, j++) {
+		buffer[j] = RSTR(rcv)->data.bytes[i];
+	    }
+	}
+    }
 }
 
-/*
- *  call-seq:
- *     str.valid_encoding?  => true or false
- *  
- *  Returns true for a string which encoded correctly.
- *
- *    "\xc2\xa1".force_encoding("UTF-8").valid_encoding? => true
- *    "\xc2".force_encoding("UTF-8").valid_encoding? => false
- *    "\x80".force_encoding("UTF-8").valid_encoding? => false
- */
-
-static VALUE
-rb_str_valid_encoding_p(VALUE str, SEL sel)
+static void
+rstr_imp_replaceCharactersInRangeWithString(void *rcv, SEL sel, CFRange range,
+	void *str)
 {
-    return *(VALUE *)str == rb_cByteString ? Qfalse : Qtrue;
+    check_bounds(rcv, range.location + range.length, true);
+    rb_str_t *spat = str_need_string((VALUE)str);
+    str_splice(RSTR(rcv), range.location, range.length, spat, true);
 }
 
-/*
- *  call-seq:
- *     str.ascii_only?  => true or false
- *  
- *  Returns true for a string which has only ASCII characters.
- *
- *    "abc".force_encoding("UTF-8").ascii_only? => true
- *    "abc\u{6666}".force_encoding("UTF-8").ascii_only? => false
- */
-
+// :nodoc:
 static VALUE
-rb_str_is_ascii_only_p(VALUE str, SEL sel)
+nsdata_to_str(VALUE data, SEL sel)
 {
-	CFCharacterSetRef ascii = CFCharacterSetCreateWithCharactersInRange(NULL, CFRangeMake(0, 128));
-	CFCharacterSetRef this = CFCharacterSetCreateWithCharactersInString(NULL, (CFStringRef)str);
-	Boolean b = CFCharacterSetIsSupersetOfSet(ascii, this);
-	CFRelease(ascii); CFRelease(this);
-	return (b ? Qtrue : Qfalse);
+    CFDataRef dataref = (CFDataRef)data;
+    return rb_bstr_new_with_data(CFDataGetBytePtr(dataref),
+	    CFDataGetLength(dataref));
 }
 
-static VALUE
-rb_str_transform_bang(VALUE str, SEL sel, VALUE transform_name)
+void
+Init_String(void)
 {
-    CFRange range;
+    // TODO create NSString.m
+    rb_cNSString = (VALUE)objc_getClass("NSString");
+    assert(rb_cNSString != 0);
+    rb_cString = rb_cNSString;
+    rb_include_module(rb_cString, rb_mComparable);
+    rb_cNSMutableString = (VALUE)objc_getClass("NSMutableString");
+    assert(rb_cNSMutableString != 0);
 
-    rb_str_modify(str);
-    StringValue(transform_name);
-    
-    range = CFRangeMake(0, RSTRING_LEN(str));
+    // rb_cRubyString is defined earlier in Init_PreVM().
+    rb_set_class_path(rb_cRubyString, rb_cObject, "String");
+    rb_const_set(rb_cObject, rb_intern("String"), rb_cRubyString);
 
-    if (!CFStringTransform((CFMutableStringRef)str, 
-		&range,
-		(CFStringRef)transform_name,
-		false)) {
-	rb_raise(rb_eRuntimeError, "cannot apply transformation `%s' to `%s'",
-		RSTRING_PTR(transform_name), RSTRING_PTR(str));
-    }
+    rb_objc_define_method(*(VALUE *)rb_cRubyString, "alloc", rstr_alloc, 0);
+    rb_objc_define_method(*(VALUE *)rb_cRubyString, "try_convert",
+	    rstr_try_convert, 1);
+    rb_objc_define_method(rb_cRubyString, "initialize", rstr_initialize, -1);
+    rb_objc_define_method(rb_cRubyString, "initialize_copy", rstr_replace, 1);
+    rb_objc_define_method(rb_cRubyString, "dup", rstr_dup, 0);
+    rb_objc_define_method(rb_cRubyString, "clone", rstr_clone, 0);
+    rb_objc_define_method(rb_cRubyString, "replace", rstr_replace, 1);
+    rb_objc_define_method(rb_cRubyString, "clear", rstr_clear, 0);
+    rb_objc_define_method(rb_cRubyString, "encoding", rstr_encoding, 0);
+    rb_objc_define_method(rb_cRubyString, "size", rstr_length, 0);
+    rb_objc_define_method(rb_cRubyString, "empty?", rstr_empty, 0);
+    rb_objc_define_method(rb_cRubyString, "bytesize", rstr_bytesize, 0);
+    rb_objc_define_method(rb_cRubyString, "getbyte", rstr_getbyte, 1);
+    rb_objc_define_method(rb_cRubyString, "setbyte", rstr_setbyte, 2);
+    rb_objc_define_method(rb_cRubyString, "force_encoding",
+	    rstr_force_encoding, 1);
+    rb_objc_define_method(rb_cRubyString, "valid_encoding?",
+	    rstr_is_valid_encoding, 0);
+    rb_objc_define_method(rb_cRubyString, "ascii_only?", rstr_is_ascii_only, 0);
+    rb_objc_define_method(rb_cRubyString, "[]", rstr_aref, -1);
+    rb_objc_define_method(rb_cRubyString, "[]=", rstr_aset, -1);
+    rb_objc_define_method(rb_cRubyString, "slice", rstr_aref, -1);
+    rb_objc_define_method(rb_cRubyString, "insert", rstr_insert, 2);
+    rb_objc_define_method(rb_cRubyString, "index", rstr_index, -1);
+    rb_objc_define_method(rb_cRubyString, "rindex", rstr_rindex, -1);
+    rb_objc_define_method(rb_cRubyString, "+", rstr_plus, 1);
+    rb_objc_define_method(rb_cRubyString, "*", rstr_times, 1);
+    rb_objc_define_method(rb_cRubyString, "%", rstr_format, 1);
+    rb_objc_define_method(rb_cRubyString, "<<", rstr_concat, 1);
+    rb_objc_define_method(rb_cRubyString, "concat", rstr_concat, 1);
+    rb_objc_define_method(rb_cRubyString, "==", rstr_equal, 1);
+    rb_objc_define_method(rb_cRubyString, "<=>", rstr_cmp, 1);
+    rb_objc_define_method(rb_cRubyString, "casecmp", rstr_casecmp, 1);
+    rb_objc_define_method(rb_cRubyString, "eql?", rstr_eql, 1);
+    rb_objc_define_method(rb_cRubyString, "include?", rstr_includes, 1);
+    rb_objc_define_method(rb_cRubyString, "start_with?", rstr_start_with, -1);
+    rb_objc_define_method(rb_cRubyString, "end_with?", rstr_end_with, -1);
+    rb_objc_define_method(rb_cRubyString, "to_s", rstr_to_s, 0);
+    rb_objc_define_method(rb_cRubyString, "to_str", rstr_to_s, 0);
+    rb_objc_define_method(rb_cRubyString, "to_sym", rstr_intern, 0);
+    rb_objc_define_method(rb_cRubyString, "intern", rstr_intern, 0);
+    rb_objc_define_method(rb_cRubyString, "inspect", rstr_inspect, 0);
+    rb_objc_define_method(rb_cRubyString, "dump", rstr_dump, 0);
+    rb_objc_define_method(rb_cRubyString, "match", rstr_match2, -1);
+    rb_objc_define_method(rb_cRubyString, "=~", rstr_match, 1);
+    rb_objc_define_method(rb_cRubyString, "scan", rstr_scan, 1);
+    rb_objc_define_method(rb_cRubyString, "split", rstr_split, -1);
+    rb_objc_define_method(rb_cRubyString, "to_i", rstr_to_i, -1);
+    rb_objc_define_method(rb_cRubyString, "hex", rstr_hex, 0);
+    rb_objc_define_method(rb_cRubyString, "oct", rstr_oct, 0);
+    rb_objc_define_method(rb_cRubyString, "ord", rstr_ord, 0);
+    rb_objc_define_method(rb_cRubyString, "chr", rstr_chr, 0);
+    rb_objc_define_method(rb_cRubyString, "to_f", rstr_to_f, 0);
+    rb_objc_define_method(rb_cRubyString, "chomp", rstr_chomp, -1);
+    rb_objc_define_method(rb_cRubyString, "chomp!", rstr_chomp_bang, -1);
+    rb_objc_define_method(rb_cRubyString, "chop", rstr_chop, -1);
+    rb_objc_define_method(rb_cRubyString, "chop!", rstr_chop_bang, -1);
+    rb_objc_define_method(rb_cRubyString, "sub", rstr_sub, -1);
+    rb_objc_define_method(rb_cRubyString, "sub!", rstr_sub_bang, -1);
+    rb_objc_define_method(rb_cRubyString, "gsub", rstr_gsub, -1);
+    rb_objc_define_method(rb_cRubyString, "gsub!", rstr_gsub_bang, -1);
+    rb_objc_define_method(rb_cRubyString, "downcase", rstr_downcase, 0);
+    rb_objc_define_method(rb_cRubyString, "downcase!", rstr_downcase_bang, 0);
+    rb_objc_define_method(rb_cRubyString, "upcase", rstr_upcase, 0);
+    rb_objc_define_method(rb_cRubyString, "upcase!", rstr_upcase_bang, 0);
+    rb_objc_define_method(rb_cRubyString, "swapcase", rstr_swapcase, 0);
+    rb_objc_define_method(rb_cRubyString, "swapcase!", rstr_swapcase_bang, 0);
+    rb_objc_define_method(rb_cRubyString, "capitalize", rstr_capitalize, 0);
+    rb_objc_define_method(rb_cRubyString, "capitalize!",
+	    rstr_capitalize_bang, 0);
+    rb_objc_define_method(rb_cRubyString, "ljust", rstr_ljust, -1);
+    rb_objc_define_method(rb_cRubyString, "rjust", rstr_rjust, -1);
+    rb_objc_define_method(rb_cRubyString, "center", rstr_center, -1);
+    rb_objc_define_method(rb_cRubyString, "strip", rstr_strip, 0);
+    rb_objc_define_method(rb_cRubyString, "lstrip", rstr_lstrip, 0);
+    rb_objc_define_method(rb_cRubyString, "rstrip", rstr_rstrip, 0);
+    rb_objc_define_method(rb_cRubyString, "strip!", rstr_strip_bang, 0);
+    rb_objc_define_method(rb_cRubyString, "lstrip!", rstr_lstrip_bang, 0);
+    rb_objc_define_method(rb_cRubyString, "rstrip!", rstr_rstrip_bang, 0);
+    rb_objc_define_method(rb_cRubyString, "lines", rstr_each_line, -1);
+    rb_objc_define_method(rb_cRubyString, "each_line", rstr_each_line, -1);
+    rb_objc_define_method(rb_cRubyString, "chars", rstr_each_char, 0);
+    rb_objc_define_method(rb_cRubyString, "each_char", rstr_each_char, 0);
+    rb_objc_define_method(rb_cRubyString, "bytes", rstr_each_byte, 0);
+    rb_objc_define_method(rb_cRubyString, "each_byte", rstr_each_byte, 0);
+    rb_objc_define_method(rb_cRubyString, "succ", rstr_succ, 0);
+    rb_objc_define_method(rb_cRubyString, "succ!", rstr_succ_bang, 0);
+    rb_objc_define_method(rb_cRubyString, "next", rstr_succ, 0);
+    rb_objc_define_method(rb_cRubyString, "next!", rstr_succ_bang, 0);
+    rb_objc_define_method(rb_cRubyString, "upto", rstr_upto, -1);
+    rb_objc_define_method(rb_cRubyString, "reverse", rstr_reverse, 0);
+    rb_objc_define_method(rb_cRubyString, "reverse!", rstr_reverse_bang, 0);
+    rb_objc_define_method(rb_cRubyString, "count", rstr_count, -1);
+    rb_objc_define_method(rb_cRubyString, "delete", rstr_delete, -1);
+    rb_objc_define_method(rb_cRubyString, "delete!", rstr_delete_bang, -1);
+    rb_objc_define_method(rb_cRubyString, "squeeze", rstr_squeeze, -1);
+    rb_objc_define_method(rb_cRubyString, "squeeze!", rstr_squeeze_bang, -1);
+    rb_objc_define_method(rb_cRubyString, "tr", rstr_tr, 2);
+    rb_objc_define_method(rb_cRubyString, "tr!", rstr_tr_bang, 2);
+    rb_objc_define_method(rb_cRubyString, "tr_s", rstr_tr_s, 2);
+    rb_objc_define_method(rb_cRubyString, "tr_s!", rstr_tr_s_bang, 2);
+    rb_objc_define_method(rb_cRubyString, "sum", rstr_sum, -1);
+    rb_objc_define_method(rb_cRubyString, "hash", rstr_hash, 0);
+    rb_objc_define_method(rb_cRubyString, "partition", rstr_partition, 1);
+    rb_objc_define_method(rb_cRubyString, "rpartition", rstr_rpartition, 1);
+    rb_objc_define_method(rb_cRubyString, "crypt", rstr_crypt, 1);
 
-    return range.length == kCFNotFound ? Qnil : str;
-}
+    // MacRuby extensions.
+    rb_objc_define_method(rb_cRubyString, "transform", rstr_transform, 1);
 
-static VALUE
-rb_str_transform(VALUE str, SEL sel, VALUE transform_name)
-{
-    str = rb_str_dup(str);
-    rb_str_transform_bang(str, 0, transform_name);
-    return str;
-}
+    // MacRuby extensions (debugging).
+    rb_objc_define_method(rb_cRubyString, "__chars_count__",
+	    rstr_chars_count, 0);
+    rb_objc_define_method(rb_cRubyString, "__getchar__", rstr_getchar, 1);
+    rb_objc_define_method(rb_cRubyString, "__stored_in_uchars__?",
+	    rstr_is_stored_in_uchars, 0);
 
-/**********************************************************************
- * Document-class: Symbol
- *
- *  <code>Symbol</code> objects represent names and some strings
- *  inside the Ruby
- *  interpreter. They are generated using the <code>:name</code> and
- *  <code>:"string"</code> literals
- *  syntax, and by the various <code>to_sym</code> methods. The same
- *  <code>Symbol</code> object will be created for a given name or string
- *  for the duration of a program's execution, regardless of the context
- *  or meaning of that name. Thus if <code>Fred</code> is a constant in
- *  one context, a method in another, and a class in a third, the
- *  <code>Symbol</code> <code>:Fred</code> will be the same object in
- *  all three contexts.
- *     
- *     module One
- *       class Fred
- *       end
- *       $f1 = :Fred
- *     end
- *     module Two
- *       Fred = 1
- *       $f2 = :Fred
- *     end
- *     def Fred()
- *     end
- *     $f3 = :Fred
- *     $f1.object_id   #=> 2514190
- *     $f2.object_id   #=> 2514190
- *     $f3.object_id   #=> 2514190
- *     
- */
+    // Cocoa primitives.
+    rb_objc_install_method2((Class)rb_cRubyString, "length",
+	    (IMP)rstr_imp_length);
+    rb_objc_install_method2((Class)rb_cRubyString, "characterAtIndex:",
+	    (IMP)rstr_imp_characterAtIndex);
+    rb_objc_install_method2((Class)rb_cRubyString, "getCharacters:range:",
+	    (IMP)rstr_imp_getCharactersRange);
+    rb_objc_install_method2((Class)rb_cRubyString,
+	    "replaceCharactersInRange:withString:", 
+	    (IMP)rstr_imp_replaceCharactersInRangeWithString);
 
+    rb_fs = Qnil;
+    rb_define_variable("$;", &rb_fs);
+    rb_define_variable("$-F", &rb_fs);
 
-/*
- *  call-seq:
- *     sym == obj   => true or false
- *  
- *  Equality---If <i>sym</i> and <i>obj</i> are exactly the same
- *  symbol, returns <code>true</code>. Otherwise, compares them
- *  as strings.
- */
-
-static VALUE
-sym_equal(VALUE sym1, SEL sel, VALUE sym2)
-{
-    return sym1 == sym2 ? Qtrue : Qfalse;
+    // NSData extensions.
+    VALUE NSData = (VALUE)objc_getClass("NSData");
+    assert(NSData != 0);
+    rb_objc_define_method(NSData, "to_str", nsdata_to_str, 0);
 }
 
-static VALUE
-sym_cmp(VALUE sym1, SEL sel, VALUE sym2)
+bool
+rb_objc_str_is_pure(VALUE str)
 {
-    int code;
-    if (CLASS_OF(sym2) != rb_cSymbol) {
-	return Qnil;
+    VALUE k = *(VALUE *)str;
+    while (RCLASS_SINGLETON(k)) {
+        k = RCLASS_SUPER(k);
     }
-    code = strcmp(RSYMBOL(sym1)->str, RSYMBOL(sym2)->str);
-    if (code > 0) {
-	code = 1;
+    if (k == rb_cRubyString) {
+        return true;
     }
-    else if (code < 0) {
-	code = -1;
+    while (k != 0) {
+        if (k == rb_cRubyString) {
+            return false;
+        }
+        k = RCLASS_SUPER(k);
     }
-    return INT2FIX(code);
+    return true;
 }
 
-/*
- *  call-seq:
- *     sym.inspect    => string
- *  
- *  Returns the representation of <i>sym</i> as a symbol literal.
- *     
- *     :fred.inspect   #=> ":fred"
- */
-
-static inline bool
-sym_printable(const char *str, long len)
+void
+rb_objc_install_string_primitives(Class klass)
 {
-    // TODO multibyte symbols
-    long i;
-    for (i = 0; i < len; i++) {
-	if (!isprint(str[i])) {
-	    return false;
-	}
-    }
-    return true;
+    // TODO
 }
 
-static VALUE
-sym_inspect(VALUE sym, SEL sel)
-{
-    const char *symstr = RSYMBOL(sym)->str;
+// ByteString emulation.
 
-    long len = strlen(symstr);
-    if (len == 0) {
-	return rb_str_new2(":\"\"");
-    }
+#define IS_BSTR(obj) (IS_RSTR(obj) && !str_is_stored_in_uchars(RSTR(obj)))
 
-    VALUE str = rb_str_new2(":");
-    if (!rb_symname_p(symstr) || !sym_printable(symstr, len)) {
-	rb_str_buf_cat2(str, "\"");
-	rb_str_buf_append(str, sym);
-	rb_str_buf_cat2(str, "\"");
+VALUE
+rb_str_bstr(VALUE str)
+{
+    if (IS_RSTR(str)) {
+	str_make_data_binary(RSTR(str));
+	return str;
     }
-    else {
-	rb_str_buf_append(str, sym);
-    }
+    abort(); // TODO
+}
 
-    return str;
+uint8_t *
+rb_bstr_bytes(VALUE str)
+{
+    assert(IS_BSTR(str));
+    return (uint8_t *)RSTR(str)->data.bytes;
 }
 
+VALUE
+rb_bstr_new_with_data(const uint8_t *bytes, long len)
+{
+    rb_str_t *str = str_alloc(rb_cRubyString);
+    str_replace_with_bytes(str, (char *)bytes, len,
+	    rb_encodings[ENCODING_BINARY]);
+    return (VALUE)str;
+}
 
-/*
- *  call-seq:
- *     sym.id2name   => string
- *     sym.to_s      => string
- *  
- *  Returns the name or string corresponding to <i>sym</i>.
- *     
- *     :fred.id2name   #=> "fred"
- */
+VALUE
+rb_bstr_new(void)
+{
+    return rb_bstr_new_with_data(NULL, 0);
+}
 
+long
+rb_bstr_length(VALUE str)
+{
+    assert(IS_BSTR(str));
+    return RSTR(str)->length_in_bytes;
+}
 
-static VALUE
-rb_sym_to_s_imp(VALUE sym, SEL sel)
+void
+rb_bstr_concat(VALUE str, const uint8_t *bytes, long len)
 {
-    return rb_str_new2(RSYMBOL(sym)->str);
+    assert(IS_BSTR(str));
+    str_concat_bytes(RSTR(str), (const char *)bytes, len);
 }
 
-VALUE
-rb_sym_to_s(VALUE sym)
+void
+rb_bstr_resize(VALUE str, long capa)
 {
-    return rb_sym_to_s_imp(sym, 0);
+    assert(IS_BSTR(str));
+    str_resize_bytes(RSTR(str), capa);
+    RSTR(str)->length_in_bytes = capa;
 }
 
-/*
- * call-seq:
- *   sym.to_sym   => sym
- *   sym.intern   => sym
- *
- * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
- * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
- * in this case.
- */
-
-static VALUE
-sym_to_sym(VALUE sym, SEL sel)
+void
+rb_bstr_set_length(VALUE str, long len)
 {
-    return sym;
+    assert(IS_BSTR(str));
+    assert(len <= RSTR(str)->capacity_in_bytes);
+    RSTR(str)->length_in_bytes = len;
 }
 
-/*
- * call-seq:
- *   sym.to_proc
- *
- * Returns a _Proc_ object which respond to the given method by _sym_.
- *
- *   (1..3).collect(&:to_s)  #=> ["1", "2", "3"]
- */
+// Compiler primitives.
 
-static VALUE
-sym_to_proc(VALUE sym, SEL sel)
+VALUE
+rb_str_new_empty(void)
 {
-    SEL msel = sel_registerName(rb_id2name(SYM2ID(sym)));
-    rb_vm_block_t *b = rb_vm_create_block_calling_sel(msel);
-    return rb_proc_alloc_with_block(rb_cProc, b);
+    return (VALUE)str_alloc(rb_cRubyString);
 }
 
-ID
-rb_to_id(VALUE name)
+VALUE
+rb_unicode_str_new(const UniChar *ptr, const size_t len)
 {
-    VALUE tmp;
-    ID id;
+    VALUE str = rb_str_new_empty();
+    str_replace_with_uchars(RSTR(str), ptr, len);
+    return str;
+}
 
-    switch (TYPE(name)) {
-      default:
-	tmp = rb_check_string_type(name);
-	if (NIL_P(tmp)) {
-	    rb_raise(rb_eTypeError, "%s is not a symbol",
-		     RSTRING_PTR(rb_inspect(name)));
+VALUE
+rb_str_new_fast(int argc, ...)
+{
+    VALUE str = (VALUE)str_alloc(rb_cRubyString);
+
+    if (argc > 0) {
+	va_list ar;
+	va_start(ar, argc);
+	for (int i = 0; i < argc; ++i) {
+	    VALUE fragment = va_arg(ar, VALUE);
+	    switch (TYPE(fragment)) {
+		default:
+		    fragment = rb_obj_as_string(fragment);
+		    // fall through
+
+		case T_STRING:
+		    rstr_concat(str, 0, fragment);
+		    break;
+	    }
 	}
-	name = tmp;
-	/* fall through */
-      case T_STRING:
-	name = rb_str_intern(name, 0);
-	/* fall through */
-      case T_SYMBOL:
-	return SYM2ID(name);
+	va_end(ar);
     }
-    return id;
+
+    return str;
 }
 
-#define PREPARE_RCV(x) \
-    Class old = *(Class *)x; \
-    *(Class *)x = (Class)rb_cCFString;
+VALUE
+rb_str_intern_fast(VALUE str)
+{
+    // TODO: this currently does 2 hash lookups, could be optimized.
+    return ID2SYM(rb_intern_str(str));
+}
 
-#define RESTORE_RCV(x) \
-    *(Class *)x = old;
+// MRI C-API compatibility.
 
-bool
-rb_objc_str_is_pure(VALUE str)
+VALUE
+rb_enc_str_new(const char *cstr, long len, rb_encoding_t *enc)
 {
-    return *(Class *)str == (Class)rb_cCFString;
+    // XXX should we assert that enc is single byte?
+    if (enc == NULL) {
+	// This function can be called with a NULL encoding. 
+	enc = rb_encodings[ENCODING_UTF8];
+    }
+    else {
+	// People must use the bstr_ APIs to deal with binary.
+	assert(enc != rb_encodings[ENCODING_BINARY]);
+    }
+    rb_str_t *str = str_alloc(rb_cRubyString);
+    str_replace_with_bytes(str, cstr, len, enc);
+    return (VALUE)str;
 }
 
-static CFIndex
-imp_rb_str_length(void *rcv, SEL sel)
+VALUE
+rb_str_new(const char *cstr, long len)
 {
-    CFIndex length;
-    PREPARE_RCV(rcv);
-    length = CFStringGetLength((CFStringRef)rcv);
-    RESTORE_RCV(rcv);
-    return length;
+    return rb_enc_str_new(cstr, len, rb_encodings[ENCODING_UTF8]);
 }
 
-static UniChar
-imp_rb_str_characterAtIndex(void *rcv, SEL sel, CFIndex idx)
+VALUE
+rb_str_buf_new(long len)
 {
-    UniChar character;
-    PREPARE_RCV(rcv);
-    character = CFStringGetCharacterAtIndex((CFStringRef)rcv, idx);
-    RESTORE_RCV(rcv);
-    return character;
+    return rb_str_new(NULL, len);
 }
 
-static void
-imp_rb_str_getCharactersRange(void *rcv, SEL sel, UniChar *buffer, 
-			      CFRange range)
+VALUE
+rb_str_new2(const char *cstr)
 {
-    PREPARE_RCV(rcv);
-    CFStringGetCharacters((CFStringRef)rcv, range, buffer);
-    RESTORE_RCV(rcv);
+    return rb_str_new(cstr, strlen(cstr));
 }
 
-static void
-imp_rb_str_replaceCharactersInRangeWithString(void *rcv, SEL sel, 
-					      CFRange range, void *str)
+VALUE
+rb_str_new3(VALUE source)
 {
-    PREPARE_RCV(rcv);
-    CFStringReplace((CFMutableStringRef)rcv, range, (CFStringRef)str);
-    RESTORE_RCV(rcv);
+    rb_str_t *str = str_alloc(rb_obj_class(source));
+    str_replace(str, source);
+    if (OBJ_TAINTED(source)) {
+	OBJ_TAINT(str);
+    }
+    return (VALUE)str;
 }
 
-static const UniChar *
-imp_rb_str_fastCharacterContents(void *rcv, SEL sel)
+VALUE
+rb_str_new4(VALUE source)
 {
-    const UniChar *ptr;
-    PREPARE_RCV(rcv);
-    ptr = CFStringGetCharactersPtr((CFStringRef)rcv);
-    RESTORE_RCV(rcv);
-    return ptr;
+    VALUE str = rb_str_new3(source);
+    OBJ_FREEZE(str);
+    return str;
 }
 
-static const char *
-imp_rb_str_fastCStringContents(void *rcv, SEL sel, bool nullTerminaisonRequired)
+VALUE
+rb_str_new5(VALUE source, const char *cstr, long len)
 {
-    const char *cstr;
-    PREPARE_RCV(rcv);
-    cstr = CFStringGetCStringPtr((CFStringRef)rcv, 0);
-    /* XXX nullTerminaisonRequired should perhaps be honored */
-    RESTORE_RCV(rcv);
-    return cstr;
+    rb_str_t *str = str_alloc(rb_obj_class(source));
+    str_replace_with_bytes(str, cstr, len, rb_encodings[ENCODING_UTF8]);
+    return (VALUE)str;
 }
 
-static CFStringEncoding
-imp_rb_str_fastestEncodingInCFStringEncoding(void *rcv, SEL sel)
+VALUE
+rb_tainted_str_new(const char *cstr, long len)
 {
-    CFStringEncoding encoding;
-    PREPARE_RCV(rcv);
-    encoding =  CFStringGetFastestEncoding((CFStringRef)rcv);
-    RESTORE_RCV(rcv);
-    return encoding;
+    VALUE str = rb_str_new(cstr, len);
+    OBJ_TAINT(str);
+    return str;
 }
 
-static bool
-imp_rb_str_isEqual(void *rcv, SEL sel, void *other)
+VALUE
+rb_tainted_str_new2(const char *cstr)
 {
-    bool flag;
-    PREPARE_RCV(rcv);
-    flag = (other != NULL) && CFEqual((CFTypeRef)rcv, (CFTypeRef)other);    
-    RESTORE_RCV(rcv);
-    return flag;
+    return rb_tainted_str_new(cstr, strlen(cstr));
 }
 
-static void *
-imp_rb_str_copy(void *rcv, SEL sel)
+VALUE
+rb_usascii_str_new(const char *cstr, long len)
 {
-    void *dup;
-    PREPARE_RCV(rcv);
-    dup = (void *)objc_msgSend(rcv, selCopy);
-    RESTORE_RCV(rcv);
-    return dup;
+    VALUE str = rb_str_new(cstr, len);
+    RSTR(str)->encoding = rb_encodings[ENCODING_ASCII];
+    return str;
 }
 
-static void *
-imp_rb_str_mutableCopy(void *rcv, SEL sel)
+VALUE
+rb_usascii_str_new2(const char *cstr)
 {
-    void *dup;
-    PREPARE_RCV(rcv);
-    dup = (void *)objc_msgSend(rcv, selMutableCopy);
-    RESTORE_RCV(rcv);
-    return dup;
+    return rb_usascii_str_new(cstr, strlen(cstr));
 }
 
-void
-rb_objc_install_string_primitives(Class klass)
+const char *
+rb_str_cstr(VALUE str)
 {
-    rb_objc_install_method2(klass, "length", (IMP)imp_rb_str_length);
-    rb_objc_install_method2(klass, "characterAtIndex:",
-	    (IMP)imp_rb_str_characterAtIndex);
-    rb_objc_install_method2(klass, "getCharacters:range:",
-	    (IMP)imp_rb_str_getCharactersRange);
-    rb_objc_install_method2(klass, "_fastCharacterContents",
-	    (IMP)imp_rb_str_fastCharacterContents);
-    rb_objc_install_method2(klass, "_fastCStringContents:",
-	    (IMP)imp_rb_str_fastCStringContents);
-    rb_objc_install_method2(klass, "_fastestEncodingInCFStringEncoding",
-	(IMP)imp_rb_str_fastestEncodingInCFStringEncoding);
-    rb_objc_install_method2(klass, "isEqual:", (IMP)imp_rb_str_isEqual);
-    rb_objc_install_method2(klass, "copy", (IMP)imp_rb_str_copy);
-    rb_objc_install_method2(klass, "mutableCopy", (IMP)imp_rb_str_mutableCopy);
+    if (IS_RSTR(str)) {
+	if (RSTR(str)->length_in_bytes == 0) {
+	    return "";
+	}
+	str_make_data_binary(RSTR(str));
+	str_ensure_null_terminator(RSTR(str));
+	return RSTR(str)->data.bytes;
+    }
 
-    const bool mutable = class_getSuperclass(klass)
-	== (Class)rb_cNSMutableString;
+    // CFString code path, hopefully this should not happen very often.
+    const char *cptr = (const char *)CFStringGetCStringPtr((CFStringRef)str, 0);
+    if (cptr != NULL) {
+	return cptr;
+    }
 
-    if (mutable) {
-	rb_objc_install_method2(klass, "replaceCharactersInRange:withString:", 
-		(IMP)imp_rb_str_replaceCharactersInRangeWithString);
+    const long max = CFStringGetMaximumSizeForEncoding(
+	    CFStringGetLength((CFStringRef)str),
+	    kCFStringEncodingUTF8);
+    char *cptr2 = (char *)xmalloc(max + 1);
+    if (!CFStringGetCString((CFStringRef)str, cptr2, max + 1,
+		kCFStringEncodingUTF8)) {
+	// Probably an UTF16 string...
+	xfree(cptr2);
+	return NULL;
     }
+    return cptr2;
+}
 
-    rb_objc_define_method(*(VALUE *)klass, "alloc", str_alloc, 0);
+long
+rb_str_clen(VALUE str)
+{
+    if (IS_RSTR(str)) {
+	str_make_data_binary(RSTR(str));
+	return RSTR(str)->length_in_bytes;
+    }
+    return CFStringGetLength((CFStringRef)str);
 }
 
-static CFIndex
-imp_rb_symbol_length(void *rcv, SEL sel)
+char *
+rb_string_value_cstr(volatile VALUE *ptr)
 {
-    return RSYMBOL(rcv)->len;
+    VALUE str = rb_string_value(ptr);
+    return (char *)rb_str_cstr(str);
 }
 
-static UniChar
-imp_rb_symbol_characterAtIndex(void *rcv, SEL sel, CFIndex idx)
+char *
+rb_string_value_ptr(volatile VALUE *ptr)
 {
-    if (idx < 0 || idx > RSYMBOL(rcv)->len) {
-	rb_bug("[Symbol characterAtIndex:] out of bounds");
-    }
-    return RSYMBOL(rcv)->str[idx];
+    return rb_string_value_cstr(ptr);
 }
 
-static void
-imp_rb_symbol_getCharactersRange(void *rcv, SEL sel, UniChar *buffer, 
-	CFRange range)
+VALUE
+rb_string_value(volatile VALUE *ptr)
 {
-    if (range.location + range.length > RSYMBOL(rcv)->len) {
-	rb_bug("[Symbol getCharacters:range:] out of bounds");
+    VALUE s = *ptr;
+    if (TYPE(s) != T_STRING) {
+	s = rb_str_to_str(s);
+	*ptr = s;
     }
+    return s;
+}
 
-    for (int i = range.location; i < range.location + range.length; i++) {
-	*buffer = RSYMBOL(rcv)->str[i];
-	buffer++;
-    }
+VALUE
+rb_check_string_type(VALUE str)
+{
+    return rb_check_convert_type(str, T_STRING, "String", "to_str");
 }
 
-static bool
-imp_rb_symbol_isEqual(void *rcv, SEL sel, void *other)
+VALUE
+rb_str_to_str(VALUE str)
 {
-    if (rcv == other) {
-	return true;
+    return rb_convert_type(str, T_STRING, "String", "to_str");
+}
+
+VALUE
+rb_obj_as_string(VALUE obj)
+{
+    if (TYPE(obj) == T_STRING || TYPE(obj) == T_SYMBOL) {
+	return obj;
     }
-    if (other == NULL || *(VALUE *)other != rb_cSymbol) {
-	return false;
+    VALUE str = rb_vm_call(obj, selToS, 0, NULL, false);
+    if (TYPE(str) != T_STRING) {
+	return rb_any_to_s(obj);
     }
-    if (RSYMBOL(rcv)->len != RSYMBOL(other)->len) {
-	return false;
+    if (OBJ_TAINTED(obj)) {
+	OBJ_TAINT(str);
     }
-    return strcmp(RSYMBOL(rcv)->str, RSYMBOL(other)->str) == 0;
+    return str;
 }
 
-static void *
-imp_rb_symbol_mutableCopy(void *rcv, SEL sel)
+void
+rb_str_setter(VALUE val, ID id, VALUE *var)
 {
-    CFMutableStringRef new_str = CFStringCreateMutable(NULL, 0);
-    CFStringAppendCString(new_str, RSYMBOL(rcv)->str, kCFStringEncodingUTF8);
-    CFMakeCollectable(new_str);
-    return new_str;
+    if (!NIL_P(val) && TYPE(val) != T_STRING) {
+	rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
+    }
+    *var = val;
 }
 
-static void
-install_symbol_primitives(void)
+ID
+rb_to_id(VALUE name)
 {
-    Class klass = (Class)rb_cSymbol;
+    VALUE tmp;
+    switch (TYPE(name)) {
+	default:
+	    tmp = rb_check_string_type(name);
+	    if (NIL_P(tmp)) {
+		rb_raise(rb_eTypeError, "%s is not a symbol",
+			RSTRING_PTR(rb_inspect(name)));
+	    }
+	    name = tmp;
+	    /* fall through */
+	case T_STRING:
+	    name = rstr_intern(name, 0);
+	    /* fall through */
+	case T_SYMBOL:
+	    return SYM2ID(name);
+    }
+}
 
-    rb_objc_install_method2(klass, "length", (IMP)imp_rb_symbol_length);
-    rb_objc_install_method2(klass, "characterAtIndex:", (IMP)imp_rb_symbol_characterAtIndex);
-    rb_objc_install_method2(klass, "getCharacters:range:", (IMP)imp_rb_symbol_getCharactersRange);
-    rb_objc_install_method2(klass, "isEqual:", (IMP)imp_rb_symbol_isEqual);
-    rb_objc_install_method2(klass, "mutableCopy", (IMP)imp_rb_symbol_mutableCopy);
+UChar
+rb_str_get_uchar(VALUE str, long pos)
+{
+    if (RSTR(str)) {
+	return str_get_uchar(RSTR(str), pos, false);
+    }
+    assert(pos >= 0 && pos < CFStringGetLength((CFStringRef)str));
+    return CFStringGetCharacterAtIndex((CFStringRef)str, pos);
 }
 
-#undef INSTALL_METHOD
-
-CFMutableDataRef 
-rb_bytestring_wrapped_data(VALUE bstr)
+void
+rb_str_append_uchar(VALUE str, UChar c)
 {
-    return ((rb_bstr_t *)bstr)->data;
+    if (RSTR(str)) {
+	str_append_uchar(RSTR(str), c);	
+    }
+    else {
+	CFStringAppendCharacters((CFMutableStringRef)str, &c, 1);
+    }	
 }
 
 void
-rb_bytestring_set_wrapped_data(VALUE bstr, CFMutableDataRef data)
+rb_str_append_uchars(VALUE str, const UChar *chars, long len)
 {
-    GC_WB(&((rb_bstr_t *)bstr)->data, data);
+    assert(chars != NULL && len >= 0);
+
+    if (len > 0) {
+	if (RSTR(str)) {
+	    str_concat_uchars(RSTR(str), chars, len);
+	}
+	else {
+	    CFStringAppendCharacters((CFMutableStringRef)str, chars, len);
+	}
+    }
 }
 
-UInt8 *
-rb_bytestring_byte_pointer(VALUE bstr)
+long
+rb_str_chars_len(VALUE str)
 {
-    return CFDataGetMutableBytePtr(rb_bytestring_wrapped_data(bstr));
+    if (IS_RSTR(str)) {
+	return str_length(RSTR(str), false);
+    }
+    return CFStringGetLength((CFStringRef)str);
 }
 
-static inline VALUE
-bytestring_alloc(void)
+VALUE
+rb_str_length(VALUE str)
 {
-    NEWOBJ(bstr, rb_bstr_t);
-    bstr->basic.flags = 0;
-    bstr->basic.klass = rb_cByteString;
-    bstr->data = NULL;
-    return (VALUE)bstr;
+    return LONG2NUM(rb_str_chars_len(str));
 }
 
-static VALUE
-rb_bytestring_alloc(VALUE klass, SEL sel)
+VALUE
+rb_str_buf_new2(const char *cstr)
 {
-    VALUE bstr = bytestring_alloc();
+    return rb_str_new2(cstr);
+}
 
-    CFMutableDataRef data = CFDataCreateMutable(NULL, 0);
-    rb_bytestring_set_wrapped_data(bstr, data);
-    CFMakeCollectable(data);
-
-    return bstr;
+VALUE
+rb_enc_str_buf_cat(VALUE str, const char *cstr, long len, rb_encoding_t *enc)
+{
+    if (IS_RSTR(str)) {
+	// XXX this could be optimized
+	VALUE substr = rb_enc_str_new(cstr, len, enc);
+	str_concat_string(RSTR(str), RSTR(substr));
+    }
+    else {
+	abort(); // TODO	
+    }
+    return str;
 }
 
-VALUE 
-rb_bytestring_new() 
+VALUE
+rb_str_buf_cat(VALUE str, const char *cstr, long len)
 {
-    return rb_bytestring_alloc(0, 0);
+    return rb_enc_str_buf_cat(str, cstr, len, RSTR(str)->encoding);
 }
 
 VALUE
-rb_bytestring_new_with_data(const UInt8 *buf, long size)
+rb_str_buf_cat2(VALUE str, const char *cstr)
 {
-    VALUE v = rb_bytestring_new();
-    CFDataAppendBytes(rb_bytestring_wrapped_data(v), buf, size);
-    return v;
+    return rb_str_buf_cat(str, cstr, strlen(cstr));
 }
 
 VALUE
-rb_bytestring_new_with_cfdata(CFMutableDataRef data)
+rb_str_cat(VALUE str, const char *cstr, long len)
 {
-    VALUE v = bytestring_alloc();
-    rb_bytestring_set_wrapped_data(v, data);
-    return v;
+    return rb_str_buf_cat(str, cstr, len);
 }
 
-static VALUE
-bytestring_from_data(VALUE klass, SEL sel, VALUE data)
+VALUE
+rb_str_cat2(VALUE str, const char *cstr)
 {
-    return rb_bytestring_new_with_cfdata((CFMutableDataRef)data);
+    return rb_str_buf_cat2(str, cstr);
 }
 
-static void inline
-rb_bytestring_copy_cfstring_content(VALUE bstr, CFStringRef str)
+VALUE
+rb_str_buf_cat_ascii(VALUE str, const char *cstr)
 {
-    if (CFStringGetLength(str) != 0) {
-        const char *cptr = RSTRING_PTR((VALUE)str);
-	assert(cptr != NULL); // TODO handle UTF-16 strings
-
-	CFDataAppendBytes(rb_bytestring_wrapped_data(bstr), (UInt8 *)cptr, 
-		CFStringGetLength(str));
-    }
+    return rb_str_buf_cat2(str, cstr);
 }
 
-static VALUE
-rb_bytestring_initialize(VALUE recv, SEL sel, int argc, VALUE *argv)
+VALUE
+rb_str_buf_append(VALUE str, VALUE str2)
 {
-    VALUE orig;
-
-    rb_scan_args(argc, argv, "01", &orig);
-
-    if (!NIL_P(orig)) {
-	StringValue(orig);
-	rb_bytestring_copy_cfstring_content(recv, (CFStringRef)orig);
+    if (IS_RSTR(str)) {
+	return rstr_concat(str, 0, str2);
     }
-    return orig;
+    CFStringAppend((CFMutableStringRef)str, (CFStringRef)str2);
+    return str;
 }
 
 VALUE
-rb_coerce_to_bytestring(VALUE str)
+rb_str_append(VALUE str, VALUE str2)
 {
-    VALUE new = rb_bytestring_alloc(0, 0);
-    rb_bytestring_copy_cfstring_content(new, (CFStringRef)str);
-    return new;
+    return rb_str_buf_append(str, str2);
 }
 
-inline long 
-rb_bytestring_length(VALUE str)
+VALUE
+rb_str_concat(VALUE str, VALUE str2)
 {
-    return CFDataGetLength(rb_bytestring_wrapped_data(str));
+    return rb_str_buf_append(str, str2);
 }
 
 void
-rb_bytestring_resize(VALUE str, long newsize)
+rb_str_associate(VALUE str, VALUE add)
 {
-    CFDataSetLength(rb_bytestring_wrapped_data(str), newsize);
+    // Do nothing.
 }
 
-void
-rb_bytestring_append_bytes(VALUE str, const UInt8* bytes, long len)
+VALUE
+rb_str_associated(VALUE str)
 {
-    CFDataAppendBytes(rb_bytestring_wrapped_data(str), bytes, len);
+    // Do nothing.
+    return Qfalse;
 }
 
-CFStringRef
-rb_bytestring_resolve_cfstring(VALUE str)
+VALUE
+rb_str_resize(VALUE str, long len)
 {
-    CFDataRef data = rb_bytestring_wrapped_data(str);
-    CFStringRef cfstr = CFStringCreateFromExternalRepresentation(NULL,
-	data, kCFStringEncodingUTF8);
-    if (cfstr == NULL) {
-	// If UTF8 doesn't work, try ASCII.
-	cfstr = CFStringCreateFromExternalRepresentation(NULL,
-		data, kCFStringEncodingASCII);
+    if (IS_RSTR(str)) {
+	str_resize_bytes(RSTR(str), len);
     }
-    if (cfstr != NULL) {
-	return CFMakeCollectable(cfstr);
+    else {
+	abort(); // TODO
     }
-    return (CFStringRef)str;
+    return str;
 }
 
-static bool
-imp_rb_bytestring_isEqual(void *rcv, SEL sel, void *other)
+void
+rb_str_set_len(VALUE str, long len)
 {
-    if (rcv == other) {
-	return true;
+    if (IS_RSTR(str)) {
+	const long len_bytes = str_is_stored_in_uchars(RSTR(str))
+	    ? UCHARS_TO_BYTES(len) : len;
+	assert(len_bytes <= RSTR(str)->length_in_bytes);
+	RSTR(str)->length_in_bytes = len_bytes;
     }
-    if (*(VALUE *)other == rb_cByteString) {
-	// Both operands are bytestrings.
-	CFDataRef rcv_data = rb_bytestring_wrapped_data((VALUE)rcv);
-	CFDataRef other_data = rb_bytestring_wrapped_data((VALUE)other);
-	if (CFDataGetLength(rcv_data) != CFDataGetLength(other_data)) {
-	    return false;
-	}
-	return CFEqual(rcv_data, other_data);
-    }
     else {
-	// Given operand is a character string.
-	CFStringRef rcv_str = rb_bytestring_resolve_cfstring((VALUE)rcv);
-	if (rcv_str == (CFStringRef)rcv) {
-	    // Can't resolve a character string based on that data.
-	    return false;
-	}
-	return CFEqual(rcv_str, (CFTypeRef)other);
+	abort(); // TODO
     }
 }
 
-static CFIndex
-imp_rb_bytestring_length(void *rcv, SEL sel) 
+VALUE
+rb_str_equal(VALUE str, VALUE str2)
 {
-    return rb_bytestring_length((VALUE)rcv);
+    if (IS_RSTR(str)) {
+	return rstr_equal(str, 0, str2);
+    }
+    return CFEqual((CFStringRef)str, (CFStringRef)str2) ? Qtrue : Qfalse;
 }
 
-static inline long
-bytestring_index(VALUE bstr, VALUE idx)
+VALUE
+rb_str_dup(VALUE str)
 {
-    long index = NUM2LONG(idx);
-    while (index < 0) {
-	// adjusting for negative indices
-	index += rb_bytestring_length(bstr);
+    if (IS_RSTR(str)) {
+	return (VALUE)str_dup(RSTR(str));
     }
-    return index;
+    if (TYPE(str) == T_SYMBOL) {
+	return rb_sym_to_s(str);
+    }
+    abort(); // TODO
 }
 
-static VALUE
-rb_bytestring_getbyte(VALUE bstr, SEL sel, VALUE idx)
+// Unicode characters hashing function, copied from CoreFoundation.
+// This function might have some performance issues on large strings.
+unsigned long
+rb_str_hash_uchars(const UChar *chars, long len)
 {
-    const long index = bytestring_index(bstr, idx);
-    return INT2FIX(rb_bytestring_byte_pointer(bstr)[index]);
+    if (len == 0 || chars == NULL) {
+	return 0;
+    }
+#define HashNextFourUniChars(accessStart, accessEnd, pointer) \
+    {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593  + (accessStart 1 accessEnd) * 66049  + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;}
+
+#define HashNextUniChar(accessStart, accessEnd, pointer) \
+    {result = result * 257 + (accessStart 0 accessEnd); pointer++;}
+
+    unsigned long result = len;
+    const UChar *end4 = chars + (len & ~3);
+    const UChar *end = chars + len;
+    // First count in fours
+    while (chars < end4) HashNextFourUniChars(chars[, ], chars);
+    // Then for the last <4 chars, count in ones...
+    while (chars < end) HashNextUniChar(chars[, ], chars);
+    return result + (result << (len & 31));
+
+#undef HashNextFourUniChars
+#undef HashNextUniChar
 }
 
-static VALUE
-rb_bytestring_setbyte(VALUE bstr, SEL sel, VALUE idx, VALUE newbyte)
+unsigned long
+rb_str_hash(VALUE str)
 {
-    const long index = bytestring_index(bstr, idx);
-    rb_bytestring_byte_pointer(bstr)[index] = FIX2UINT(newbyte);
-    return Qnil;
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(str, &chars, &chars_len, &need_free);
+    const unsigned long hash = rb_str_hash_uchars(chars, chars_len);
+    if (need_free) {
+	free(chars);
+    }
+    return hash;
 }
 
-static VALUE
-rb_bytestring_bytesize(VALUE bstr, SEL sel)
+long
+rb_uchar_strtol(UniChar *chars, long chars_len, long pos, long *end_offset)
 {
-    return LONG2NUM(CFDataGetLength(rb_bytestring_wrapped_data(bstr)));
+    assert(chars != NULL && chars_len > 0 && pos >= 0);
+
+    UErrorCode status = U_ZERO_ERROR;
+    UNumberFormat *nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status);
+    assert(nf != NULL);
+
+    int32_t parse_pos = (int32_t)pos;
+    int64_t val = unum_parseInt64(nf, chars, chars_len, &parse_pos, &status);
+    unum_close(nf);
+
+    if (end_offset != NULL) {
+	*end_offset = (long)parse_pos;
+    }
+    return val;
 }
 
-static UniChar
-imp_rb_bytestring_characterAtIndex(void *rcv, SEL sel, CFIndex idx)
+long
+rb_memhash(const void *ptr, long len)
 {
-    // XXX should be encoding aware
-    return rb_bytestring_byte_pointer((VALUE)rcv)[idx];
+    CFDataRef data = CFDataCreate(NULL, (const UInt8 *)ptr, len);
+    const long code = CFHash(data);
+    CFRelease((CFTypeRef)data);
+    return code;
 }
 
-static void
-imp_rb_bytestring_replaceCharactersInRange_withString(void *rcv, SEL sel,
-	CFRange range, void *str)
+VALUE
+rb_str_inspect(VALUE rcv)
 {
-    const UInt8 *bytes = (const UInt8 *)RSTRING_PTR(str);
-    const long length = RSTRING_LEN(str);
-    CFMutableDataRef data = rb_bytestring_wrapped_data((VALUE)rcv);
+    if (RSTR(rcv)) {
+	return rstr_inspect(rcv, 0);
+    }
+    // TODO
+    return rcv;
+}
 
-    // No need to check if the given range fits in the data's bounds,
-    // CFDataReplaceBytes() will grow the object automatically for us.
-    CFDataReplaceBytes(data, range, bytes, length);
+VALUE
+rb_str_subseq(VALUE str, long beg, long len)
+{
+    if (IS_RSTR(str)) {
+	return rstr_substr(str, beg, len);
+    }
+    abort(); // TODO
 }
 
 VALUE
-rb_bytestring_copy(VALUE bstr)
+rb_str_substr(VALUE str, long beg, long len)
 {
-    VALUE new_bstr = rb_bytestring_new();
-    CFMutableDataRef rcv_data = rb_bytestring_wrapped_data(bstr);
-    CFMutableDataRef new_data = rb_bytestring_wrapped_data(new_bstr);
-    CFDataAppendBytes(new_data, (const UInt8 *)CFDataGetMutableBytePtr(rcv_data),
-	    CFDataGetLength(rcv_data));
-    return new_bstr;
+    return rb_str_subseq(str, beg, len);
 }
 
-static void *
-imp_rb_bytestring_mutableCopy(void *rcv, SEL sel)
+void
+rb_str_update(VALUE str, long beg, long len, VALUE val)
 {
-    return (void *)rb_bytestring_copy((VALUE)rcv);
+    if (IS_RSTR(str)) {
+	rstr_splice(str, beg, len, val);
+    }
+    else {
+	abort(); // TODO
+    }
 }
 
-static void
-imp_rb_bytestring_cfAppendCString_length(void *rcv, SEL sel, const UInt8 *cstr,
-					 long len)
+void
+rb_str_delete(VALUE str, long beg, long len)
 {
-    CFDataAppendBytes(rb_bytestring_wrapped_data((VALUE)rcv), cstr, len);
+    if (IS_RSTR(str)) {
+	str_delete(RSTR(str), beg, len, false);
+    }
+    else {
+	abort(); // TODO
+    }
 }
 
-static void
-imp_rb_bytestring_setString(void *rcv, SEL sel, void *new_str)
+int
+rb_str_cmp(VALUE str1, VALUE str2)
 {
-    CFMutableDataRef data = rb_bytestring_wrapped_data((VALUE)rcv);
-    CFRange data_range = CFRangeMake(0, CFDataGetLength(data));
-    const char *cstr = RSTRING_PTR(new_str);
-    const long len = RSTRING_LEN(new_str);
-    CFDataReplaceBytes(data, data_range, (const UInt8 *)cstr, len);
-} 
+    return str_compare(str_need_string(str1), str_need_string(str2));
+}
 
-/*
- *  A <code>String</code> object holds and manipulates an arbitrary sequence of
- *  bytes, typically representing characters. String objects may be created
- *  using <code>String::new</code> or as literals.
- *     
- *  Because of aliasing issues, users of strings should be aware of the methods
- *  that modify the contents of a <code>String</code> object.  Typically,
- *  methods with names ending in ``!'' modify their receiver, while those
- *  without a ``!'' return a new <code>String</code>.  However, there are
- *  exceptions, such as <code>String#[]=</code>.
- *     
- */
-
-#if MAC_OS_X_VERSION_MAX_ALLOWED < 1070
-# define NSCFSTRING_CNAME "NSCFString"
-#else
-# define NSCFSTRING_CNAME "__NSCFString"
-#endif
-
-void
-Init_String(void)
+int
+rb_str_casecmp(VALUE str1, VALUE str2)
 {
-    rb_cCFString = (VALUE)objc_getClass(NSCFSTRING_CNAME);
-    assert(rb_cCFString != 0);
-    rb_const_set(rb_cObject, rb_intern("NSCFString"), rb_cCFString);
-    rb_cString = rb_cNSString = (VALUE)objc_getClass("NSString");
-    rb_cNSMutableString = (VALUE)objc_getClass("NSMutableString");
-    rb_const_set(rb_cObject, rb_intern("String"), rb_cNSMutableString);
-    rb_set_class_path(rb_cNSMutableString, rb_cObject, "NSMutableString");
-
-    rb_include_module(rb_cString, rb_mComparable);
-
-    rb_objc_define_method(*(VALUE *)rb_cString, "try_convert", rb_str_s_try_convert, 1);
-    rb_objc_define_method(rb_cString, "initialize", rb_str_init, -1);
-    rb_objc_define_method(rb_cString, "initialize_copy", rb_str_replace_imp, 1);
-    rb_objc_define_method(rb_cString, "<=>", rb_str_cmp_m, 1);
-    rb_objc_define_method(rb_cString, "==", rb_str_equal_imp, 1);
-    rb_objc_define_method(rb_cString, "eql?", rb_str_eql, 1);
-    rb_objc_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
-    rb_objc_define_method(rb_cString, "+", rb_str_plus_imp, 1);
-    rb_objc_define_method(rb_cString, "*", rb_str_times, 1);
-    rb_objc_define_method(rb_cString, "%", rb_str_format_m, 1);
-    rb_objc_define_method(rb_cString, "[]", rb_str_aref_m, -1);
-    rb_objc_define_method(rb_cString, "[]=", rb_str_aset_m, -1);
-    rb_objc_define_method(rb_cString, "insert", rb_str_insert, 2);
-    rb_objc_define_method(rb_cString, "size", rb_str_length_imp, 0);
-    rb_objc_define_method(rb_cString, "bytesize", rb_str_bytesize, 0);
-    rb_objc_define_method(rb_cString, "empty?", rb_str_empty, 0);
-    rb_objc_define_method(rb_cString, "=~", rb_str_match, 1);
-    rb_objc_define_method(rb_cString, "match", rb_str_match_m, -1);
-    rb_objc_define_method(rb_cString, "succ", rb_str_succ, 0);
-    rb_objc_define_method(rb_cString, "succ!", rb_str_succ_bang, 0);
-    rb_objc_define_method(rb_cString, "next", rb_str_succ, 0);
-    rb_objc_define_method(rb_cString, "next!", rb_str_succ_bang, 0);
-    rb_objc_define_method(rb_cString, "upto", rb_str_upto, -1);
-    rb_objc_define_method(rb_cString, "index", rb_str_index_m, -1);
-    rb_objc_define_method(rb_cString, "rindex", rb_str_rindex_m, -1);
-    rb_objc_define_method(rb_cString, "replace", rb_str_replace_imp, 1);
-    rb_objc_define_method(rb_cString, "clear", rb_str_clear, 0);
-    rb_objc_define_method(rb_cString, "chr", rb_str_chr, 0);
-    rb_objc_define_method(rb_cString, "getbyte", rb_str_getbyte, 1);
-    rb_objc_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
-
-    rb_objc_define_method(rb_cString, "to_i", rb_str_to_i, -1);
-    rb_objc_define_method(rb_cString, "to_f", rb_str_to_f, 0);
-    rb_objc_define_method(rb_cString, "to_s", rb_str_to_s, 0);
-    rb_objc_define_method(rb_cString, "to_str", rb_str_to_s, 0);
-    rb_objc_define_method(rb_cString, "inspect", rb_str_inspect, 0);
-    rb_objc_define_method(rb_cString, "dump", rb_str_dump, 0);
-
-    rb_objc_define_method(rb_cString, "upcase", rb_str_upcase, 0);
-    rb_objc_define_method(rb_cString, "downcase", rb_str_downcase, 0);
-    rb_objc_define_method(rb_cString, "capitalize", rb_str_capitalize, 0);
-    rb_objc_define_method(rb_cString, "swapcase", rb_str_swapcase, 0);
-
-    rb_objc_define_method(rb_cString, "upcase!", rb_str_upcase_bang, 0);
-    rb_objc_define_method(rb_cString, "downcase!", rb_str_downcase_bang, 0);
-    rb_objc_define_method(rb_cString, "capitalize!", rb_str_capitalize_bang, 0);
-    rb_objc_define_method(rb_cString, "swapcase!", rb_str_swapcase_bang, 0);
-
-    rb_objc_define_method(rb_cString, "hex", rb_str_hex, 0);
-    rb_objc_define_method(rb_cString, "oct", rb_str_oct, 0);
-    rb_objc_define_method(rb_cString, "split", rb_str_split_m, -1);
-    rb_objc_define_method(rb_cString, "lines", rb_str_each_line, -1);
-    rb_objc_define_method(rb_cString, "bytes", rb_str_each_byte, 0);
-    rb_objc_define_method(rb_cString, "chars", rb_str_each_char, 0);
-    rb_objc_define_method(rb_cString, "reverse", rb_str_reverse, 0);
-    rb_objc_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0);
-    rb_objc_define_method(rb_cString, "concat", rb_str_concat_imp, 1);
-    rb_objc_define_method(rb_cString, "<<", rb_str_concat_imp, 1);
-    rb_objc_define_method(rb_cString, "crypt", rb_str_crypt, 1);
-    rb_objc_define_method(rb_cString, "intern", rb_str_intern, 0);
-    rb_objc_define_method(rb_cString, "to_sym", rb_str_intern, 0);
-    rb_objc_define_method(rb_cString, "ord", rb_str_ord, 0);
-
-    rb_objc_define_method(rb_cString, "include?", rb_str_include, 1);
-    rb_objc_define_method(rb_cString, "start_with?", rb_str_start_with, -1);
-    rb_objc_define_method(rb_cString, "end_with?", rb_str_end_with, -1);
-
-    rb_objc_define_method(rb_cString, "scan", rb_str_scan, 1);
-
-    rb_objc_define_method(rb_cString, "ljust", rb_str_ljust, -1);
-    rb_objc_define_method(rb_cString, "rjust", rb_str_rjust, -1);
-    rb_objc_define_method(rb_cString, "center", rb_str_center, -1);
-
-    rb_objc_define_method(rb_cString, "sub", rb_str_sub, -1);
-    rb_objc_define_method(rb_cString, "gsub", rb_str_gsub, -1);
-    rb_objc_define_method(rb_cString, "chop", rb_str_chop, 0);
-    rb_objc_define_method(rb_cString, "chomp", rb_str_chomp, -1);
-    rb_objc_define_method(rb_cString, "strip", rb_str_strip, 0);
-    rb_objc_define_method(rb_cString, "lstrip", rb_str_lstrip, 0);
-    rb_objc_define_method(rb_cString, "rstrip", rb_str_rstrip, 0);
-
-    rb_objc_define_method(rb_cString, "sub!", rb_str_sub_bang, -1);
-    rb_objc_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1);
-    rb_objc_define_method(rb_cString, "chop!", rb_str_chop_bang, 0);
-    rb_objc_define_method(rb_cString, "chomp!", rb_str_chomp_bang, -1);
-    rb_objc_define_method(rb_cString, "strip!", rb_str_strip_bang, 0);
-    rb_objc_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, 0);
-    rb_objc_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, 0);
-
-    rb_objc_define_method(rb_cString, "tr", rb_str_tr, 2);
-    rb_objc_define_method(rb_cString, "tr_s", rb_str_tr_s, 2);
-    rb_objc_define_method(rb_cString, "delete", rb_str_delete, -1);
-    rb_objc_define_method(rb_cString, "squeeze", rb_str_squeeze, -1);
-    rb_objc_define_method(rb_cString, "count", rb_str_count, -1);
-
-    rb_objc_define_method(rb_cString, "tr!", rb_str_tr_bang, 2);
-    rb_objc_define_method(rb_cString, "tr_s!", rb_str_tr_s_bang, 2);
-    rb_objc_define_method(rb_cString, "delete!", rb_str_delete_bang, -1);
-    rb_objc_define_method(rb_cString, "squeeze!", rb_str_squeeze_bang, -1);
-
-    rb_objc_define_method(rb_cString, "each_line", rb_str_each_line, -1);
-    rb_objc_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
-    rb_objc_define_method(rb_cString, "each_char", rb_str_each_char, 0);
-
-    rb_objc_define_method(rb_cString, "sum", rb_str_sum, -1);
-
-    rb_objc_define_method(rb_cString, "slice", rb_str_aref_m, -1);
-    rb_objc_define_method(rb_cString, "slice!", rb_str_slice_bang, -1);
-
-    rb_objc_define_method(rb_cString, "partition", rb_str_partition, 1);
-    rb_objc_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
-
-    rb_objc_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
-    rb_objc_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
-    rb_objc_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
-    rb_objc_define_method(rb_cString, "ascii_only?", rb_str_is_ascii_only_p, 0);
-
-    rb_objc_define_method(rb_cString, "transform", rb_str_transform, 1);
-    rb_objc_define_method(rb_cString, "transform!", rb_str_transform_bang, 1);
-
-    /* to return mutable copies */
-    rb_objc_define_method(rb_cString, "dup", rb_str_dup_imp, 0);
-    rb_objc_define_method(rb_cString, "clone", rb_str_clone, 0);
-
-    id_to_s = rb_intern("to_s");
-
-    rb_fs = Qnil;
-    rb_define_variable("$;", &rb_fs);
-    rb_define_variable("$-F", &rb_fs);
-
-    /* rb_cSymbol is defined in parse.y because it's needed early */
-    rb_set_class_path(rb_cSymbol, rb_cObject, "Symbol");
-    rb_const_set(rb_cObject, rb_intern("Symbol"), rb_cSymbol);
-
-    rb_undef_alloc_func(rb_cSymbol);
-    rb_undef_method(CLASS_OF(rb_cSymbol), "new");
-    rb_objc_define_method(*(VALUE *)rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */
-
-    rb_objc_define_method(rb_cSymbol, "==", sym_equal, 1);
-    rb_objc_define_method(rb_cSymbol, "eql?", sym_equal, 1);
-    rb_objc_define_method(rb_cSymbol, "<=>", sym_cmp, 1);
-    rb_objc_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
-    rb_objc_define_method(rb_cSymbol, "dup", rb_obj_dup, 0);
-    rb_objc_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
-    rb_objc_define_method(rb_cSymbol, "to_s", rb_sym_to_s_imp, 0);
-    rb_objc_define_method(rb_cSymbol, "id2name", rb_sym_to_s_imp, 0);
-    rb_objc_define_method(rb_cSymbol, "description", rb_sym_to_s_imp, 0);
-    rb_objc_define_method(rb_cSymbol, "intern", sym_to_sym, 0);
-    rb_objc_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
- 
-    rb_undef_method(rb_cSymbol, "to_str");
-    rb_undef_method(rb_cSymbol, "include?");
-
-    install_symbol_primitives();
-
-    rb_cByteString = rb_define_class("ByteString", rb_cNSMutableString);
-    RCLASS_SET_VERSION_FLAG(rb_cByteString, RCLASS_IS_STRING_SUBCLASS);
-
-    rb_objc_define_method(*(VALUE *)rb_cString, "__new_bytestring__",
-	    bytestring_from_data, 1);
-
-    rb_objc_install_method2((Class)rb_cByteString, "isEqual:",
-	    (IMP)imp_rb_bytestring_isEqual);
-    rb_objc_install_method2((Class)rb_cByteString, "length",
-	    (IMP)imp_rb_bytestring_length);
-    rb_objc_install_method2((Class)rb_cByteString, "characterAtIndex:",
-	    (IMP)imp_rb_bytestring_characterAtIndex);
-    rb_objc_install_method2((Class)rb_cByteString,
-	    "replaceCharactersInRange:withString:",
-	    (IMP)imp_rb_bytestring_replaceCharactersInRange_withString);
-    rb_objc_install_method2((Class)rb_cByteString, "mutableCopy",
-	    (IMP)imp_rb_bytestring_mutableCopy);
-    rb_objc_install_method2((Class)rb_cByteString, "_cfAppendCString:length:",
-	    (IMP)imp_rb_bytestring_cfAppendCString_length);
-    rb_objc_install_method2((Class)rb_cByteString, "setString:",
-	    (IMP)imp_rb_bytestring_setString);
-    rb_objc_define_method(rb_cByteString, "initialize",
-	    rb_bytestring_initialize, -1);
-    rb_objc_define_method(*(VALUE *)rb_cByteString, "alloc",
-	    rb_bytestring_alloc, 0);
-    rb_objc_define_method(rb_cByteString, "bytesize",
-	    rb_bytestring_bytesize, 0);
-    rb_objc_define_method(rb_cByteString, "getbyte", rb_bytestring_getbyte, 1);
-    rb_objc_define_method(rb_cByteString, "setbyte", rb_bytestring_setbyte, 2);
+    return str_case_compare(str_need_string(str1), str_need_string(str2));
 }

Copied: MacRuby/trunk/symbol.c (from rev 3744, MacRuby/branches/icu/symbol.c)
===================================================================
--- MacRuby/trunk/symbol.c	                        (rev 0)
+++ MacRuby/trunk/symbol.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,667 @@
+/* 
+ * MacRuby Symbols.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ * 
+ * Copyright (C) 2010, Apple Inc. All rights reserved.
+ */
+
+#include <wctype.h>
+
+#include "ruby.h"
+#include "ruby/encoding.h"
+#include "encoding.h"
+#include "symbol.h"
+#include "ruby/node.h"
+#include "vm.h"
+#include "objc.h"
+
+VALUE rb_cSymbol;
+
+static CFMutableDictionaryRef sym_id = NULL, id_str = NULL;
+static long last_id = 0;
+
+typedef struct {
+    VALUE klass;
+    VALUE str;
+    ID id;
+} rb_sym_t;
+
+#define RSYM(obj) ((rb_sym_t *)(obj))
+
+static rb_sym_t *
+sym_alloc(VALUE str, ID id)
+{
+    rb_sym_t *sym = (rb_sym_t *)malloc(sizeof(rb_sym_t));
+    assert(rb_cSymbol != 0);
+    sym->klass = rb_cSymbol;
+    GC_RETAIN(str); // never released
+    sym->str = str;
+    sym->id = id;
+    return sym;
+}
+
+static bool
+is_identchar(UChar c)
+{
+    return isalnum(c) || c == '_' || !isascii(c);
+}
+
+ID
+rb_intern_str(VALUE str)
+{
+    const unsigned long name_hash = rb_str_hash(str);
+    ID id = (ID)CFDictionaryGetValue(sym_id, (const void *)name_hash); 
+    if (id != 0) {
+	return id;
+    } 
+
+    rb_sym_t *sym = NULL;
+
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(str, &chars, &chars_len, &need_free);
+
+    long pos = 0;
+    if (chars_len > 0) {
+	UChar c = chars[0];
+	switch (c) {
+	    case '$':
+		id = ID_GLOBAL;
+		goto new_id;
+
+	    case '@':
+		if (chars_len > 1 && chars[1] == '@') {
+		    pos++;
+		    id = ID_CLASS;
+		}
+		else {
+		    id = ID_INSTANCE;
+		}
+		pos++;
+		break;
+
+	    default:
+		if (chars_len > 1 && chars[chars_len - 1] == '=') {
+		    // Attribute assignment.
+		    id = rb_intern_str(rb_str_substr(str, 0, chars_len - 1));
+		    if (!is_attrset_id(id)) {
+			id = rb_id_attrset(id);
+			goto id_register;
+		    }
+		    id = ID_ATTRSET;
+		}
+		else if (iswupper(c)) {
+		    id = ID_CONST;
+		}
+		else {
+		    id = ID_LOCAL;
+		}
+		break;
+	}
+    }
+
+    if (pos < chars_len && !isdigit(chars[pos])) {
+	for (; pos < chars_len; pos++) {
+	    if (!is_identchar(chars[pos])) {
+		break;
+	    }
+	}
+    }
+    if (pos < chars_len) {
+	id = ID_JUNK;
+    }
+
+new_id:
+    id |= ++last_id << ID_SCOPE_SHIFT;
+
+id_register:
+//printf("register %s hash %ld id %ld\n", RSTRING_PTR(str), name_hash, id);
+    sym = sym_alloc(str, id);
+    CFDictionarySetValue(sym_id, (const void *)name_hash, (const void *)id);
+    CFDictionarySetValue(id_str, (const void *)id, (const void *)sym);
+
+    if (need_free) {
+	free(chars);
+    }
+    return id;
+}
+
+VALUE
+rb_id2str(ID id)
+{
+    VALUE sym = (VALUE)CFDictionaryGetValue(id_str, (const void *)id);
+    if (sym != 0) {
+//printf("lookup %ld -> %s\n", id, rb_sym2name(sym));
+	return sym;
+    }
+
+    if (is_attrset_id(id)) {
+	// Attribute assignment.
+	ID id2 = (id & ~ID_SCOPE_MASK) | ID_LOCAL;
+
+	while ((sym = rb_id2str(id2)) == 0) {
+	    if (!is_local_id(id2)) {
+//printf("lookup %ld -> FAIL\n", id);
+		return 0;
+	    }
+	    id2 = (id & ~ID_SCOPE_MASK) | ID_CONST;
+	}
+
+	VALUE str = rb_str_dup(RSYM(sym)->str);
+	rb_str_cat(str, "=", 1);
+	rb_intern_str(str);
+
+	// Retry one more time.
+	sym = (VALUE)CFDictionaryGetValue(id_str, (const void *)id);
+	if (sym != 0) {
+//printf("lookup %ld -> %s\n", id, rb_sym2name(sym));
+	    return sym;
+	}
+    }
+//printf("lookup %ld -> FAIL\n", id);
+    return 0;
+}
+
+ID
+rb_intern3(const char *name, long len, rb_encoding *enc)
+{
+    VALUE str = rb_enc_str_new(name, len, enc);
+    return rb_intern_str(str);
+}
+
+ID
+rb_intern2(const char *name, long len)
+{
+    return rb_intern_str(rb_str_new(name, len));
+}
+
+ID
+rb_intern(const char *name)
+{
+    return rb_intern_str(rb_str_new2(name));
+}
+
+ID
+rb_sym2id(VALUE sym)
+{
+    return RSYM(sym)->id;
+}
+
+VALUE
+rb_name2sym(const char *name)
+{
+    return rb_id2str(rb_intern(name));
+}
+
+VALUE
+rb_sym_to_s(VALUE sym)
+{
+    return rb_str_dup(RSYM(sym)->str);
+}
+
+const char *
+rb_sym2name(VALUE sym)
+{
+    return RSTRING_PTR(RSYM(sym)->str);
+}
+
+/*
+ *  call-seq:
+ *     Symbol.all_symbols    => array
+ *
+ *  Returns an array of all the symbols currently in Ruby's symbol
+ *  table.
+ *
+ *     Symbol.all_symbols.size    #=> 903
+ *     Symbol.all_symbols[1,20]   #=> [:floor, :ARGV, :Binding, :symlink,
+ *                                     :chown, :EOFError, :$;, :String,
+ *                                     :LOCK_SH, :"setuid?", :$<,
+ *                                     :default_proc, :compact, :extend,
+ *                                     :Tms, :getwd, :$=, :ThreadGroup,
+ *                                     :wait2, :$>]
+ */
+
+static VALUE
+rsym_all_symbols(VALUE klass, SEL sel)
+{
+    VALUE ary = rb_ary_new();
+    const long count = CFDictionaryGetCount(id_str);
+    if (count >= 0) {
+	const void **values = (const void **)malloc(sizeof(void *) * count);
+	CFDictionaryGetKeysAndValues(id_str, NULL, values);
+	for (long i = 0; i < count; i++) {
+	    rb_ary_push(ary, (VALUE)values[i]);
+	}
+	free(values);
+    }
+    return ary;
+}
+
+void
+Init_PreSymbol(void)
+{
+    sym_id = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
+    id_str = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
+    last_id = 1000;
+
+    // Pre-register parser symbols.
+    for (int i = 0; rb_op_tbl[i].token != 0; i++) {
+	ID id = rb_op_tbl[i].token;
+	VALUE str = rb_str_new2(rb_op_tbl[i].name);
+	rb_sym_t *sym = sym_alloc(str, id);
+	unsigned long name_hash = rb_str_hash(str);
+
+//printf("pre-register %s hash %ld id %ld\n", RSTRING_PTR(str), name_hash, id);
+
+	CFDictionarySetValue(sym_id, (const void *)name_hash, (const void *)id);
+	CFDictionarySetValue(id_str, (const void *)id, (const void *)sym);
+    }
+}
+
+/*
+ * call-seq:
+ *
+ *   str <=> other       => -1, 0, +1 or nil
+ *
+ * Compares _sym_ with _other_ in string form.
+ */
+
+static VALUE
+rsym_cmp(VALUE sym, SEL sel, VALUE other)
+{
+    if (TYPE(other) != T_SYMBOL) {
+	return Qnil;
+    }
+    return INT2FIX(rb_str_cmp(RSYM(sym)->str, RSYM(other)->str));
+}
+
+/*
+ * call-seq:
+ *
+ *   sym.casecmp(other)  => -1, 0, +1 or nil
+ *
+ * Case-insensitive version of <code>Symbol#<=></code>.
+ */
+
+static VALUE
+rsym_casecmp(VALUE sym, SEL sel, VALUE other)
+{
+    if (TYPE(other) != T_SYMBOL) {
+	return Qnil;
+    }
+    return INT2FIX(rb_str_casecmp(RSYM(sym)->str, RSYM(other)->str));
+}
+
+/*
+ *  call-seq:
+ *     sym == obj   => true or false
+ *  
+ *  Equality---If <i>sym</i> and <i>obj</i> are exactly the same
+ *  symbol, returns <code>true</code>. Otherwise, compares them
+ *  as strings.
+ */
+
+static VALUE
+rsym_equal(VALUE sym, SEL sel, VALUE other)
+{
+    return sym == other ? Qtrue : Qfalse;
+}
+
+/*
+ *  call-seq:
+ *     sym.inspect    => string
+ *  
+ *  Returns the representation of <i>sym</i> as a symbol literal.
+ *     
+ *     :fred.inspect   #=> ":fred"
+ */
+
+static bool
+is_special_global_name(UChar *ptr, long len)
+{
+    if (len <= 0) {
+	return false;
+    }
+
+    long pos = 0;
+    switch (ptr[pos]) {
+	case '~': case '*': case '$': case '?': case '!':
+	case '@': case '/': case '\\': case ';': case ',':
+	case '.': case '=': case ':': case '<': case '>': 
+	case '\"': case '&': case '`': case '\'': case '+': case '0':
+	    pos++;
+	    break;
+
+	case '-':
+	    pos++;
+	    if (pos < len && is_identchar(ptr[pos])) {
+		pos++;
+	    }
+	    break;
+
+	default:
+	    if (!isdigit(ptr[pos])) {
+		return false;
+	    }
+	    do {
+		pos++;
+	    }
+	    while (pos < len && isdigit(ptr[pos]));
+	    break;
+    }
+    return pos == len;
+}
+
+static bool
+sym_should_be_escaped(VALUE sym)
+{
+    UChar *chars = NULL;
+    long chars_len = 0;
+    bool need_free = false;
+    rb_str_get_uchars(RSYM(sym)->str, &chars, &chars_len, &need_free);
+
+    if (chars_len == 0) {
+	return true;
+    }
+
+    bool escape = false;
+    for (long i = 0; i < chars_len; i++) {
+	if (!isprint(chars[i])) {
+	    escape = true;
+	    break;
+	}
+    }
+
+    if (escape) {
+	goto bail;
+    }
+
+    long pos = 0;
+    bool localid = false;
+
+    switch (chars[pos]) {
+	case '\0':
+	    escape = true;
+	    break;
+
+	case '$':
+	    pos++;
+	    if (pos < chars_len && is_special_global_name(&chars[pos],
+			chars_len - pos)) {
+		goto bail;
+	    }
+	    goto id;
+
+	case '@':
+	    pos++;
+	    if (pos < chars_len && chars[pos] == '@') {
+		pos++;
+	    }
+	    goto id;
+
+	case '<':
+	    pos++;
+	    if (pos < chars_len) {
+		if (chars[pos] == '<') {
+		    pos++;
+		}
+		else if (chars[pos] == '=') {
+		    pos++;
+		    if (pos < chars_len && chars[pos] == '>') {
+			pos++;
+		    }
+		}
+	    }
+	    break;
+
+	case '>':
+	    pos++;
+	    if (pos < chars_len) {
+		if (chars[pos] == '>' || chars[pos] == '=') {
+		    pos++;
+		}
+	    }
+	    break;
+
+	case '=':
+	    pos++;
+	    if (pos == chars_len) {
+		escape = true;
+		goto bail;
+	    }
+	    else {
+		if (chars[pos] == '~') {
+		    pos++;
+		}
+		else if (chars[pos] == '=') {
+		    pos++;
+		    if (pos < chars_len && chars[pos] == '=') {
+			pos++;
+		    }
+		}
+		else {
+		    escape = true;
+		    goto bail;
+		}
+	    }
+	    break;
+
+	case '*':
+	    pos++;
+	    if (pos < chars_len && chars[pos] == '*') {
+		pos++;
+	    }
+	    break;
+
+	case '+':
+	case '-':
+	    pos++;
+	    if (pos < chars_len && chars[pos] == '@') {
+		pos++;
+	    }
+	    break;
+
+	case '|': case '^': case '&': case '/':
+	case '%': case '~': case '`':
+	    pos++;
+	    break;
+
+	case '[':
+	    pos++;
+	    if (pos < chars_len && chars[pos] != ']') {
+		escape = true;
+		goto bail;
+	    }
+	    pos++;
+	    if (pos < chars_len && chars[pos] == '=') {
+		pos++;
+	    }
+	    break;
+
+	case '!':
+	    pos++;
+	    if (pos == chars_len) {
+		goto bail;
+	    }
+	    else {
+		if (chars[pos] == '=' || chars[pos] == '~') {
+		    pos++;
+		}
+		else {
+		    escape = true;
+		    goto bail;
+		}
+	    }
+	    break;
+
+	default:
+	    localid = !isupper(chars[pos]);
+	    // fall through	
+
+	id:
+	    if (pos >= chars_len
+		    || (chars[pos] != '_' && !isalpha(chars[pos])
+			&& isascii(chars[pos]))) {
+		escape = true;
+		goto bail;
+	    }
+	    while (pos < chars_len && is_identchar(chars[pos])) {
+		pos++;
+	    }
+	    if (localid) {
+		if (pos < chars_len
+			&& (chars[pos] == '!' || chars[pos] == '?'
+			    || chars[pos] == '=')) {
+		    pos++;
+		}
+	    }
+	    break;
+    }
+
+    if (pos < chars_len) {
+	escape = true;
+    }
+
+bail:
+    if (need_free) {
+	free(chars);
+    }
+
+    return escape;
+}
+
+static VALUE
+rsym_inspect(VALUE sym, SEL sel)
+{
+    VALUE str = rb_str_new2(":");
+    if (sym_should_be_escaped(sym)) {
+	rb_str_concat(str, rb_str_inspect(RSYM(sym)->str));
+    }
+    else {
+	rb_str_concat(str, RSYM(sym)->str);
+    }
+    return str;
+}
+
+/*
+ * call-seq:
+ *   sym.to_proc
+ *
+ * Returns a _Proc_ object which respond to the given method by _sym_.
+ *
+ *   (1..3).collect(&:to_s)  #=> ["1", "2", "3"]
+ */
+
+static VALUE
+rsym_to_proc(VALUE sym, SEL sel)
+{
+    SEL msel = sel_registerName(rb_id2name(SYM2ID(sym)));
+    rb_vm_block_t *b = rb_vm_create_block_calling_sel(msel);
+    return rb_proc_alloc_with_block(rb_cProc, b);
+}
+
+/*
+ *  call-seq:
+ *     sym.id2name   => string
+ *     sym.to_s      => string
+ *  
+ *  Returns the name or string corresponding to <i>sym</i>.
+ *     
+ *     :fred.id2name   #=> "fred"
+ */
+
+static VALUE
+rsym_to_s(VALUE sym, SEL sel)
+{
+    return rb_sym_to_s(sym);
+}
+
+/*
+ * call-seq:
+ *   sym.to_sym   => sym
+ *   sym.intern   => sym
+ *
+ * In general, <code>to_sym</code> returns the <code>Symbol</code>
+ * corresponding to an object. As <i>sym</i> is already a symbol,
+ * <code>self</code> is returned in this case.
+ */
+
+static VALUE
+rsym_to_sym(VALUE sym, SEL sel)
+{
+    return sym;
+}
+
+/*
+ * call-seq:
+ *   sym.empty?   => true or false
+ *
+ * Returns that _sym_ is :"" or not.
+ */
+
+static VALUE
+rsym_empty(VALUE sym, SEL sel)
+{
+    return rb_str_chars_len(RSYM(sym)->str) == 0 ? Qtrue : Qfalse;
+}
+
+/*
+ * call-seq:
+ *   sym[idx]      => char
+ *   sym[b, n]     => char
+ *
+ * Returns <code>sym.to_s[]</code>.
+ */
+
+static VALUE
+rsym_aref(VALUE sym, SEL sel, int argc, VALUE *argv)
+{
+    return rstr_aref(RSYM(sym)->str, sel, argc, argv);
+}
+
+static CFIndex
+rsym_imp_length(void *rcv, SEL sel)
+{
+    return CFStringGetLength((CFStringRef)RSYM(rcv)->str);
+}
+
+static UniChar
+rsym_imp_characterAtIndex(void *rcv, SEL sel, CFIndex idx)
+{
+    return CFStringGetCharacterAtIndex((CFStringRef)RSYM(rcv)->str, idx);
+}
+
+void
+Init_Symbol(void)
+{
+    // rb_cSymbol is defined earlier in Init_PreVM().
+    rb_set_class_path(rb_cSymbol, rb_cObject, "Symbol");
+    rb_const_set(rb_cObject, rb_intern("Symbol"), rb_cSymbol);
+
+    rb_undef_alloc_func(rb_cSymbol);
+    rb_undef_method(*(VALUE *)rb_cSymbol, "new");
+    rb_objc_define_method(*(VALUE *)rb_cSymbol, "all_symbols",
+	    rsym_all_symbols, 0);
+
+    rb_objc_define_method(rb_cSymbol, "==", rsym_equal, 1);
+    rb_objc_define_method(rb_cSymbol, "<=>", rsym_cmp, 1);
+    rb_objc_define_method(rb_cSymbol, "casecmp", rsym_casecmp, 1);
+    rb_objc_define_method(rb_cSymbol, "eql?", rsym_equal, 1);
+    rb_objc_define_method(rb_cSymbol, "inspect", rsym_inspect, 0);
+    rb_objc_define_method(rb_cSymbol, "to_proc", rsym_to_proc, 0);
+    rb_objc_define_method(rb_cSymbol, "to_s", rsym_to_s, 0);
+    rb_objc_define_method(rb_cSymbol, "id2name", rsym_to_s, 0);
+    rb_objc_define_method(rb_cSymbol, "description", rsym_to_s, 0);
+    rb_objc_define_method(rb_cSymbol, "intern", rsym_to_sym, 0);
+    rb_objc_define_method(rb_cSymbol, "to_sym", rsym_to_sym, 0);
+    rb_objc_define_method(rb_cSymbol, "empty?", rsym_empty, 0);
+    rb_objc_define_method(rb_cSymbol, "[]", rsym_aref, -1);
+
+    // Cocoa primitives.
+    rb_objc_install_method2((Class)rb_cSymbol, "length",
+	    (IMP)rsym_imp_length);
+    rb_objc_install_method2((Class)rb_cSymbol, "characterAtIndex:",
+	    (IMP)rsym_imp_characterAtIndex);
+}

Copied: MacRuby/trunk/symbol.h (from rev 3744, MacRuby/branches/icu/symbol.h)
===================================================================
--- MacRuby/trunk/symbol.h	                        (rev 0)
+++ MacRuby/trunk/symbol.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,54 @@
+/* 
+ * MacRuby Symbols.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ * 
+ * Copyright (C) 2010, Apple Inc. All rights reserved.
+ */
+
+#ifndef __SYMBOL_H_
+#define __SYMBOL_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+// For the parser.
+#define ID_SCOPE_SHIFT 3
+#define ID_SCOPE_MASK 0x07
+#define ID_LOCAL      0x00
+#define ID_INSTANCE   0x01
+#define ID_GLOBAL     0x03
+#define ID_ATTRSET    0x04
+#define ID_CONST      0x05
+#define ID_CLASS      0x06
+#define ID_JUNK       0x07
+#define ID_INTERNAL   ID_JUNK
+
+#define is_notop_id(id) (true)
+#define is_local_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_LOCAL)
+#define is_global_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_GLOBAL)
+#define is_instance_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_INSTANCE)
+#define is_attrset_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_ATTRSET)
+#define is_const_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CONST)
+#define is_class_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_CLASS)
+#define is_junk_id(id) (is_notop_id(id)&&((id)&ID_SCOPE_MASK)==ID_JUNK)
+
+#define is_asgn_or_id(id) ((is_notop_id(id)) && \
+	(((id)&ID_SCOPE_MASK) == ID_GLOBAL || \
+	 ((id)&ID_SCOPE_MASK) == ID_INSTANCE || \
+	 ((id)&ID_SCOPE_MASK) == ID_CLASS))
+
+struct rb_op_tbl_entry {
+    ID token;
+    const char *name;
+};
+
+// Defined in parse.y.
+extern struct rb_op_tbl_entry rb_op_tbl[];
+
+#if defined(__cplusplus)
+} // extern "C"
+#endif
+
+#endif // __SYMBOL_H_

Modified: MacRuby/trunk/time.c
===================================================================
--- MacRuby/trunk/time.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/time.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -9,18 +9,16 @@
 
 **********************************************************************/
 
-#include "ruby/ruby.h"
 #include <sys/types.h>
 #include <time.h>
 #include <errno.h>
-#include "ruby/encoding.h"
-
-#ifdef HAVE_UNISTD_H
 #include <unistd.h>
-#endif
-
 #include <math.h>
 
+#include "ruby/ruby.h"
+#include "ruby/encoding.h"
+#include "encoding.h"
+
 VALUE rb_cTime;
 static VALUE time_utc_offset _((VALUE, SEL));
 
@@ -2171,7 +2169,7 @@
 	s = RSHIFT(s, 8);
     }
 
-    str = rb_bytestring_new_with_data(buf, 8);
+    str = rb_bstr_new_with_data(buf, 8);
     rb_copy_generic_ivar(str, time);
     if (nsec) {
         /*
@@ -2190,7 +2188,7 @@
         buf[0] |= (nsec % 10) << 4;
         if (buf[1] == 0)
             len = 1;
-        rb_ivar_set(str, id_submicro, rb_bytestring_new_with_data(buf, len));
+        rb_ivar_set(str, id_submicro, rb_bstr_new_with_data(buf, len));
     }
     return str;
 }
@@ -2238,10 +2236,10 @@
     rb_copy_generic_ivar(time, str);
 
     StringValue(str);
-    assert(*(VALUE *)str == rb_cByteString);
+    str = rb_str_bstr(str);
 
-    unsigned char *buf = (unsigned char *)rb_bytestring_byte_pointer(str);
-    const size_t buflen = rb_bytestring_length(str); 
+    uint8_t *buf = rb_bstr_bytes(str);
+    const long buflen = rb_bstr_length(str); 
     if (buflen != 8 && buflen != 9) {
 	rb_raise(rb_eTypeError, "marshaled time format differ");
     }

Copied: MacRuby/trunk/ucnv.c (from rev 3744, MacRuby/branches/icu/ucnv.c)
===================================================================
--- MacRuby/trunk/ucnv.c	                        (rev 0)
+++ MacRuby/trunk/ucnv.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -0,0 +1,443 @@
+/* 
+ * MacRuby implementation of Ruby 1.9 String.
+ *
+ * This file is covered by the Ruby license. See COPYING for more details.
+ * 
+ * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
+ * Copyright (C) 1993-2007 Yukihiro Matsumoto
+ * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ * Copyright (C) 2000 Information-technology Promotion Agency, Japan
+ */
+
+#include "ruby.h"
+#include "encoding.h"
+#include "unicode/ucnv.h"
+
+// do not forget to close the converter
+// before leaving the function
+#define USE_CONVERTER(cnv, str) \
+    assert(str->encoding->private_data != NULL); \
+    char cnv##_buffer[U_CNV_SAFECLONE_BUFFERSIZE]; \
+    UErrorCode cnv##_err = U_ZERO_ERROR; \
+    int32_t cnv##_buffer_size = U_CNV_SAFECLONE_BUFFERSIZE; \
+    UConverter *cnv = ucnv_safeClone( \
+	    (UConverter *)str->encoding->private_data, \
+	    cnv##_buffer, \
+	    &cnv##_buffer_size, \
+	    &cnv##_err \
+	); \
+    ucnv_reset(cnv);
+
+static void
+str_ucnv_update_flags(rb_str_t *self)
+{
+    assert(!str_is_stored_in_uchars(self));
+
+    USE_CONVERTER(cnv, self);
+
+    bool ascii_only = true;
+    bool valid_encoding = true;
+    bool has_supplementary = false;
+
+    const char *pos = self->data.bytes;
+    const char *end = pos + self->length_in_bytes;
+    for (;;) {
+	// iterate through the string one Unicode code point at a time
+	UErrorCode err = U_ZERO_ERROR;
+	UChar32 c = ucnv_getNextUChar(cnv, &pos, end, &err);
+	if (U_FAILURE(err)) {
+	    if (err == U_INDEX_OUTOFBOUNDS_ERROR) {
+		// end of the string
+		break;
+	    }
+	    else {
+		// conversion error
+		valid_encoding = false;
+		ascii_only = false;
+	    }
+	}
+	else {
+	    if (c > 127) {
+		ascii_only = false;
+		if (U_IS_SUPPLEMENTARY(c)) {
+		    has_supplementary = true;
+		}
+	    }
+	}
+    }
+
+    ucnv_close(cnv);
+
+    str_set_has_supplementary(self, has_supplementary);
+    str_set_valid_encoding(self, valid_encoding);
+    str_set_ascii_only(self, ascii_only);
+}
+
+static void
+str_ucnv_make_data_binary(rb_str_t *self)
+{
+    assert(str_is_stored_in_uchars(self));
+
+    USE_CONVERTER(cnv, self);
+
+    UErrorCode err = U_ZERO_ERROR;
+    long capa = UCNV_GET_MAX_BYTES_FOR_STRING(BYTES_TO_UCHARS(
+		self->length_in_bytes), ucnv_getMaxCharSize(cnv));
+    char *buffer = xmalloc(capa);
+    const UChar *source_pos = self->data.uchars;
+    const UChar *source_end = self->data.uchars
+	+ BYTES_TO_UCHARS(self->length_in_bytes);
+    char *target_pos = buffer;
+    char *target_end = buffer + capa;
+    ucnv_fromUnicode(cnv, &target_pos, target_end, &source_pos, source_end,
+	    NULL, true, &err);
+    // there should never be any conversion error here
+    // (if there's one it means some checking has been forgotten before)
+    assert(U_SUCCESS(err));
+
+    ucnv_close(cnv);
+
+    str_set_stored_in_uchars(self, false);
+    self->capacity_in_bytes = capa;
+    self->length_in_bytes = target_pos - buffer;
+    GC_WB(&self->data.bytes, buffer);
+}
+
+static long
+utf16_bytesize_approximation(rb_encoding_t *enc, int bytesize)
+{
+    long approximation;
+    if (UTF16_ENC(enc)) {
+	approximation = bytesize; // the bytesize in UTF-16 is the same
+				  // whatever the endianness
+    }
+    else if (UTF32_ENC(enc)) {
+	// the bytesize in UTF-16 is nearly half of the bytesize in UTF-32
+	// (if there characters not in the BMP it's a bit more though)
+	approximation = bytesize / 2;
+    }
+    else {
+	// take a quite large size to not have to reallocate
+	approximation = bytesize * 2;
+    }
+
+    if (ODD_NUMBER(approximation)) {
+	// the size must be an even number
+	++approximation;
+    }
+
+    return approximation;
+}
+
+static bool
+str_ucnv_try_making_data_uchars(rb_str_t *self)
+{
+    assert(!str_is_stored_in_uchars(self));
+
+    USE_CONVERTER(cnv, self);
+
+    long capa = utf16_bytesize_approximation(self->encoding,
+	    self->length_in_bytes);
+    const char *source_pos = self->data.bytes;
+    const char *source_end = self->data.bytes + self->length_in_bytes;
+    UChar *buffer = xmalloc(capa);
+    UChar *target_pos = buffer;
+    UErrorCode err = U_ZERO_ERROR;
+    for (;;) {
+	UChar *target_end = buffer + BYTES_TO_UCHARS(capa);
+	err = U_ZERO_ERROR;
+	ucnv_toUnicode(cnv, &target_pos, target_end, &source_pos, source_end,
+		NULL, true, &err);
+	if (err == U_BUFFER_OVERFLOW_ERROR) {
+	    long index = target_pos - buffer;
+	    capa *= 2; // double the buffer's size
+	    buffer = xrealloc(buffer, capa);
+	    target_pos = buffer + index;
+	}
+	else {
+	    break;
+	}
+    }
+
+    ucnv_close(cnv);
+
+    if (U_SUCCESS(err)) {
+	str_set_valid_encoding(self, true);
+	str_set_stored_in_uchars(self, true);
+	self->capacity_in_bytes = capa;
+	self->length_in_bytes = UCHARS_TO_BYTES(target_pos - buffer);
+	GC_WB(&self->data.uchars, buffer);
+	return true;
+    }
+    else {
+	str_set_valid_encoding(self, false);
+	return false;
+    }
+}
+
+static long
+str_ucnv_length(rb_str_t *self, bool ucs2_mode)
+{
+    assert(!str_is_stored_in_uchars(self));
+
+    USE_CONVERTER(cnv, self);
+
+    const char *pos = self->data.bytes;
+    const char *end = pos + self->length_in_bytes;
+    long len = 0;
+    bool valid_encoding = true;
+    for (;;) {
+	const char *character_start_pos = pos;
+	// iterate through the string one Unicode code point at a time
+	UErrorCode err = U_ZERO_ERROR;
+	UChar32 c = ucnv_getNextUChar(cnv, &pos, end, &err);
+	if (err == U_INDEX_OUTOFBOUNDS_ERROR) {
+	    // end of the string
+	    break;
+	}
+	else if (U_FAILURE(err)) {
+	    valid_encoding = false;
+	    long min_char_size = self->encoding->min_char_size;
+	    long converted_width = pos - character_start_pos;
+	    len += div_round_up(converted_width, min_char_size);
+	}
+	else {
+	    if (ucs2_mode && !U_IS_BMP(c)) {
+		len += 2;
+	    }
+	    else {
+		++len;
+	    }
+	}
+    }
+
+    ucnv_close(cnv);
+
+    str_set_valid_encoding(self, valid_encoding);
+
+    return len;
+}
+
+#define STACK_BUFFER_SIZE 1024
+static long
+str_ucnv_bytesize(rb_str_t *self)
+{
+    assert(str_is_stored_in_uchars(self));
+
+    // for strings stored in UTF-16 for which the Ruby encoding is not UTF-16,
+    // we have to convert back the string in its original encoding to get the
+    // length in bytes
+    USE_CONVERTER(cnv, self);
+
+    UErrorCode err = U_ZERO_ERROR;
+
+    long len = 0;
+    char buffer[STACK_BUFFER_SIZE];
+    const UChar *source_pos = self->data.uchars;
+    const UChar *source_end = self->data.uchars + BYTES_TO_UCHARS(
+	    self->length_in_bytes);
+    char *target_end = buffer + STACK_BUFFER_SIZE;
+    for (;;) {
+	err = U_ZERO_ERROR;
+	char *target_pos = buffer;
+	ucnv_fromUnicode(cnv, &target_pos, target_end, &source_pos, source_end,
+		NULL, true, &err);
+	len += target_pos - buffer;
+	if (err != U_BUFFER_OVERFLOW_ERROR) {
+	    // if the convertion failed, a check was missing somewhere
+	    assert(U_SUCCESS(err));
+	    break;
+	}
+    }
+
+    ucnv_close(cnv);
+    return len;
+}
+
+static character_boundaries_t
+str_ucnv_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode)
+{
+    assert(!str_is_stored_in_uchars(self));
+
+    character_boundaries_t boundaries = {-1, -1};
+
+    if (index < 0) {
+	// calculating the length is slow but we don't have much choice
+	index += str_ucnv_length(self, ucs2_mode);
+	if (index < 0) {
+	    return boundaries;
+	}
+    }
+
+    // the code has many similarities with str_length
+    USE_CONVERTER(cnv, self);
+
+    const char *pos = self->data.bytes;
+    const char *end = pos + self->length_in_bytes;
+    long current_index = 0;
+    for (;;) {
+	const char *character_start_pos = pos;
+	// iterate through the string one Unicode code point at a time
+	// (we dont care what the character is or if it's valid or not)
+	UErrorCode err = U_ZERO_ERROR;
+	UChar32 c = ucnv_getNextUChar(cnv, &pos, end, &err);
+	if (err == U_INDEX_OUTOFBOUNDS_ERROR) {
+	    // end of the string
+	    break;
+	}
+	long offset_in_bytes = character_start_pos - self->data.bytes;
+	long converted_width = pos - character_start_pos;
+	if (U_FAILURE(err)) {
+	    long min_char_size = self->encoding->min_char_size;
+	    // division of converted_width by min_char_size rounded up
+	    long diff = div_round_up(converted_width, min_char_size);
+	    long length_in_bytes;
+	    if (current_index == index) {
+		if (min_char_size > converted_width) {
+		    length_in_bytes = converted_width;
+		}
+		else {
+		    length_in_bytes = min_char_size;
+		}
+		boundaries.start_offset_in_bytes = offset_in_bytes;
+		boundaries.end_offset_in_bytes =
+		    boundaries.start_offset_in_bytes + length_in_bytes;
+		break;
+	    }
+	    else if (current_index + diff > index) {
+		long adjusted_offset = offset_in_bytes + (index
+			- current_index) * min_char_size;
+		if (adjusted_offset + min_char_size > offset_in_bytes
+			+ converted_width) {
+		    length_in_bytes = offset_in_bytes + converted_width
+			- adjusted_offset;
+		}
+		else {
+		    length_in_bytes = min_char_size;
+		}
+		boundaries.start_offset_in_bytes = adjusted_offset;
+		boundaries.end_offset_in_bytes =
+		    boundaries.start_offset_in_bytes + length_in_bytes;
+		break;
+	    }
+	    current_index += diff;
+	}
+	else {
+	    if (ucs2_mode && !U_IS_BMP(c)) {
+		// you cannot cut a surrogate in an encoding that is not UTF-16
+		if (current_index == index) {
+		    boundaries.start_offset_in_bytes = offset_in_bytes;
+		    break;
+		}
+		else if (current_index+1 == index) {
+		    boundaries.end_offset_in_bytes = offset_in_bytes
+			+ converted_width;
+		    break;
+		}
+		++current_index;
+	    }
+
+	    if (current_index == index) {
+		boundaries.start_offset_in_bytes = offset_in_bytes;
+		boundaries.end_offset_in_bytes =
+		    boundaries.start_offset_in_bytes + converted_width;
+		break;
+	    }
+
+	    ++current_index;
+	}
+    }
+
+    ucnv_close(cnv);
+
+    return boundaries;
+}
+
+static long
+str_ucnv_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes,
+	bool ucs2_mode)
+{
+    assert(!str_is_stored_in_uchars(self));
+
+    // the code has many similarities with str_length
+    USE_CONVERTER(cnv, self);
+
+    const char *current_position = self->data.bytes;
+    const char *searched_position = current_position + offset_in_bytes;
+    const char *end = current_position + self->length_in_bytes;
+    long index = 0;
+    for (;;) {
+	const char *character_start_position = current_position;
+	// iterate through the string one Unicode code point at a time
+	UErrorCode err = U_ZERO_ERROR;
+	UChar32 c = ucnv_getNextUChar(cnv, &current_position, end, &err);
+	if (err == U_INDEX_OUTOFBOUNDS_ERROR) {
+	    // end of the string
+	    // should not happen because str_offset_in_bytes_to_index
+	    // checks before that offset_in_bytes is inferior to the length
+	    // in bytes
+	    abort();
+	}
+	else if (U_FAILURE(err)) {
+	    long min_char_size = self->encoding->min_char_size;
+	    long converted_width = current_position - character_start_position;
+	    long to_add = div_round_up(converted_width, min_char_size);
+	    if (searched_position < character_start_position + to_add) {
+		long difference = searched_position - character_start_position;
+		index += (difference / min_char_size);
+		break;
+	    }
+	    index += to_add;
+	}
+	else {
+	    if (searched_position < current_position) {
+		break;
+	    }
+	    if (ucs2_mode && !U_IS_BMP(c)) {
+		index += 2;
+	    }
+	    else {
+		++index;
+	    }
+	}
+	if (searched_position == current_position) {
+	    break;
+	}
+    }
+
+    ucnv_close(cnv);
+
+    return index;
+}
+
+void
+enc_init_ucnv_encoding(rb_encoding_t *encoding)
+{
+    // create the ICU converter
+    UErrorCode err = U_ZERO_ERROR;
+    UConverter *converter = ucnv_open(encoding->public_name, &err);
+    if (!U_SUCCESS(err) || (converter == NULL)) {
+	fprintf(stderr, "Couldn't create the encoder for %s\n",
+		encoding->public_name);
+	abort();
+    }
+    // stop the conversion when the conversion failed
+    err = U_ZERO_ERROR;
+    ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
+	    &err);
+    err = U_ZERO_ERROR;
+    ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
+	    NULL, &err);
+
+    // fill the fields not filled yet
+    encoding->private_data = converter;
+    encoding->methods.update_flags = str_ucnv_update_flags;
+    encoding->methods.make_data_binary = str_ucnv_make_data_binary;
+    encoding->methods.try_making_data_uchars = str_ucnv_try_making_data_uchars;
+    encoding->methods.length = str_ucnv_length;
+    encoding->methods.bytesize = str_ucnv_bytesize;
+    encoding->methods.get_character_boundaries =
+	str_ucnv_get_character_boundaries;
+    encoding->methods.offset_in_bytes_to_index =
+	str_ucnv_offset_in_bytes_to_index;
+}

Modified: MacRuby/trunk/util.c
===================================================================
--- MacRuby/trunk/util.c	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/util.c	2010-03-12 23:56:52 UTC (rev 3746)
@@ -745,18 +745,9 @@
     if (getcwd(buf, sizeof buf) == NULL) {
 	rb_sys_fail("getcwd");
     }
-
-    CFStringRef tmp = CFStringCreateWithFileSystemRepresentation(NULL, buf);
-    assert(tmp != NULL);
-
-    CFMutableStringRef str = CFStringCreateMutableCopy(NULL, 0, tmp);
-    assert(str != NULL);
-    CFRelease(tmp);
-    CFMakeCollectable(str);
-    CFStringNormalize(str, kCFStringNormalizationFormC);
-
+    VALUE str = rb_str_new2(buf);
     OBJ_TAINT(str); 
-    return (VALUE)str;
+    return str;
 }
 
 /****************************************************************

Modified: MacRuby/trunk/vm.cpp
===================================================================
--- MacRuby/trunk/vm.cpp	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/vm.cpp	2010-03-12 23:56:52 UTC (rev 3746)
@@ -174,7 +174,7 @@
 	    return mm->getGOTBase();
 	}
 
-#if LLVM_TOT
+#if defined(LLVM_TOT) || defined(LLVM_PRE_TOT)
 	void SetDlsymTable(void *ptr) {
 	    mm->SetDlsymTable(ptr);
 	}
@@ -2856,8 +2856,8 @@
 
     int n = 0;
     VALUE args[3];
-    args[n++] = rb_funcall(rb_cNameErrorMesg, '!', 3, rb_str_new2(format),
-	    obj, meth);
+    VALUE not_args[3] = {rb_str_new2(format), obj, meth};
+    args[n++] = rb_vm_call(rb_cNameErrorMesg, selNot2, 3, not_args, false);
     args[n++] = meth;
     if (exc == rb_eNoMethodError) {
 	args[n++] = rb_ary_new4(argc - 1, argv + 1);
@@ -3374,9 +3374,6 @@
 }
 #endif
 
-extern "C" VALUE rb_reg_match_pre(VALUE match, SEL sel);
-extern "C" VALUE rb_reg_match_post(VALUE match, SEL sel);
-
 extern "C"
 VALUE
 rb_vm_get_special(char code)
@@ -3392,10 +3389,10 @@
 	    val = rb_reg_last_match(backref);
 	    break;
 	case '`':
-	    val = rb_reg_match_pre(backref, 0);
+	    val = rb_reg_match_pre(backref);
 	    break;
 	case '\'':
-	    val = rb_reg_match_post(backref, 0);
+	    val = rb_reg_match_post(backref);
 	    break;
 	case '+':
 	    val = rb_reg_match_last(backref);
@@ -4955,6 +4952,14 @@
     assert(m != NULL);
     old_resolveInstanceMethod_imp = method_getImplementation(m);
     method_setImplementation(m, (IMP)resolveInstanceMethod_imp);
+
+    // Early define some classes.
+    rb_cSymbol = rb_objc_create_class("Symbol",
+	    (VALUE)objc_getClass("NSString"));
+    rb_cEncoding = rb_objc_create_class("Encoding",
+	    (VALUE)objc_getClass("NSObject"));
+    rb_cRubyString = rb_objc_create_class("String",
+	    (VALUE)objc_getClass("NSMutableString"));
 }
 
 static VALUE

Modified: MacRuby/trunk/vm.h
===================================================================
--- MacRuby/trunk/vm.h	2010-03-12 22:55:54 UTC (rev 3745)
+++ MacRuby/trunk/vm.h	2010-03-12 23:56:52 UTC (rev 3746)
@@ -468,6 +468,16 @@
     } \
     while (0)
 
+#define ENSURE_AND_RETURN_IF_BROKEN(code) \
+    do { \
+        VALUE __v = rb_vm_pop_broken_value(); \
+        if (__v != Qundef) { \
+	    code; \
+            return __v; \
+        } \
+    } \
+    while (0)
+
 void rb_vm_finalize(void);
 
 void rb_vm_load_bridge_support(const char *path, const char *framework_path,
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/macruby-changes/attachments/20100312/5338338f/attachment-0001.html>


More information about the macruby-changes mailing list